/* Medium-level subroutines: convert bit-field store and extract
   and shifts, multiplies and divides to rtl instructions.
   Copyright (C) 1987-2017 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */


#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "predict.h"
#include "memmodel.h"
#include "tm_p.h"
#include "expmed.h"
#include "optabs.h"
#include "emit-rtl.h"
#include "diagnostic-core.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "dojump.h"
#include "explow.h"
#include "expr.h"
#include "langhooks.h"

struct target_expmed default_target_expmed;
#if SWITCHABLE_TARGET
struct target_expmed *this_target_expmed = &default_target_expmed;
#endif

static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
                                   unsigned HOST_WIDE_INT,
                                   unsigned HOST_WIDE_INT,
                                   unsigned HOST_WIDE_INT,
                                   rtx, bool);
static void store_fixed_bit_field_1 (rtx, unsigned HOST_WIDE_INT,
                                     unsigned HOST_WIDE_INT,
                                     rtx, bool);
static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
                                   unsigned HOST_WIDE_INT,
                                   unsigned HOST_WIDE_INT,
                                   unsigned HOST_WIDE_INT,
                                   rtx, bool);
static rtx extract_fixed_bit_field (machine_mode, rtx,
                                    unsigned HOST_WIDE_INT,
                                    unsigned HOST_WIDE_INT, rtx, int, bool);
static rtx extract_fixed_bit_field_1 (machine_mode, rtx,
                                      unsigned HOST_WIDE_INT,
                                      unsigned HOST_WIDE_INT, rtx, int, bool);
static rtx lshift_value (machine_mode, unsigned HOST_WIDE_INT, int);
static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
                                    unsigned HOST_WIDE_INT, int, bool);
static void do_cmp_and_jump (rtx, rtx, enum rtx_code, machine_mode, rtx_code_label *);
static rtx expand_smod_pow2 (machine_mode, rtx, HOST_WIDE_INT);
static rtx expand_sdiv_pow2 (machine_mode, rtx, HOST_WIDE_INT);

/* Return a constant integer mask value of mode MODE with BITSIZE ones
   followed by BITPOS zeros, or the complement of that if COMPLEMENT.
   The mask is truncated if necessary to the width of mode MODE.  The
   mask is zero-extended if BITSIZE+BITPOS is too small for MODE.  */

static inline rtx
mask_rtx (machine_mode mode, int bitpos, int bitsize, bool complement)
{
  return immed_wide_int_const
    (wi::shifted_mask (bitpos, bitsize, complement,
                       GET_MODE_PRECISION (mode)), mode);
}

/* Test whether a value is zero or a power of two.
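   For instance, with x = 8 (binary 1000), x & (x - 1) is 1000 & 0111 = 0,
   so the macro below yields true; with x = 6 (binary 0110) it yields
   0110 & 0101 = 0100, which is nonzero, so the result is false.  x = 0
   also satisfies the test, hence the OR_ZERO in the macro's name.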
*/ 88 #define EXACT_POWER_OF_2_OR_ZERO_P(x) \ 89 (((x) & ((x) - HOST_WIDE_INT_1U)) == 0) 90 91 struct init_expmed_rtl 92 { 93 rtx reg; 94 rtx plus; 95 rtx neg; 96 rtx mult; 97 rtx sdiv; 98 rtx udiv; 99 rtx sdiv_32; 100 rtx smod_32; 101 rtx wide_mult; 102 rtx wide_lshr; 103 rtx wide_trunc; 104 rtx shift; 105 rtx shift_mult; 106 rtx shift_add; 107 rtx shift_sub0; 108 rtx shift_sub1; 109 rtx zext; 110 rtx trunc; 111 112 rtx pow2[MAX_BITS_PER_WORD]; 113 rtx cint[MAX_BITS_PER_WORD]; 114 }; 115 116 static void 117 init_expmed_one_conv (struct init_expmed_rtl *all, machine_mode to_mode, 118 machine_mode from_mode, bool speed) 119 { 120 int to_size, from_size; 121 rtx which; 122 123 to_size = GET_MODE_PRECISION (to_mode); 124 from_size = GET_MODE_PRECISION (from_mode); 125 126 /* Most partial integers have a precision less than the "full" 127 integer it requires for storage. In case one doesn't, for 128 comparison purposes here, reduce the bit size by one in that 129 case. */ 130 if (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT 131 && pow2p_hwi (to_size)) 132 to_size --; 133 if (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT 134 && pow2p_hwi (from_size)) 135 from_size --; 136 137 /* Assume cost of zero-extend and sign-extend is the same. */ 138 which = (to_size < from_size ? all->trunc : all->zext); 139 140 PUT_MODE (all->reg, from_mode); 141 set_convert_cost (to_mode, from_mode, speed, 142 set_src_cost (which, to_mode, speed)); 143 } 144 145 static void 146 init_expmed_one_mode (struct init_expmed_rtl *all, 147 machine_mode mode, int speed) 148 { 149 int m, n, mode_bitsize; 150 machine_mode mode_from; 151 152 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode); 153 154 PUT_MODE (all->reg, mode); 155 PUT_MODE (all->plus, mode); 156 PUT_MODE (all->neg, mode); 157 PUT_MODE (all->mult, mode); 158 PUT_MODE (all->sdiv, mode); 159 PUT_MODE (all->udiv, mode); 160 PUT_MODE (all->sdiv_32, mode); 161 PUT_MODE (all->smod_32, mode); 162 PUT_MODE (all->wide_trunc, mode); 163 PUT_MODE (all->shift, mode); 164 PUT_MODE (all->shift_mult, mode); 165 PUT_MODE (all->shift_add, mode); 166 PUT_MODE (all->shift_sub0, mode); 167 PUT_MODE (all->shift_sub1, mode); 168 PUT_MODE (all->zext, mode); 169 PUT_MODE (all->trunc, mode); 170 171 set_add_cost (speed, mode, set_src_cost (all->plus, mode, speed)); 172 set_neg_cost (speed, mode, set_src_cost (all->neg, mode, speed)); 173 set_mul_cost (speed, mode, set_src_cost (all->mult, mode, speed)); 174 set_sdiv_cost (speed, mode, set_src_cost (all->sdiv, mode, speed)); 175 set_udiv_cost (speed, mode, set_src_cost (all->udiv, mode, speed)); 176 177 set_sdiv_pow2_cheap (speed, mode, (set_src_cost (all->sdiv_32, mode, speed) 178 <= 2 * add_cost (speed, mode))); 179 set_smod_pow2_cheap (speed, mode, (set_src_cost (all->smod_32, mode, speed) 180 <= 4 * add_cost (speed, mode))); 181 182 set_shift_cost (speed, mode, 0, 0); 183 { 184 int cost = add_cost (speed, mode); 185 set_shiftadd_cost (speed, mode, 0, cost); 186 set_shiftsub0_cost (speed, mode, 0, cost); 187 set_shiftsub1_cost (speed, mode, 0, cost); 188 } 189 190 n = MIN (MAX_BITS_PER_WORD, mode_bitsize); 191 for (m = 1; m < n; m++) 192 { 193 XEXP (all->shift, 1) = all->cint[m]; 194 XEXP (all->shift_mult, 1) = all->pow2[m]; 195 196 set_shift_cost (speed, mode, m, set_src_cost (all->shift, mode, speed)); 197 set_shiftadd_cost (speed, mode, m, set_src_cost (all->shift_add, mode, 198 speed)); 199 set_shiftsub0_cost (speed, mode, m, set_src_cost (all->shift_sub0, mode, 200 speed)); 201 set_shiftsub1_cost (speed, mode, m, set_src_cost 
(all->shift_sub1, mode, 202 speed)); 203 } 204 205 if (SCALAR_INT_MODE_P (mode)) 206 { 207 for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT; 208 mode_from = (machine_mode)(mode_from + 1)) 209 init_expmed_one_conv (all, mode, mode_from, speed); 210 } 211 if (GET_MODE_CLASS (mode) == MODE_INT) 212 { 213 machine_mode wider_mode = GET_MODE_WIDER_MODE (mode); 214 if (wider_mode != VOIDmode) 215 { 216 PUT_MODE (all->zext, wider_mode); 217 PUT_MODE (all->wide_mult, wider_mode); 218 PUT_MODE (all->wide_lshr, wider_mode); 219 XEXP (all->wide_lshr, 1) = GEN_INT (mode_bitsize); 220 221 set_mul_widen_cost (speed, wider_mode, 222 set_src_cost (all->wide_mult, wider_mode, speed)); 223 set_mul_highpart_cost (speed, mode, 224 set_src_cost (all->wide_trunc, mode, speed)); 225 } 226 } 227 } 228 229 void 230 init_expmed (void) 231 { 232 struct init_expmed_rtl all; 233 machine_mode mode = QImode; 234 int m, speed; 235 236 memset (&all, 0, sizeof all); 237 for (m = 1; m < MAX_BITS_PER_WORD; m++) 238 { 239 all.pow2[m] = GEN_INT (HOST_WIDE_INT_1 << m); 240 all.cint[m] = GEN_INT (m); 241 } 242 243 /* Avoid using hard regs in ways which may be unsupported. */ 244 all.reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1); 245 all.plus = gen_rtx_PLUS (mode, all.reg, all.reg); 246 all.neg = gen_rtx_NEG (mode, all.reg); 247 all.mult = gen_rtx_MULT (mode, all.reg, all.reg); 248 all.sdiv = gen_rtx_DIV (mode, all.reg, all.reg); 249 all.udiv = gen_rtx_UDIV (mode, all.reg, all.reg); 250 all.sdiv_32 = gen_rtx_DIV (mode, all.reg, all.pow2[5]); 251 all.smod_32 = gen_rtx_MOD (mode, all.reg, all.pow2[5]); 252 all.zext = gen_rtx_ZERO_EXTEND (mode, all.reg); 253 all.wide_mult = gen_rtx_MULT (mode, all.zext, all.zext); 254 all.wide_lshr = gen_rtx_LSHIFTRT (mode, all.wide_mult, all.reg); 255 all.wide_trunc = gen_rtx_TRUNCATE (mode, all.wide_lshr); 256 all.shift = gen_rtx_ASHIFT (mode, all.reg, all.reg); 257 all.shift_mult = gen_rtx_MULT (mode, all.reg, all.reg); 258 all.shift_add = gen_rtx_PLUS (mode, all.shift_mult, all.reg); 259 all.shift_sub0 = gen_rtx_MINUS (mode, all.shift_mult, all.reg); 260 all.shift_sub1 = gen_rtx_MINUS (mode, all.reg, all.shift_mult); 261 all.trunc = gen_rtx_TRUNCATE (mode, all.reg); 262 263 for (speed = 0; speed < 2; speed++) 264 { 265 crtl->maybe_hot_insn_p = speed; 266 set_zero_cost (speed, set_src_cost (const0_rtx, mode, speed)); 267 268 for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT; 269 mode = (machine_mode)(mode + 1)) 270 init_expmed_one_mode (&all, mode, speed); 271 272 if (MIN_MODE_PARTIAL_INT != VOIDmode) 273 for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT; 274 mode = (machine_mode)(mode + 1)) 275 init_expmed_one_mode (&all, mode, speed); 276 277 if (MIN_MODE_VECTOR_INT != VOIDmode) 278 for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT; 279 mode = (machine_mode)(mode + 1)) 280 init_expmed_one_mode (&all, mode, speed); 281 } 282 283 if (alg_hash_used_p ()) 284 { 285 struct alg_hash_entry *p = alg_hash_entry_ptr (0); 286 memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES); 287 } 288 else 289 set_alg_hash_used_p (true); 290 default_rtl_profile (); 291 292 ggc_free (all.trunc); 293 ggc_free (all.shift_sub1); 294 ggc_free (all.shift_sub0); 295 ggc_free (all.shift_add); 296 ggc_free (all.shift_mult); 297 ggc_free (all.shift); 298 ggc_free (all.wide_trunc); 299 ggc_free (all.wide_lshr); 300 ggc_free (all.wide_mult); 301 ggc_free (all.zext); 302 ggc_free (all.smod_32); 303 ggc_free (all.sdiv_32); 304 ggc_free (all.udiv); 305 ggc_free (all.sdiv); 306 ggc_free (all.mult); 307 
ggc_free (all.neg); 308 ggc_free (all.plus); 309 ggc_free (all.reg); 310 } 311 312 /* Return an rtx representing minus the value of X. 313 MODE is the intended mode of the result, 314 useful if X is a CONST_INT. */ 315 316 rtx 317 negate_rtx (machine_mode mode, rtx x) 318 { 319 rtx result = simplify_unary_operation (NEG, mode, x, mode); 320 321 if (result == 0) 322 result = expand_unop (mode, neg_optab, x, NULL_RTX, 0); 323 324 return result; 325 } 326 327 /* Whether reverse storage order is supported on the target. */ 328 static int reverse_storage_order_supported = -1; 329 330 /* Check whether reverse storage order is supported on the target. */ 331 332 static void 333 check_reverse_storage_order_support (void) 334 { 335 if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN) 336 { 337 reverse_storage_order_supported = 0; 338 sorry ("reverse scalar storage order"); 339 } 340 else 341 reverse_storage_order_supported = 1; 342 } 343 344 /* Whether reverse FP storage order is supported on the target. */ 345 static int reverse_float_storage_order_supported = -1; 346 347 /* Check whether reverse FP storage order is supported on the target. */ 348 349 static void 350 check_reverse_float_storage_order_support (void) 351 { 352 if (FLOAT_WORDS_BIG_ENDIAN != WORDS_BIG_ENDIAN) 353 { 354 reverse_float_storage_order_supported = 0; 355 sorry ("reverse floating-point scalar storage order"); 356 } 357 else 358 reverse_float_storage_order_supported = 1; 359 } 360 361 /* Return an rtx representing value of X with reverse storage order. 362 MODE is the intended mode of the result, 363 useful if X is a CONST_INT. */ 364 365 rtx 366 flip_storage_order (enum machine_mode mode, rtx x) 367 { 368 enum machine_mode int_mode; 369 rtx result; 370 371 if (mode == QImode) 372 return x; 373 374 if (COMPLEX_MODE_P (mode)) 375 { 376 rtx real = read_complex_part (x, false); 377 rtx imag = read_complex_part (x, true); 378 379 real = flip_storage_order (GET_MODE_INNER (mode), real); 380 imag = flip_storage_order (GET_MODE_INNER (mode), imag); 381 382 return gen_rtx_CONCAT (mode, real, imag); 383 } 384 385 if (__builtin_expect (reverse_storage_order_supported < 0, 0)) 386 check_reverse_storage_order_support (); 387 388 if (SCALAR_INT_MODE_P (mode)) 389 int_mode = mode; 390 else 391 { 392 if (FLOAT_MODE_P (mode) 393 && __builtin_expect (reverse_float_storage_order_supported < 0, 0)) 394 check_reverse_float_storage_order_support (); 395 396 int_mode = mode_for_size (GET_MODE_PRECISION (mode), MODE_INT, 0); 397 if (int_mode == BLKmode) 398 { 399 sorry ("reverse storage order for %smode", GET_MODE_NAME (mode)); 400 return x; 401 } 402 x = gen_lowpart (int_mode, x); 403 } 404 405 result = simplify_unary_operation (BSWAP, int_mode, x, int_mode); 406 if (result == 0) 407 result = expand_unop (int_mode, bswap_optab, x, NULL_RTX, 1); 408 409 if (int_mode != mode) 410 result = gen_lowpart (mode, result); 411 412 return result; 413 } 414 415 /* Adjust bitfield memory MEM so that it points to the first unit of mode 416 MODE that contains a bitfield of size BITSIZE at bit position BITNUM. 417 If MODE is BLKmode, return a reference to every byte in the bitfield. 418 Set *NEW_BITNUM to the bit position of the field within the new memory. 
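   For instance, with MODE == SImode (32 bits) and 8-bit units, BITSIZE == 5
   and BITNUM == 70, the returned memory is MEM advanced by
   (70 - 70 % 32) / 8 == 8 bytes and *NEW_BITNUM becomes 70 % 32 == 6.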
*/ 419 420 static rtx 421 narrow_bit_field_mem (rtx mem, machine_mode mode, 422 unsigned HOST_WIDE_INT bitsize, 423 unsigned HOST_WIDE_INT bitnum, 424 unsigned HOST_WIDE_INT *new_bitnum) 425 { 426 if (mode == BLKmode) 427 { 428 *new_bitnum = bitnum % BITS_PER_UNIT; 429 HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT; 430 HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1) 431 / BITS_PER_UNIT); 432 return adjust_bitfield_address_size (mem, mode, offset, size); 433 } 434 else 435 { 436 unsigned int unit = GET_MODE_BITSIZE (mode); 437 *new_bitnum = bitnum % unit; 438 HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT; 439 return adjust_bitfield_address (mem, mode, offset); 440 } 441 } 442 443 /* The caller wants to perform insertion or extraction PATTERN on a 444 bitfield of size BITSIZE at BITNUM bits into memory operand OP0. 445 BITREGION_START and BITREGION_END are as for store_bit_field 446 and FIELDMODE is the natural mode of the field. 447 448 Search for a mode that is compatible with the memory access 449 restrictions and (where applicable) with a register insertion or 450 extraction. Return the new memory on success, storing the adjusted 451 bit position in *NEW_BITNUM. Return null otherwise. */ 452 453 static rtx 454 adjust_bit_field_mem_for_reg (enum extraction_pattern pattern, 455 rtx op0, HOST_WIDE_INT bitsize, 456 HOST_WIDE_INT bitnum, 457 unsigned HOST_WIDE_INT bitregion_start, 458 unsigned HOST_WIDE_INT bitregion_end, 459 machine_mode fieldmode, 460 unsigned HOST_WIDE_INT *new_bitnum) 461 { 462 bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start, 463 bitregion_end, MEM_ALIGN (op0), 464 MEM_VOLATILE_P (op0)); 465 machine_mode best_mode; 466 if (iter.next_mode (&best_mode)) 467 { 468 /* We can use a memory in BEST_MODE. See whether this is true for 469 any wider modes. All other things being equal, we prefer to 470 use the widest mode possible because it tends to expose more 471 CSE opportunities. */ 472 if (!iter.prefer_smaller_modes ()) 473 { 474 /* Limit the search to the mode required by the corresponding 475 register insertion or extraction instruction, if any. */ 476 machine_mode limit_mode = word_mode; 477 extraction_insn insn; 478 if (get_best_reg_extraction_insn (&insn, pattern, 479 GET_MODE_BITSIZE (best_mode), 480 fieldmode)) 481 limit_mode = insn.field_mode; 482 483 machine_mode wider_mode; 484 while (iter.next_mode (&wider_mode) 485 && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode)) 486 best_mode = wider_mode; 487 } 488 return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum, 489 new_bitnum); 490 } 491 return NULL_RTX; 492 } 493 494 /* Return true if a bitfield of size BITSIZE at bit number BITNUM within 495 a structure of mode STRUCT_MODE represents a lowpart subreg. The subreg 496 offset is then BITNUM / BITS_PER_UNIT. */ 497 498 static bool 499 lowpart_bit_field_p (unsigned HOST_WIDE_INT bitnum, 500 unsigned HOST_WIDE_INT bitsize, 501 machine_mode struct_mode) 502 { 503 if (BYTES_BIG_ENDIAN) 504 return (bitnum % BITS_PER_UNIT == 0 505 && (bitnum + bitsize == GET_MODE_BITSIZE (struct_mode) 506 || (bitnum + bitsize) % BITS_PER_WORD == 0)); 507 else 508 return bitnum % BITS_PER_WORD == 0; 509 } 510 511 /* Return true if -fstrict-volatile-bitfields applies to an access of OP0 512 containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE. 513 Return false if the access would touch memory outside the range 514 BITREGION_START to BITREGION_END for conformance to the C++ memory 515 model. 
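   For instance, a 3-bit field at BITNUM 14 accessed in HImode would need
   an access split across two halfwords (14 % 16 + 3 > 16), which is one
   of the cases rejected below.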
*/ 516 517 static bool 518 strict_volatile_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize, 519 unsigned HOST_WIDE_INT bitnum, 520 machine_mode fieldmode, 521 unsigned HOST_WIDE_INT bitregion_start, 522 unsigned HOST_WIDE_INT bitregion_end) 523 { 524 unsigned HOST_WIDE_INT modesize = GET_MODE_BITSIZE (fieldmode); 525 526 /* -fstrict-volatile-bitfields must be enabled and we must have a 527 volatile MEM. */ 528 if (!MEM_P (op0) 529 || !MEM_VOLATILE_P (op0) 530 || flag_strict_volatile_bitfields <= 0) 531 return false; 532 533 /* Non-integral modes likely only happen with packed structures. 534 Punt. */ 535 if (!SCALAR_INT_MODE_P (fieldmode)) 536 return false; 537 538 /* The bit size must not be larger than the field mode, and 539 the field mode must not be larger than a word. */ 540 if (bitsize > modesize || modesize > BITS_PER_WORD) 541 return false; 542 543 /* Check for cases of unaligned fields that must be split. */ 544 if (bitnum % modesize + bitsize > modesize) 545 return false; 546 547 /* The memory must be sufficiently aligned for a MODESIZE access. 548 This condition guarantees, that the memory access will not 549 touch anything after the end of the structure. */ 550 if (MEM_ALIGN (op0) < modesize) 551 return false; 552 553 /* Check for cases where the C++ memory model applies. */ 554 if (bitregion_end != 0 555 && (bitnum - bitnum % modesize < bitregion_start 556 || bitnum - bitnum % modesize + modesize - 1 > bitregion_end)) 557 return false; 558 559 return true; 560 } 561 562 /* Return true if OP is a memory and if a bitfield of size BITSIZE at 563 bit number BITNUM can be treated as a simple value of mode MODE. */ 564 565 static bool 566 simple_mem_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize, 567 unsigned HOST_WIDE_INT bitnum, machine_mode mode) 568 { 569 return (MEM_P (op0) 570 && bitnum % BITS_PER_UNIT == 0 571 && bitsize == GET_MODE_BITSIZE (mode) 572 && (!SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0)) 573 || (bitnum % GET_MODE_ALIGNMENT (mode) == 0 574 && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode)))); 575 } 576 577 /* Try to use instruction INSV to store VALUE into a field of OP0. 578 BITSIZE and BITNUM are as for store_bit_field. */ 579 580 static bool 581 store_bit_field_using_insv (const extraction_insn *insv, rtx op0, 582 unsigned HOST_WIDE_INT bitsize, 583 unsigned HOST_WIDE_INT bitnum, 584 rtx value) 585 { 586 struct expand_operand ops[4]; 587 rtx value1; 588 rtx xop0 = op0; 589 rtx_insn *last = get_last_insn (); 590 bool copy_back = false; 591 592 machine_mode op_mode = insv->field_mode; 593 unsigned int unit = GET_MODE_BITSIZE (op_mode); 594 if (bitsize == 0 || bitsize > unit) 595 return false; 596 597 if (MEM_P (xop0)) 598 /* Get a reference to the first byte of the field. */ 599 xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum, 600 &bitnum); 601 else 602 { 603 /* Convert from counting within OP0 to counting in OP_MODE. */ 604 if (BYTES_BIG_ENDIAN) 605 bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0)); 606 607 /* If xop0 is a register, we need it in OP_MODE 608 to make it acceptable to the format of insv. */ 609 if (GET_CODE (xop0) == SUBREG) 610 /* We can't just change the mode, because this might clobber op0, 611 and we will need the original value of op0 if insv fails. 
*/
        xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
      if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
        xop0 = gen_lowpart_SUBREG (op_mode, xop0);
    }

  /* If the destination is a paradoxical subreg such that we need a
     truncate to the inner mode, perform the insertion on a temporary and
     truncate the result to the original destination.  Note that we can't
     just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
     X) 0)) is (reg:N X).  */
  if (GET_CODE (xop0) == SUBREG
      && REG_P (SUBREG_REG (xop0))
      && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
                                         op_mode))
    {
      rtx tem = gen_reg_rtx (op_mode);
      emit_move_insn (tem, xop0);
      xop0 = tem;
      copy_back = true;
    }

  /* There is a similar overflow check at the start of store_bit_field_1,
     but it only handles the case where the field lies completely outside
     the register.  Here the field may lie only partially within the
     register, so BITSIZE must be adjusted for the partial overlap.
     Without this adjustment, pr48335-2.c breaks on big-endian targets
     that have a bit-insert instruction, such as arm and aarch64.  */
  if (bitsize + bitnum > unit && bitnum < unit)
    {
      warning (OPT_Wextra, "write of %wu-bit data outside the bound of "
               "destination object, data truncated into %wu-bit",
               bitsize, unit - bitnum);
      bitsize = unit - bitnum;
    }

  /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
     "backwards" from the size of the unit we are inserting into.
     Otherwise, we count bits from the most significant on a
     BYTES/BITS_BIG_ENDIAN machine.  */

  if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
    bitnum = unit - bitsize - bitnum;

  /* Convert VALUE to op_mode (which insv insn wants) in VALUE1.  */
  value1 = value;
  if (GET_MODE (value) != op_mode)
    {
      if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
        {
          rtx tmp;
          /* Optimization: Don't bother really extending VALUE
             if it has all the bits we will actually use.  However,
             if we must narrow it, be sure we do it correctly.  */

          if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
            {
              tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
              if (! tmp)
                tmp = simplify_gen_subreg (op_mode,
                                           force_reg (GET_MODE (value),
                                                      value1),
                                           GET_MODE (value), 0);
            }
          else
            {
              tmp = gen_lowpart_if_possible (op_mode, value1);
              if (! tmp)
                tmp = gen_lowpart (op_mode, force_reg (GET_MODE (value),
                                                       value1));
            }
          value1 = tmp;
        }
      else if (CONST_INT_P (value))
        value1 = gen_int_mode (INTVAL (value), op_mode);
      else
        /* Parse phase is supposed to make VALUE's data type
           match that of the component reference, which is a type
           at least as wide as the field; so VALUE should have
           a mode that corresponds to that type.  */
        gcc_assert (CONSTANT_P (value));
    }

  create_fixed_operand (&ops[0], xop0);
  create_integer_operand (&ops[1], bitsize);
  create_integer_operand (&ops[2], bitnum);
  create_input_operand (&ops[3], value1, op_mode);
  if (maybe_expand_insn (insv->icode, 4, ops))
    {
      if (copy_back)
        convert_move (op0, xop0, true);
      return true;
    }
  delete_insns_since (last);
  return false;
}

/* A subroutine of store_bit_field, with the same arguments.  Return true
   if the operation could be implemented.
711 712 If FALLBACK_P is true, fall back to store_fixed_bit_field if we have 713 no other way of implementing the operation. If FALLBACK_P is false, 714 return false instead. */ 715 716 static bool 717 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize, 718 unsigned HOST_WIDE_INT bitnum, 719 unsigned HOST_WIDE_INT bitregion_start, 720 unsigned HOST_WIDE_INT bitregion_end, 721 machine_mode fieldmode, 722 rtx value, bool reverse, bool fallback_p) 723 { 724 rtx op0 = str_rtx; 725 rtx orig_value; 726 727 while (GET_CODE (op0) == SUBREG) 728 { 729 /* The following line once was done only if WORDS_BIG_ENDIAN, 730 but I think that is a mistake. WORDS_BIG_ENDIAN is 731 meaningful at a much higher level; when structures are copied 732 between memory and regs, the higher-numbered regs 733 always get higher addresses. */ 734 int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0))); 735 int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0)); 736 int byte_offset = 0; 737 738 /* Paradoxical subregs need special handling on big-endian machines. */ 739 if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size) 740 { 741 int difference = inner_mode_size - outer_mode_size; 742 743 if (WORDS_BIG_ENDIAN) 744 byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD; 745 if (BYTES_BIG_ENDIAN) 746 byte_offset += difference % UNITS_PER_WORD; 747 } 748 else 749 byte_offset = SUBREG_BYTE (op0); 750 751 bitnum += byte_offset * BITS_PER_UNIT; 752 op0 = SUBREG_REG (op0); 753 } 754 755 /* No action is needed if the target is a register and if the field 756 lies completely outside that register. This can occur if the source 757 code contains an out-of-bounds access to a small array. */ 758 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0))) 759 return true; 760 761 /* Use vec_set patterns for inserting parts of vectors whenever 762 available. */ 763 if (VECTOR_MODE_P (GET_MODE (op0)) 764 && !MEM_P (op0) 765 && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing 766 && fieldmode == GET_MODE_INNER (GET_MODE (op0)) 767 && bitsize == GET_MODE_UNIT_BITSIZE (GET_MODE (op0)) 768 && !(bitnum % GET_MODE_UNIT_BITSIZE (GET_MODE (op0)))) 769 { 770 struct expand_operand ops[3]; 771 machine_mode outermode = GET_MODE (op0); 772 machine_mode innermode = GET_MODE_INNER (outermode); 773 enum insn_code icode = optab_handler (vec_set_optab, outermode); 774 int pos = bitnum / GET_MODE_BITSIZE (innermode); 775 776 create_fixed_operand (&ops[0], op0); 777 create_input_operand (&ops[1], value, innermode); 778 create_integer_operand (&ops[2], pos); 779 if (maybe_expand_insn (icode, 3, ops)) 780 return true; 781 } 782 783 /* If the target is a register, overwriting the entire object, or storing 784 a full-word or multi-word field can be done with just a SUBREG. */ 785 if (!MEM_P (op0) 786 && bitsize == GET_MODE_BITSIZE (fieldmode) 787 && ((bitsize == GET_MODE_BITSIZE (GET_MODE (op0)) && bitnum == 0) 788 || (bitsize % BITS_PER_WORD == 0 && bitnum % BITS_PER_WORD == 0))) 789 { 790 /* Use the subreg machinery either to narrow OP0 to the required 791 words or to cope with mode punning between equal-sized modes. 792 In the latter case, use subreg on the rhs side, not lhs. 
*/ 793 rtx sub; 794 795 if (bitsize == GET_MODE_BITSIZE (GET_MODE (op0))) 796 { 797 sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0); 798 if (sub) 799 { 800 if (reverse) 801 sub = flip_storage_order (GET_MODE (op0), sub); 802 emit_move_insn (op0, sub); 803 return true; 804 } 805 } 806 else 807 { 808 sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0), 809 bitnum / BITS_PER_UNIT); 810 if (sub) 811 { 812 if (reverse) 813 value = flip_storage_order (fieldmode, value); 814 emit_move_insn (sub, value); 815 return true; 816 } 817 } 818 } 819 820 /* If the target is memory, storing any naturally aligned field can be 821 done with a simple store. For targets that support fast unaligned 822 memory, any naturally sized, unit aligned field can be done directly. */ 823 if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode)) 824 { 825 op0 = adjust_bitfield_address (op0, fieldmode, bitnum / BITS_PER_UNIT); 826 if (reverse) 827 value = flip_storage_order (fieldmode, value); 828 emit_move_insn (op0, value); 829 return true; 830 } 831 832 /* Make sure we are playing with integral modes. Pun with subregs 833 if we aren't. This must come after the entire register case above, 834 since that case is valid for any mode. The following cases are only 835 valid for integral modes. */ 836 { 837 machine_mode imode = int_mode_for_mode (GET_MODE (op0)); 838 if (imode != GET_MODE (op0)) 839 { 840 if (MEM_P (op0)) 841 op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0)); 842 else 843 { 844 gcc_assert (imode != BLKmode); 845 op0 = gen_lowpart (imode, op0); 846 } 847 } 848 } 849 850 /* Storing an lsb-aligned field in a register 851 can be done with a movstrict instruction. */ 852 853 if (!MEM_P (op0) 854 && !reverse 855 && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0)) 856 && bitsize == GET_MODE_BITSIZE (fieldmode) 857 && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing) 858 { 859 struct expand_operand ops[2]; 860 enum insn_code icode = optab_handler (movstrict_optab, fieldmode); 861 rtx arg0 = op0; 862 unsigned HOST_WIDE_INT subreg_off; 863 864 if (GET_CODE (arg0) == SUBREG) 865 { 866 /* Else we've got some float mode source being extracted into 867 a different float mode destination -- this combination of 868 subregs results in Severe Tire Damage. */ 869 gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode 870 || GET_MODE_CLASS (fieldmode) == MODE_INT 871 || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT); 872 arg0 = SUBREG_REG (arg0); 873 } 874 875 subreg_off = bitnum / BITS_PER_UNIT; 876 if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off)) 877 { 878 arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off); 879 880 create_fixed_operand (&ops[0], arg0); 881 /* Shrink the source operand to FIELDMODE. */ 882 create_convert_operand_to (&ops[1], value, fieldmode, false); 883 if (maybe_expand_insn (icode, 2, ops)) 884 return true; 885 } 886 } 887 888 /* Handle fields bigger than a word. */ 889 890 if (bitsize > BITS_PER_WORD) 891 { 892 /* Here we transfer the words of the field 893 in the order least significant first. 894 This is because the most significant word is the one which may 895 be less than full. 896 However, only do that if the value is not BLKmode. */ 897 898 const bool backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode; 899 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD; 900 unsigned int i; 901 rtx_insn *last; 902 903 /* This is the mode we must force value to, so that there will be enough 904 subwords to extract. 
Note that fieldmode will often (always?) be 905 VOIDmode, because that is what store_field uses to indicate that this 906 is a bit field, but passing VOIDmode to operand_subword_force 907 is not allowed. */ 908 fieldmode = GET_MODE (value); 909 if (fieldmode == VOIDmode) 910 fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT); 911 912 last = get_last_insn (); 913 for (i = 0; i < nwords; i++) 914 { 915 /* If I is 0, use the low-order word in both field and target; 916 if I is 1, use the next to lowest word; and so on. */ 917 unsigned int wordnum = (backwards 918 ? GET_MODE_SIZE (fieldmode) / UNITS_PER_WORD 919 - i - 1 920 : i); 921 unsigned int bit_offset = (backwards ^ reverse 922 ? MAX ((int) bitsize - ((int) i + 1) 923 * BITS_PER_WORD, 924 0) 925 : (int) i * BITS_PER_WORD); 926 rtx value_word = operand_subword_force (value, wordnum, fieldmode); 927 unsigned HOST_WIDE_INT new_bitsize = 928 MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD); 929 930 /* If the remaining chunk doesn't have full wordsize we have 931 to make sure that for big-endian machines the higher order 932 bits are used. */ 933 if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards) 934 value_word = simplify_expand_binop (word_mode, lshr_optab, 935 value_word, 936 GEN_INT (BITS_PER_WORD 937 - new_bitsize), 938 NULL_RTX, true, 939 OPTAB_LIB_WIDEN); 940 941 if (!store_bit_field_1 (op0, new_bitsize, 942 bitnum + bit_offset, 943 bitregion_start, bitregion_end, 944 word_mode, 945 value_word, reverse, fallback_p)) 946 { 947 delete_insns_since (last); 948 return false; 949 } 950 } 951 return true; 952 } 953 954 /* If VALUE has a floating-point or complex mode, access it as an 955 integer of the corresponding size. This can occur on a machine 956 with 64 bit registers that uses SFmode for float. It can also 957 occur for unaligned float or complex fields. */ 958 orig_value = value; 959 if (GET_MODE (value) != VOIDmode 960 && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT 961 && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT) 962 { 963 value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value))); 964 emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value); 965 } 966 967 /* If OP0 is a multi-word register, narrow it to the affected word. 968 If the region spans two words, defer to store_split_bit_field. 969 Don't do this if op0 is a single hard register wider than word 970 such as a float or vector register. */ 971 if (!MEM_P (op0) 972 && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD 973 && (!REG_P (op0) 974 || !HARD_REGISTER_P (op0) 975 || HARD_REGNO_NREGS (REGNO (op0), GET_MODE (op0)) != 1)) 976 { 977 if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD) 978 { 979 if (!fallback_p) 980 return false; 981 982 store_split_bit_field (op0, bitsize, bitnum, bitregion_start, 983 bitregion_end, value, reverse); 984 return true; 985 } 986 op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0), 987 bitnum / BITS_PER_WORD * UNITS_PER_WORD); 988 gcc_assert (op0); 989 bitnum %= BITS_PER_WORD; 990 } 991 992 /* From here on we can assume that the field to be stored in fits 993 within a word. If the destination is a register, it too fits 994 in a word. 
*/ 995 996 extraction_insn insv; 997 if (!MEM_P (op0) 998 && !reverse 999 && get_best_reg_extraction_insn (&insv, EP_insv, 1000 GET_MODE_BITSIZE (GET_MODE (op0)), 1001 fieldmode) 1002 && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value)) 1003 return true; 1004 1005 /* If OP0 is a memory, try copying it to a register and seeing if a 1006 cheap register alternative is available. */ 1007 if (MEM_P (op0) && !reverse) 1008 { 1009 if (get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum, 1010 fieldmode) 1011 && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value)) 1012 return true; 1013 1014 rtx_insn *last = get_last_insn (); 1015 1016 /* Try loading part of OP0 into a register, inserting the bitfield 1017 into that, and then copying the result back to OP0. */ 1018 unsigned HOST_WIDE_INT bitpos; 1019 rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum, 1020 bitregion_start, bitregion_end, 1021 fieldmode, &bitpos); 1022 if (xop0) 1023 { 1024 rtx tempreg = copy_to_reg (xop0); 1025 if (store_bit_field_1 (tempreg, bitsize, bitpos, 1026 bitregion_start, bitregion_end, 1027 fieldmode, orig_value, reverse, false)) 1028 { 1029 emit_move_insn (xop0, tempreg); 1030 return true; 1031 } 1032 delete_insns_since (last); 1033 } 1034 } 1035 1036 if (!fallback_p) 1037 return false; 1038 1039 store_fixed_bit_field (op0, bitsize, bitnum, bitregion_start, 1040 bitregion_end, value, reverse); 1041 return true; 1042 } 1043 1044 /* Generate code to store value from rtx VALUE 1045 into a bit-field within structure STR_RTX 1046 containing BITSIZE bits starting at bit BITNUM. 1047 1048 BITREGION_START is bitpos of the first bitfield in this region. 1049 BITREGION_END is the bitpos of the ending bitfield in this region. 1050 These two fields are 0, if the C++ memory model does not apply, 1051 or we are not interested in keeping track of bitfield regions. 1052 1053 FIELDMODE is the machine-mode of the FIELD_DECL node for this field. 1054 1055 If REVERSE is true, the store is to be done in reverse order. */ 1056 1057 void 1058 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize, 1059 unsigned HOST_WIDE_INT bitnum, 1060 unsigned HOST_WIDE_INT bitregion_start, 1061 unsigned HOST_WIDE_INT bitregion_end, 1062 machine_mode fieldmode, 1063 rtx value, bool reverse) 1064 { 1065 /* Handle -fstrict-volatile-bitfields in the cases where it applies. */ 1066 if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, fieldmode, 1067 bitregion_start, bitregion_end)) 1068 { 1069 /* Storing of a full word can be done with a simple store. 1070 We know here that the field can be accessed with one single 1071 instruction. For targets that support unaligned memory, 1072 an unaligned access may be necessary. 
*/ 1073 if (bitsize == GET_MODE_BITSIZE (fieldmode)) 1074 { 1075 str_rtx = adjust_bitfield_address (str_rtx, fieldmode, 1076 bitnum / BITS_PER_UNIT); 1077 if (reverse) 1078 value = flip_storage_order (fieldmode, value); 1079 gcc_assert (bitnum % BITS_PER_UNIT == 0); 1080 emit_move_insn (str_rtx, value); 1081 } 1082 else 1083 { 1084 rtx temp; 1085 1086 str_rtx = narrow_bit_field_mem (str_rtx, fieldmode, bitsize, bitnum, 1087 &bitnum); 1088 gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (fieldmode)); 1089 temp = copy_to_reg (str_rtx); 1090 if (!store_bit_field_1 (temp, bitsize, bitnum, 0, 0, 1091 fieldmode, value, reverse, true)) 1092 gcc_unreachable (); 1093 1094 emit_move_insn (str_rtx, temp); 1095 } 1096 1097 return; 1098 } 1099 1100 /* Under the C++0x memory model, we must not touch bits outside the 1101 bit region. Adjust the address to start at the beginning of the 1102 bit region. */ 1103 if (MEM_P (str_rtx) && bitregion_start > 0) 1104 { 1105 machine_mode bestmode; 1106 HOST_WIDE_INT offset, size; 1107 1108 gcc_assert ((bitregion_start % BITS_PER_UNIT) == 0); 1109 1110 offset = bitregion_start / BITS_PER_UNIT; 1111 bitnum -= bitregion_start; 1112 size = (bitnum + bitsize + BITS_PER_UNIT - 1) / BITS_PER_UNIT; 1113 bitregion_end -= bitregion_start; 1114 bitregion_start = 0; 1115 bestmode = get_best_mode (bitsize, bitnum, 1116 bitregion_start, bitregion_end, 1117 MEM_ALIGN (str_rtx), VOIDmode, 1118 MEM_VOLATILE_P (str_rtx)); 1119 str_rtx = adjust_bitfield_address_size (str_rtx, bestmode, offset, size); 1120 } 1121 1122 if (!store_bit_field_1 (str_rtx, bitsize, bitnum, 1123 bitregion_start, bitregion_end, 1124 fieldmode, value, reverse, true)) 1125 gcc_unreachable (); 1126 } 1127 1128 /* Use shifts and boolean operations to store VALUE into a bit field of 1129 width BITSIZE in OP0, starting at bit BITNUM. 1130 1131 If REVERSE is true, the store is to be done in reverse order. */ 1132 1133 static void 1134 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize, 1135 unsigned HOST_WIDE_INT bitnum, 1136 unsigned HOST_WIDE_INT bitregion_start, 1137 unsigned HOST_WIDE_INT bitregion_end, 1138 rtx value, bool reverse) 1139 { 1140 /* There is a case not handled here: 1141 a structure with a known alignment of just a halfword 1142 and a field split across two aligned halfwords within the structure. 1143 Or likewise a structure with a known alignment of just a byte 1144 and a field split across two bytes. 1145 Such cases are not supposed to be able to occur. */ 1146 1147 if (MEM_P (op0)) 1148 { 1149 machine_mode mode = GET_MODE (op0); 1150 if (GET_MODE_BITSIZE (mode) == 0 1151 || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode)) 1152 mode = word_mode; 1153 mode = get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end, 1154 MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0)); 1155 1156 if (mode == VOIDmode) 1157 { 1158 /* The only way this should occur is if the field spans word 1159 boundaries. */ 1160 store_split_bit_field (op0, bitsize, bitnum, bitregion_start, 1161 bitregion_end, value, reverse); 1162 return; 1163 } 1164 1165 op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum); 1166 } 1167 1168 store_fixed_bit_field_1 (op0, bitsize, bitnum, value, reverse); 1169 } 1170 1171 /* Helper function for store_fixed_bit_field, stores 1172 the bit field always using the MODE of OP0. 
*/ 1173 1174 static void 1175 store_fixed_bit_field_1 (rtx op0, unsigned HOST_WIDE_INT bitsize, 1176 unsigned HOST_WIDE_INT bitnum, 1177 rtx value, bool reverse) 1178 { 1179 machine_mode mode; 1180 rtx temp; 1181 int all_zero = 0; 1182 int all_one = 0; 1183 1184 mode = GET_MODE (op0); 1185 gcc_assert (SCALAR_INT_MODE_P (mode)); 1186 1187 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode) 1188 for invalid input, such as f5 from gcc.dg/pr48335-2.c. */ 1189 1190 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN) 1191 /* BITNUM is the distance between our msb 1192 and that of the containing datum. 1193 Convert it to the distance from the lsb. */ 1194 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum; 1195 1196 /* Now BITNUM is always the distance between our lsb 1197 and that of OP0. */ 1198 1199 /* Shift VALUE left by BITNUM bits. If VALUE is not constant, 1200 we must first convert its mode to MODE. */ 1201 1202 if (CONST_INT_P (value)) 1203 { 1204 unsigned HOST_WIDE_INT v = UINTVAL (value); 1205 1206 if (bitsize < HOST_BITS_PER_WIDE_INT) 1207 v &= (HOST_WIDE_INT_1U << bitsize) - 1; 1208 1209 if (v == 0) 1210 all_zero = 1; 1211 else if ((bitsize < HOST_BITS_PER_WIDE_INT 1212 && v == (HOST_WIDE_INT_1U << bitsize) - 1) 1213 || (bitsize == HOST_BITS_PER_WIDE_INT 1214 && v == HOST_WIDE_INT_M1U)) 1215 all_one = 1; 1216 1217 value = lshift_value (mode, v, bitnum); 1218 } 1219 else 1220 { 1221 int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize 1222 && bitnum + bitsize != GET_MODE_BITSIZE (mode)); 1223 1224 if (GET_MODE (value) != mode) 1225 value = convert_to_mode (mode, value, 1); 1226 1227 if (must_and) 1228 value = expand_binop (mode, and_optab, value, 1229 mask_rtx (mode, 0, bitsize, 0), 1230 NULL_RTX, 1, OPTAB_LIB_WIDEN); 1231 if (bitnum > 0) 1232 value = expand_shift (LSHIFT_EXPR, mode, value, 1233 bitnum, NULL_RTX, 1); 1234 } 1235 1236 if (reverse) 1237 value = flip_storage_order (mode, value); 1238 1239 /* Now clear the chosen bits in OP0, 1240 except that if VALUE is -1 we need not bother. */ 1241 /* We keep the intermediates in registers to allow CSE to combine 1242 consecutive bitfield assignments. */ 1243 1244 temp = force_reg (mode, op0); 1245 1246 if (! all_one) 1247 { 1248 rtx mask = mask_rtx (mode, bitnum, bitsize, 1); 1249 if (reverse) 1250 mask = flip_storage_order (mode, mask); 1251 temp = expand_binop (mode, and_optab, temp, mask, 1252 NULL_RTX, 1, OPTAB_LIB_WIDEN); 1253 temp = force_reg (mode, temp); 1254 } 1255 1256 /* Now logical-or VALUE into OP0, unless it is zero. */ 1257 1258 if (! all_zero) 1259 { 1260 temp = expand_binop (mode, ior_optab, temp, value, 1261 NULL_RTX, 1, OPTAB_LIB_WIDEN); 1262 temp = force_reg (mode, temp); 1263 } 1264 1265 if (op0 != temp) 1266 { 1267 op0 = copy_rtx (op0); 1268 emit_move_insn (op0, temp); 1269 } 1270 } 1271 1272 /* Store a bit field that is split across multiple accessible memory objects. 1273 1274 OP0 is the REG, SUBREG or MEM rtx for the first of the objects. 1275 BITSIZE is the field width; BITPOS the position of its first bit 1276 (within the word). 1277 VALUE is the value to store. 1278 1279 If REVERSE is true, the store is to be done in reverse order. 1280 1281 This does not yet handle fields wider than BITS_PER_WORD. 
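   For instance, with 32-bit words, a 20-bit field starting at BITPOS 24
   is stored as an 8-bit piece in the first word followed by a 12-bit
   piece in the next one.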
*/ 1282 1283 static void 1284 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize, 1285 unsigned HOST_WIDE_INT bitpos, 1286 unsigned HOST_WIDE_INT bitregion_start, 1287 unsigned HOST_WIDE_INT bitregion_end, 1288 rtx value, bool reverse) 1289 { 1290 unsigned int unit, total_bits, bitsdone = 0; 1291 1292 /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that 1293 much at a time. */ 1294 if (REG_P (op0) || GET_CODE (op0) == SUBREG) 1295 unit = BITS_PER_WORD; 1296 else 1297 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD); 1298 1299 /* If OP0 is a memory with a mode, then UNIT must not be larger than 1300 OP0's mode as well. Otherwise, store_fixed_bit_field will call us 1301 again, and we will mutually recurse forever. */ 1302 if (MEM_P (op0) && GET_MODE_BITSIZE (GET_MODE (op0)) > 0) 1303 unit = MIN (unit, GET_MODE_BITSIZE (GET_MODE (op0))); 1304 1305 /* If VALUE is a constant other than a CONST_INT, get it into a register in 1306 WORD_MODE. If we can do this using gen_lowpart_common, do so. Note 1307 that VALUE might be a floating-point constant. */ 1308 if (CONSTANT_P (value) && !CONST_INT_P (value)) 1309 { 1310 rtx word = gen_lowpart_common (word_mode, value); 1311 1312 if (word && (value != word)) 1313 value = word; 1314 else 1315 value = gen_lowpart_common (word_mode, 1316 force_reg (GET_MODE (value) != VOIDmode 1317 ? GET_MODE (value) 1318 : word_mode, value)); 1319 } 1320 1321 total_bits = GET_MODE_BITSIZE (GET_MODE (value)); 1322 1323 while (bitsdone < bitsize) 1324 { 1325 unsigned HOST_WIDE_INT thissize; 1326 unsigned HOST_WIDE_INT thispos; 1327 unsigned HOST_WIDE_INT offset; 1328 rtx part, word; 1329 1330 offset = (bitpos + bitsdone) / unit; 1331 thispos = (bitpos + bitsdone) % unit; 1332 1333 /* When region of bytes we can touch is restricted, decrease 1334 UNIT close to the end of the region as needed. If op0 is a REG 1335 or SUBREG of REG, don't do this, as there can't be data races 1336 on a register and we can expand shorter code in some cases. */ 1337 if (bitregion_end 1338 && unit > BITS_PER_UNIT 1339 && bitpos + bitsdone - thispos + unit > bitregion_end + 1 1340 && !REG_P (op0) 1341 && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0)))) 1342 { 1343 unit = unit / 2; 1344 continue; 1345 } 1346 1347 /* THISSIZE must not overrun a word boundary. Otherwise, 1348 store_fixed_bit_field will call us again, and we will mutually 1349 recurse forever. */ 1350 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD); 1351 thissize = MIN (thissize, unit - thispos); 1352 1353 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN) 1354 { 1355 /* Fetch successively less significant portions. */ 1356 if (CONST_INT_P (value)) 1357 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value)) 1358 >> (bitsize - bitsdone - thissize)) 1359 & ((HOST_WIDE_INT_1 << thissize) - 1)); 1360 /* Likewise, but the source is little-endian. */ 1361 else if (reverse) 1362 part = extract_fixed_bit_field (word_mode, value, thissize, 1363 bitsize - bitsdone - thissize, 1364 NULL_RTX, 1, false); 1365 else 1366 { 1367 int total_bits = GET_MODE_BITSIZE (GET_MODE (value)); 1368 /* The args are chosen so that the last part includes the 1369 lsb. Give extract_bit_field the value it needs (with 1370 endianness compensation) to fetch the piece we want. */ 1371 part = extract_fixed_bit_field (word_mode, value, thissize, 1372 total_bits - bitsize + bitsdone, 1373 NULL_RTX, 1, false); 1374 } 1375 } 1376 else 1377 { 1378 /* Fetch successively more significant portions. 
*/ 1379 if (CONST_INT_P (value)) 1380 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value)) 1381 >> bitsdone) 1382 & ((HOST_WIDE_INT_1 << thissize) - 1)); 1383 /* Likewise, but the source is big-endian. */ 1384 else if (reverse) 1385 part = extract_fixed_bit_field (word_mode, value, thissize, 1386 total_bits - bitsdone - thissize, 1387 NULL_RTX, 1, false); 1388 else 1389 part = extract_fixed_bit_field (word_mode, value, thissize, 1390 bitsdone, NULL_RTX, 1, false); 1391 } 1392 1393 /* If OP0 is a register, then handle OFFSET here. */ 1394 if (SUBREG_P (op0) || REG_P (op0)) 1395 { 1396 machine_mode op0_mode = GET_MODE (op0); 1397 if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD) 1398 word = offset ? const0_rtx : op0; 1399 else 1400 word = operand_subword_force (op0, offset * unit / BITS_PER_WORD, 1401 GET_MODE (op0)); 1402 offset &= BITS_PER_WORD / unit - 1; 1403 } 1404 else 1405 word = op0; 1406 1407 /* OFFSET is in UNITs, and UNIT is in bits. If WORD is const0_rtx, 1408 it is just an out-of-bounds access. Ignore it. */ 1409 if (word != const0_rtx) 1410 store_fixed_bit_field (word, thissize, offset * unit + thispos, 1411 bitregion_start, bitregion_end, part, 1412 reverse); 1413 bitsdone += thissize; 1414 } 1415 } 1416 1417 /* A subroutine of extract_bit_field_1 that converts return value X 1418 to either MODE or TMODE. MODE, TMODE and UNSIGNEDP are arguments 1419 to extract_bit_field. */ 1420 1421 static rtx 1422 convert_extracted_bit_field (rtx x, machine_mode mode, 1423 machine_mode tmode, bool unsignedp) 1424 { 1425 if (GET_MODE (x) == tmode || GET_MODE (x) == mode) 1426 return x; 1427 1428 /* If the x mode is not a scalar integral, first convert to the 1429 integer mode of that size and then access it as a floating-point 1430 value via a SUBREG. */ 1431 if (!SCALAR_INT_MODE_P (tmode)) 1432 { 1433 machine_mode smode; 1434 1435 smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0); 1436 x = convert_to_mode (smode, x, unsignedp); 1437 x = force_reg (smode, x); 1438 return gen_lowpart (tmode, x); 1439 } 1440 1441 return convert_to_mode (tmode, x, unsignedp); 1442 } 1443 1444 /* Try to use an ext(z)v pattern to extract a field from OP0. 1445 Return the extracted value on success, otherwise return null. 1446 EXT_MODE is the mode of the extraction and the other arguments 1447 are as for extract_bit_field. */ 1448 1449 static rtx 1450 extract_bit_field_using_extv (const extraction_insn *extv, rtx op0, 1451 unsigned HOST_WIDE_INT bitsize, 1452 unsigned HOST_WIDE_INT bitnum, 1453 int unsignedp, rtx target, 1454 machine_mode mode, machine_mode tmode) 1455 { 1456 struct expand_operand ops[4]; 1457 rtx spec_target = target; 1458 rtx spec_target_subreg = 0; 1459 machine_mode ext_mode = extv->field_mode; 1460 unsigned unit = GET_MODE_BITSIZE (ext_mode); 1461 1462 if (bitsize == 0 || unit < bitsize) 1463 return NULL_RTX; 1464 1465 if (MEM_P (op0)) 1466 /* Get a reference to the first byte of the field. */ 1467 op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum, 1468 &bitnum); 1469 else 1470 { 1471 /* Convert from counting within OP0 to counting in EXT_MODE. */ 1472 if (BYTES_BIG_ENDIAN) 1473 bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0)); 1474 1475 /* If op0 is a register, we need it in EXT_MODE to make it 1476 acceptable to the format of ext(z)v. 
*/ 1477 if (GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode) 1478 return NULL_RTX; 1479 if (REG_P (op0) && GET_MODE (op0) != ext_mode) 1480 op0 = gen_lowpart_SUBREG (ext_mode, op0); 1481 } 1482 1483 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count 1484 "backwards" from the size of the unit we are extracting from. 1485 Otherwise, we count bits from the most significant on a 1486 BYTES/BITS_BIG_ENDIAN machine. */ 1487 1488 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN) 1489 bitnum = unit - bitsize - bitnum; 1490 1491 if (target == 0) 1492 target = spec_target = gen_reg_rtx (tmode); 1493 1494 if (GET_MODE (target) != ext_mode) 1495 { 1496 /* Don't use LHS paradoxical subreg if explicit truncation is needed 1497 between the mode of the extraction (word_mode) and the target 1498 mode. Instead, create a temporary and use convert_move to set 1499 the target. */ 1500 if (REG_P (target) 1501 && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode)) 1502 { 1503 target = gen_lowpart (ext_mode, target); 1504 if (GET_MODE_PRECISION (ext_mode) 1505 > GET_MODE_PRECISION (GET_MODE (spec_target))) 1506 spec_target_subreg = target; 1507 } 1508 else 1509 target = gen_reg_rtx (ext_mode); 1510 } 1511 1512 create_output_operand (&ops[0], target, ext_mode); 1513 create_fixed_operand (&ops[1], op0); 1514 create_integer_operand (&ops[2], bitsize); 1515 create_integer_operand (&ops[3], bitnum); 1516 if (maybe_expand_insn (extv->icode, 4, ops)) 1517 { 1518 target = ops[0].value; 1519 if (target == spec_target) 1520 return target; 1521 if (target == spec_target_subreg) 1522 return spec_target; 1523 return convert_extracted_bit_field (target, mode, tmode, unsignedp); 1524 } 1525 return NULL_RTX; 1526 } 1527 1528 /* A subroutine of extract_bit_field, with the same arguments. 1529 If FALLBACK_P is true, fall back to extract_fixed_bit_field 1530 if we can find no other means of implementing the operation. 1531 if FALLBACK_P is false, return NULL instead. */ 1532 1533 static rtx 1534 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize, 1535 unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target, 1536 machine_mode mode, machine_mode tmode, 1537 bool reverse, bool fallback_p) 1538 { 1539 rtx op0 = str_rtx; 1540 machine_mode int_mode; 1541 machine_mode mode1; 1542 1543 if (tmode == VOIDmode) 1544 tmode = mode; 1545 1546 while (GET_CODE (op0) == SUBREG) 1547 { 1548 bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT; 1549 op0 = SUBREG_REG (op0); 1550 } 1551 1552 /* If we have an out-of-bounds access to a register, just return an 1553 uninitialized register of the required mode. This can occur if the 1554 source code contains an out-of-bounds access to a small array. */ 1555 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0))) 1556 return gen_reg_rtx (tmode); 1557 1558 if (REG_P (op0) 1559 && mode == GET_MODE (op0) 1560 && bitnum == 0 1561 && bitsize == GET_MODE_BITSIZE (GET_MODE (op0))) 1562 { 1563 if (reverse) 1564 op0 = flip_storage_order (mode, op0); 1565 /* We're trying to extract a full register from itself. */ 1566 return op0; 1567 } 1568 1569 /* See if we can get a better vector mode before extracting. 
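   For instance, when extracting a DImode value from a V4SFmode register,
   DImode is an integer mode, so the loop below looks for an integer
   vector mode of the same overall size whose element size matches DImode
   and, if the target supports it, puns the register to V2DImode first.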
*/ 1570 if (VECTOR_MODE_P (GET_MODE (op0)) 1571 && !MEM_P (op0) 1572 && GET_MODE_INNER (GET_MODE (op0)) != tmode) 1573 { 1574 machine_mode new_mode; 1575 1576 if (GET_MODE_CLASS (tmode) == MODE_FLOAT) 1577 new_mode = MIN_MODE_VECTOR_FLOAT; 1578 else if (GET_MODE_CLASS (tmode) == MODE_FRACT) 1579 new_mode = MIN_MODE_VECTOR_FRACT; 1580 else if (GET_MODE_CLASS (tmode) == MODE_UFRACT) 1581 new_mode = MIN_MODE_VECTOR_UFRACT; 1582 else if (GET_MODE_CLASS (tmode) == MODE_ACCUM) 1583 new_mode = MIN_MODE_VECTOR_ACCUM; 1584 else if (GET_MODE_CLASS (tmode) == MODE_UACCUM) 1585 new_mode = MIN_MODE_VECTOR_UACCUM; 1586 else 1587 new_mode = MIN_MODE_VECTOR_INT; 1588 1589 for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode)) 1590 if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0)) 1591 && GET_MODE_UNIT_SIZE (new_mode) == GET_MODE_SIZE (tmode) 1592 && targetm.vector_mode_supported_p (new_mode)) 1593 break; 1594 if (new_mode != VOIDmode) 1595 op0 = gen_lowpart (new_mode, op0); 1596 } 1597 1598 /* Use vec_extract patterns for extracting parts of vectors whenever 1599 available. */ 1600 if (VECTOR_MODE_P (GET_MODE (op0)) 1601 && !MEM_P (op0) 1602 && optab_handler (vec_extract_optab, GET_MODE (op0)) != CODE_FOR_nothing 1603 && ((bitnum + bitsize - 1) / GET_MODE_UNIT_BITSIZE (GET_MODE (op0)) 1604 == bitnum / GET_MODE_UNIT_BITSIZE (GET_MODE (op0)))) 1605 { 1606 struct expand_operand ops[3]; 1607 machine_mode outermode = GET_MODE (op0); 1608 machine_mode innermode = GET_MODE_INNER (outermode); 1609 enum insn_code icode = optab_handler (vec_extract_optab, outermode); 1610 unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode); 1611 1612 create_output_operand (&ops[0], target, innermode); 1613 create_input_operand (&ops[1], op0, outermode); 1614 create_integer_operand (&ops[2], pos); 1615 if (maybe_expand_insn (icode, 3, ops)) 1616 { 1617 target = ops[0].value; 1618 if (GET_MODE (target) != mode) 1619 return gen_lowpart (tmode, target); 1620 return target; 1621 } 1622 } 1623 1624 /* Make sure we are playing with integral modes. Pun with subregs 1625 if we aren't. */ 1626 { 1627 machine_mode imode = int_mode_for_mode (GET_MODE (op0)); 1628 if (imode != GET_MODE (op0)) 1629 { 1630 if (MEM_P (op0)) 1631 op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0)); 1632 else if (imode != BLKmode) 1633 { 1634 op0 = gen_lowpart (imode, op0); 1635 1636 /* If we got a SUBREG, force it into a register since we 1637 aren't going to be able to do another SUBREG on it. */ 1638 if (GET_CODE (op0) == SUBREG) 1639 op0 = force_reg (imode, op0); 1640 } 1641 else 1642 { 1643 HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (op0)); 1644 rtx mem = assign_stack_temp (GET_MODE (op0), size); 1645 emit_move_insn (mem, op0); 1646 op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size); 1647 } 1648 } 1649 } 1650 1651 /* ??? We currently assume TARGET is at least as big as BITSIZE. 1652 If that's wrong, the solution is to test for it and set TARGET to 0 1653 if needed. */ 1654 1655 /* Get the mode of the field to use for atomic access or subreg 1656 conversion. */ 1657 mode1 = mode; 1658 if (SCALAR_INT_MODE_P (tmode)) 1659 { 1660 machine_mode try_mode = mode_for_size (bitsize, 1661 GET_MODE_CLASS (tmode), 0); 1662 if (try_mode != BLKmode) 1663 mode1 = try_mode; 1664 } 1665 gcc_assert (mode1 != BLKmode); 1666 1667 /* Extraction of a full MODE1 value can be done with a subreg as long 1668 as the least significant bit of the value is the least significant 1669 bit of either OP0 or a word of OP0. 
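   For instance, on a typical little-endian target the low 16 bits of an
   SImode register can be extracted as HImode with just
   (subreg:HI (reg:SI) 0).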
*/ 1670 if (!MEM_P (op0) 1671 && !reverse 1672 && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0)) 1673 && bitsize == GET_MODE_BITSIZE (mode1) 1674 && TRULY_NOOP_TRUNCATION_MODES_P (mode1, GET_MODE (op0))) 1675 { 1676 rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 1677 bitnum / BITS_PER_UNIT); 1678 if (sub) 1679 return convert_extracted_bit_field (sub, mode, tmode, unsignedp); 1680 } 1681 1682 /* Extraction of a full MODE1 value can be done with a load as long as 1683 the field is on a byte boundary and is sufficiently aligned. */ 1684 if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1)) 1685 { 1686 op0 = adjust_bitfield_address (op0, mode1, bitnum / BITS_PER_UNIT); 1687 if (reverse) 1688 op0 = flip_storage_order (mode1, op0); 1689 return convert_extracted_bit_field (op0, mode, tmode, unsignedp); 1690 } 1691 1692 /* Handle fields bigger than a word. */ 1693 1694 if (bitsize > BITS_PER_WORD) 1695 { 1696 /* Here we transfer the words of the field 1697 in the order least significant first. 1698 This is because the most significant word is the one which may 1699 be less than full. */ 1700 1701 const bool backwards = WORDS_BIG_ENDIAN; 1702 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD; 1703 unsigned int i; 1704 rtx_insn *last; 1705 1706 if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target)) 1707 target = gen_reg_rtx (mode); 1708 1709 /* In case we're about to clobber a base register or something 1710 (see gcc.c-torture/execute/20040625-1.c). */ 1711 if (reg_mentioned_p (target, str_rtx)) 1712 target = gen_reg_rtx (mode); 1713 1714 /* Indicate for flow that the entire target reg is being set. */ 1715 emit_clobber (target); 1716 1717 last = get_last_insn (); 1718 for (i = 0; i < nwords; i++) 1719 { 1720 /* If I is 0, use the low-order word in both field and target; 1721 if I is 1, use the next to lowest word; and so on. */ 1722 /* Word number in TARGET to use. */ 1723 unsigned int wordnum 1724 = (backwards 1725 ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1 1726 : i); 1727 /* Offset from start of field in OP0. */ 1728 unsigned int bit_offset = (backwards ^ reverse 1729 ? MAX ((int) bitsize - ((int) i + 1) 1730 * BITS_PER_WORD, 1731 0) 1732 : (int) i * BITS_PER_WORD); 1733 rtx target_part = operand_subword (target, wordnum, 1, VOIDmode); 1734 rtx result_part 1735 = extract_bit_field_1 (op0, MIN (BITS_PER_WORD, 1736 bitsize - i * BITS_PER_WORD), 1737 bitnum + bit_offset, 1, target_part, 1738 mode, word_mode, reverse, fallback_p); 1739 1740 gcc_assert (target_part); 1741 if (!result_part) 1742 { 1743 delete_insns_since (last); 1744 return NULL; 1745 } 1746 1747 if (result_part != target_part) 1748 emit_move_insn (target_part, result_part); 1749 } 1750 1751 if (unsignedp) 1752 { 1753 /* Unless we've filled TARGET, the upper regs in a multi-reg value 1754 need to be zero'd out. */ 1755 if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD) 1756 { 1757 unsigned int i, total_words; 1758 1759 total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD; 1760 for (i = nwords; i < total_words; i++) 1761 emit_move_insn 1762 (operand_subword (target, 1763 backwards ? total_words - i - 1 : i, 1764 1, VOIDmode), 1765 const0_rtx); 1766 } 1767 return target; 1768 } 1769 1770 /* Signed bit field: sign-extend with two arithmetic shifts. 
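   For instance, with 32-bit words, a 45-bit signed field assembled into a
   64-bit DImode TARGET is shifted left by 64 - 45 = 19 bits and then
   arithmetically right by 19 bits to propagate the sign.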
*/ 1771 target = expand_shift (LSHIFT_EXPR, mode, target, 1772 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0); 1773 return expand_shift (RSHIFT_EXPR, mode, target, 1774 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0); 1775 } 1776 1777 /* If OP0 is a multi-word register, narrow it to the affected word. 1778 If the region spans two words, defer to extract_split_bit_field. */ 1779 if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD) 1780 { 1781 if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD) 1782 { 1783 if (!fallback_p) 1784 return NULL_RTX; 1785 target = extract_split_bit_field (op0, bitsize, bitnum, unsignedp, 1786 reverse); 1787 return convert_extracted_bit_field (target, mode, tmode, unsignedp); 1788 } 1789 op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0), 1790 bitnum / BITS_PER_WORD * UNITS_PER_WORD); 1791 bitnum %= BITS_PER_WORD; 1792 } 1793 1794 /* From here on we know the desired field is smaller than a word. 1795 If OP0 is a register, it too fits within a word. */ 1796 enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv; 1797 extraction_insn extv; 1798 if (!MEM_P (op0) 1799 && !reverse 1800 /* ??? We could limit the structure size to the part of OP0 that 1801 contains the field, with appropriate checks for endianness 1802 and TRULY_NOOP_TRUNCATION. */ 1803 && get_best_reg_extraction_insn (&extv, pattern, 1804 GET_MODE_BITSIZE (GET_MODE (op0)), 1805 tmode)) 1806 { 1807 rtx result = extract_bit_field_using_extv (&extv, op0, bitsize, bitnum, 1808 unsignedp, target, mode, 1809 tmode); 1810 if (result) 1811 return result; 1812 } 1813 1814 /* If OP0 is a memory, try copying it to a register and seeing if a 1815 cheap register alternative is available. */ 1816 if (MEM_P (op0) & !reverse) 1817 { 1818 if (get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum, 1819 tmode)) 1820 { 1821 rtx result = extract_bit_field_using_extv (&extv, op0, bitsize, 1822 bitnum, unsignedp, 1823 target, mode, 1824 tmode); 1825 if (result) 1826 return result; 1827 } 1828 1829 rtx_insn *last = get_last_insn (); 1830 1831 /* Try loading part of OP0 into a register and extracting the 1832 bitfield from that. */ 1833 unsigned HOST_WIDE_INT bitpos; 1834 rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum, 1835 0, 0, tmode, &bitpos); 1836 if (xop0) 1837 { 1838 xop0 = copy_to_reg (xop0); 1839 rtx result = extract_bit_field_1 (xop0, bitsize, bitpos, 1840 unsignedp, target, 1841 mode, tmode, reverse, false); 1842 if (result) 1843 return result; 1844 delete_insns_since (last); 1845 } 1846 } 1847 1848 if (!fallback_p) 1849 return NULL; 1850 1851 /* Find a correspondingly-sized integer field, so we can apply 1852 shifts and masks to it. */ 1853 int_mode = int_mode_for_mode (tmode); 1854 if (int_mode == BLKmode) 1855 int_mode = int_mode_for_mode (mode); 1856 /* Should probably push op0 out to memory and then do a load. */ 1857 gcc_assert (int_mode != BLKmode); 1858 1859 target = extract_fixed_bit_field (int_mode, op0, bitsize, bitnum, target, 1860 unsignedp, reverse); 1861 1862 /* Complex values must be reversed piecewise, so we need to undo the global 1863 reversal, convert to the complex mode and reverse again. 
*/ 1864 if (reverse && COMPLEX_MODE_P (tmode)) 1865 { 1866 target = flip_storage_order (int_mode, target); 1867 target = convert_extracted_bit_field (target, mode, tmode, unsignedp); 1868 target = flip_storage_order (tmode, target); 1869 } 1870 else 1871 target = convert_extracted_bit_field (target, mode, tmode, unsignedp); 1872 1873 return target; 1874 } 1875 1876 /* Generate code to extract a byte-field from STR_RTX 1877 containing BITSIZE bits, starting at BITNUM, 1878 and put it in TARGET if possible (if TARGET is nonzero). 1879 Regardless of TARGET, we return the rtx for where the value is placed. 1880 1881 STR_RTX is the structure containing the byte (a REG or MEM). 1882 UNSIGNEDP is nonzero if this is an unsigned bit field. 1883 MODE is the natural mode of the field value once extracted. 1884 TMODE is the mode the caller would like the value to have; 1885 but the value may be returned with type MODE instead. 1886 1887 If REVERSE is true, the extraction is to be done in reverse order. 1888 1889 If a TARGET is specified and we can store in it at no extra cost, 1890 we do so, and return TARGET. 1891 Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred 1892 if they are equally easy. */ 1893 1894 rtx 1895 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize, 1896 unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target, 1897 machine_mode mode, machine_mode tmode, bool reverse) 1898 { 1899 machine_mode mode1; 1900 1901 /* Handle -fstrict-volatile-bitfields in the cases where it applies. */ 1902 if (GET_MODE_BITSIZE (GET_MODE (str_rtx)) > 0) 1903 mode1 = GET_MODE (str_rtx); 1904 else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0) 1905 mode1 = GET_MODE (target); 1906 else 1907 mode1 = tmode; 1908 1909 if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, mode1, 0, 0)) 1910 { 1911 /* Extraction of a full MODE1 value can be done with a simple load. 1912 We know here that the field can be accessed with one single 1913 instruction. For targets that support unaligned memory, 1914 an unaligned access may be necessary. */ 1915 if (bitsize == GET_MODE_BITSIZE (mode1)) 1916 { 1917 rtx result = adjust_bitfield_address (str_rtx, mode1, 1918 bitnum / BITS_PER_UNIT); 1919 if (reverse) 1920 result = flip_storage_order (mode1, result); 1921 gcc_assert (bitnum % BITS_PER_UNIT == 0); 1922 return convert_extracted_bit_field (result, mode, tmode, unsignedp); 1923 } 1924 1925 str_rtx = narrow_bit_field_mem (str_rtx, mode1, bitsize, bitnum, 1926 &bitnum); 1927 gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (mode1)); 1928 str_rtx = copy_to_reg (str_rtx); 1929 } 1930 1931 return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp, 1932 target, mode, tmode, reverse, true); 1933 } 1934 1935 /* Use shifts and boolean operations to extract a field of BITSIZE bits 1936 from bit BITNUM of OP0. 1937 1938 UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value). 1939 If REVERSE is true, the extraction is to be done in reverse order. 1940 1941 If TARGET is nonzero, attempts to store the value there 1942 and return TARGET, but this is not guaranteed. 1943 If TARGET is not used, create a pseudo-reg of mode TMODE for the value. 
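For example, an unsigned 3-bit field starting 4 bits above the lsb of a word W becomes (W >> 4) & 7; the signed case instead uses the pair of shifts described further below.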
*/ 1944 1945 static rtx 1946 extract_fixed_bit_field (machine_mode tmode, rtx op0, 1947 unsigned HOST_WIDE_INT bitsize, 1948 unsigned HOST_WIDE_INT bitnum, rtx target, 1949 int unsignedp, bool reverse) 1950 { 1951 if (MEM_P (op0)) 1952 { 1953 machine_mode mode 1954 = get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0), word_mode, 1955 MEM_VOLATILE_P (op0)); 1956 1957 if (mode == VOIDmode) 1958 /* The only way this should occur is if the field spans word 1959 boundaries. */ 1960 return extract_split_bit_field (op0, bitsize, bitnum, unsignedp, 1961 reverse); 1962 1963 op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum); 1964 } 1965 1966 return extract_fixed_bit_field_1 (tmode, op0, bitsize, bitnum, 1967 target, unsignedp, reverse); 1968 } 1969 1970 /* Helper function for extract_fixed_bit_field, extracts 1971 the bit field always using the MODE of OP0. */ 1972 1973 static rtx 1974 extract_fixed_bit_field_1 (machine_mode tmode, rtx op0, 1975 unsigned HOST_WIDE_INT bitsize, 1976 unsigned HOST_WIDE_INT bitnum, rtx target, 1977 int unsignedp, bool reverse) 1978 { 1979 machine_mode mode = GET_MODE (op0); 1980 gcc_assert (SCALAR_INT_MODE_P (mode)); 1981 1982 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode) 1983 for invalid input, such as extract equivalent of f5 from 1984 gcc.dg/pr48335-2.c. */ 1985 1986 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN) 1987 /* BITNUM is the distance between our msb and that of OP0. 1988 Convert it to the distance from the lsb. */ 1989 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum; 1990 1991 /* Now BITNUM is always the distance between the field's lsb and that of OP0. 1992 We have reduced the big-endian case to the little-endian case. */ 1993 if (reverse) 1994 op0 = flip_storage_order (mode, op0); 1995 1996 if (unsignedp) 1997 { 1998 if (bitnum) 1999 { 2000 /* If the field does not already start at the lsb, 2001 shift it so it does. */ 2002 /* Maybe propagate the target for the shift. */ 2003 rtx subtarget = (target != 0 && REG_P (target) ? target : 0); 2004 if (tmode != mode) 2005 subtarget = 0; 2006 op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1); 2007 } 2008 /* Convert the value to the desired mode. */ 2009 if (mode != tmode) 2010 op0 = convert_to_mode (tmode, op0, 1); 2011 2012 /* Unless the msb of the field used to be the msb when we shifted, 2013 mask out the upper bits. */ 2014 2015 if (GET_MODE_BITSIZE (mode) != bitnum + bitsize) 2016 return expand_binop (GET_MODE (op0), and_optab, op0, 2017 mask_rtx (GET_MODE (op0), 0, bitsize, 0), 2018 target, 1, OPTAB_LIB_WIDEN); 2019 return op0; 2020 } 2021 2022 /* To extract a signed bit-field, first shift its msb to the msb of the word, 2023 then arithmetic-shift its lsb to the lsb of the word. */ 2024 op0 = force_reg (mode, op0); 2025 2026 /* Find the narrowest integer mode that contains the field. */ 2027 2028 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode; 2029 mode = GET_MODE_WIDER_MODE (mode)) 2030 if (GET_MODE_BITSIZE (mode) >= bitsize + bitnum) 2031 { 2032 op0 = convert_to_mode (mode, op0, 0); 2033 break; 2034 } 2035 2036 if (mode != tmode) 2037 target = 0; 2038 2039 if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum)) 2040 { 2041 int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum); 2042 /* Maybe propagate the target for the shift. */ 2043 rtx subtarget = (target != 0 && REG_P (target) ? 
target : 0); 2044 op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1); 2045 } 2046 2047 return expand_shift (RSHIFT_EXPR, mode, op0, 2048 GET_MODE_BITSIZE (mode) - bitsize, target, 0); 2049 } 2050 2051 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value 2052 VALUE << BITPOS. */ 2053 2054 static rtx 2055 lshift_value (machine_mode mode, unsigned HOST_WIDE_INT value, 2056 int bitpos) 2057 { 2058 return immed_wide_int_const (wi::lshift (value, bitpos), mode); 2059 } 2060 2061 /* Extract a bit field that is split across two words 2062 and return an RTX for the result. 2063 2064 OP0 is the REG, SUBREG or MEM rtx for the first of the two words. 2065 BITSIZE is the field width; BITPOS, position of its first bit, in the word. 2066 UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend. 2067 2068 If REVERSE is true, the extraction is to be done in reverse order. */ 2069 2070 static rtx 2071 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize, 2072 unsigned HOST_WIDE_INT bitpos, int unsignedp, 2073 bool reverse) 2074 { 2075 unsigned int unit; 2076 unsigned int bitsdone = 0; 2077 rtx result = NULL_RTX; 2078 int first = 1; 2079 2080 /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that 2081 much at a time. */ 2082 if (REG_P (op0) || GET_CODE (op0) == SUBREG) 2083 unit = BITS_PER_WORD; 2084 else 2085 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD); 2086 2087 while (bitsdone < bitsize) 2088 { 2089 unsigned HOST_WIDE_INT thissize; 2090 rtx part, word; 2091 unsigned HOST_WIDE_INT thispos; 2092 unsigned HOST_WIDE_INT offset; 2093 2094 offset = (bitpos + bitsdone) / unit; 2095 thispos = (bitpos + bitsdone) % unit; 2096 2097 /* THISSIZE must not overrun a word boundary. Otherwise, 2098 extract_fixed_bit_field will call us again, and we will mutually 2099 recurse forever. */ 2100 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD); 2101 thissize = MIN (thissize, unit - thispos); 2102 2103 /* If OP0 is a register, then handle OFFSET here. */ 2104 if (SUBREG_P (op0) || REG_P (op0)) 2105 { 2106 word = operand_subword_force (op0, offset, GET_MODE (op0)); 2107 offset = 0; 2108 } 2109 else 2110 word = op0; 2111 2112 /* Extract the parts in bit-counting order, 2113 whose meaning is determined by BYTES_PER_UNIT. 2114 OFFSET is in UNITs, and UNIT is in bits. */ 2115 part = extract_fixed_bit_field (word_mode, word, thissize, 2116 offset * unit + thispos, 0, 1, reverse); 2117 bitsdone += thissize; 2118 2119 /* Shift this part into place for the result. */ 2120 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN) 2121 { 2122 if (bitsize != bitsdone) 2123 part = expand_shift (LSHIFT_EXPR, word_mode, part, 2124 bitsize - bitsdone, 0, 1); 2125 } 2126 else 2127 { 2128 if (bitsdone != thissize) 2129 part = expand_shift (LSHIFT_EXPR, word_mode, part, 2130 bitsdone - thissize, 0, 1); 2131 } 2132 2133 if (first) 2134 result = part; 2135 else 2136 /* Combine the parts with bitwise or. This works 2137 because we extracted each part as an unsigned bit field. */ 2138 result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1, 2139 OPTAB_LIB_WIDEN); 2140 2141 first = 0; 2142 } 2143 2144 /* Unsigned bit field: we are done. */ 2145 if (unsignedp) 2146 return result; 2147 /* Signed bit field: sign-extend with two arithmetic shifts. 
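Only this final pair of shifts reinstates the sign, since the parts above were deliberately extracted and IORed together as unsigned pieces.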
*/ 2148 result = expand_shift (LSHIFT_EXPR, word_mode, result, 2149 BITS_PER_WORD - bitsize, NULL_RTX, 0); 2150 return expand_shift (RSHIFT_EXPR, word_mode, result, 2151 BITS_PER_WORD - bitsize, NULL_RTX, 0); 2152 } 2153 2154 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving 2155 the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than 2156 MODE, fill the upper bits with zeros. Fail if the layout of either 2157 mode is unknown (as for CC modes) or if the extraction would involve 2158 unprofitable mode punning. Return the value on success, otherwise 2159 return null. 2160 2161 This is different from gen_lowpart* in these respects: 2162 2163 - the returned value must always be considered an rvalue 2164 2165 - when MODE is wider than SRC_MODE, the extraction involves 2166 a zero extension 2167 2168 - when MODE is smaller than SRC_MODE, the extraction involves 2169 a truncation (and is thus subject to TRULY_NOOP_TRUNCATION). 2170 2171 In other words, this routine performs a computation, whereas the 2172 gen_lowpart* routines are conceptually lvalue or rvalue subreg 2173 operations. */ 2174 2175 rtx 2176 extract_low_bits (machine_mode mode, machine_mode src_mode, rtx src) 2177 { 2178 machine_mode int_mode, src_int_mode; 2179 2180 if (mode == src_mode) 2181 return src; 2182 2183 if (CONSTANT_P (src)) 2184 { 2185 /* simplify_gen_subreg can't be used here, as if simplify_subreg 2186 fails, it will happily create (subreg (symbol_ref)) or similar 2187 invalid SUBREGs. */ 2188 unsigned int byte = subreg_lowpart_offset (mode, src_mode); 2189 rtx ret = simplify_subreg (mode, src, src_mode, byte); 2190 if (ret) 2191 return ret; 2192 2193 if (GET_MODE (src) == VOIDmode 2194 || !validate_subreg (mode, src_mode, src, byte)) 2195 return NULL_RTX; 2196 2197 src = force_reg (GET_MODE (src), src); 2198 return gen_rtx_SUBREG (mode, src, byte); 2199 } 2200 2201 if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC) 2202 return NULL_RTX; 2203 2204 if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode) 2205 && MODES_TIEABLE_P (mode, src_mode)) 2206 { 2207 rtx x = gen_lowpart_common (mode, src); 2208 if (x) 2209 return x; 2210 } 2211 2212 src_int_mode = int_mode_for_mode (src_mode); 2213 int_mode = int_mode_for_mode (mode); 2214 if (src_int_mode == BLKmode || int_mode == BLKmode) 2215 return NULL_RTX; 2216 2217 if (!MODES_TIEABLE_P (src_int_mode, src_mode)) 2218 return NULL_RTX; 2219 if (!MODES_TIEABLE_P (int_mode, mode)) 2220 return NULL_RTX; 2221 2222 src = gen_lowpart (src_int_mode, src); 2223 src = convert_modes (int_mode, src_int_mode, src, true); 2224 src = gen_lowpart (mode, src); 2225 return src; 2226 } 2227 2228 /* Add INC into TARGET. */ 2229 2230 void 2231 expand_inc (rtx target, rtx inc) 2232 { 2233 rtx value = expand_binop (GET_MODE (target), add_optab, 2234 target, inc, 2235 target, 0, OPTAB_LIB_WIDEN); 2236 if (value != target) 2237 emit_move_insn (target, value); 2238 } 2239 2240 /* Subtract DEC from TARGET. */ 2241 2242 void 2243 expand_dec (rtx target, rtx dec) 2244 { 2245 rtx value = expand_binop (GET_MODE (target), sub_optab, 2246 target, dec, 2247 target, 0, OPTAB_LIB_WIDEN); 2248 if (value != target) 2249 emit_move_insn (target, value); 2250 } 2251 2252 /* Output a shift instruction for expression code CODE, 2253 with SHIFTED being the rtx for the value to shift, 2254 and AMOUNT the rtx for the amount to shift by. 2255 Store the result in the rtx TARGET, if that is convenient. 
2256 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic. 2257 Return the rtx for where the value is. 2258 If that cannot be done, abort the compilation unless MAY_FAIL is true, 2259 in which case 0 is returned. */ 2260 2261 static rtx 2262 expand_shift_1 (enum tree_code code, machine_mode mode, rtx shifted, 2263 rtx amount, rtx target, int unsignedp, bool may_fail = false) 2264 { 2265 rtx op1, temp = 0; 2266 int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR); 2267 int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR); 2268 optab lshift_optab = ashl_optab; 2269 optab rshift_arith_optab = ashr_optab; 2270 optab rshift_uns_optab = lshr_optab; 2271 optab lrotate_optab = rotl_optab; 2272 optab rrotate_optab = rotr_optab; 2273 machine_mode op1_mode; 2274 machine_mode scalar_mode = mode; 2275 int attempt; 2276 bool speed = optimize_insn_for_speed_p (); 2277 2278 if (VECTOR_MODE_P (mode)) 2279 scalar_mode = GET_MODE_INNER (mode); 2280 op1 = amount; 2281 op1_mode = GET_MODE (op1); 2282 2283 /* Determine whether the shift/rotate amount is a vector, or scalar. If the 2284 shift amount is a vector, use the vector/vector shift patterns. */ 2285 if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode)) 2286 { 2287 lshift_optab = vashl_optab; 2288 rshift_arith_optab = vashr_optab; 2289 rshift_uns_optab = vlshr_optab; 2290 lrotate_optab = vrotl_optab; 2291 rrotate_optab = vrotr_optab; 2292 } 2293 2294 /* Previously detected shift-counts computed by NEGATE_EXPR 2295 and shifted in the other direction; but that does not work 2296 on all machines. */ 2297 2298 if (SHIFT_COUNT_TRUNCATED) 2299 { 2300 if (CONST_INT_P (op1) 2301 && ((unsigned HOST_WIDE_INT) INTVAL (op1) >= 2302 (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (scalar_mode))) 2303 op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1) 2304 % GET_MODE_BITSIZE (scalar_mode)); 2305 else if (GET_CODE (op1) == SUBREG 2306 && subreg_lowpart_p (op1) 2307 && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1))) 2308 && SCALAR_INT_MODE_P (GET_MODE (op1))) 2309 op1 = SUBREG_REG (op1); 2310 } 2311 2312 /* Canonicalize rotates by constant amount. If op1 is bitsize / 2, 2313 prefer left rotation, if op1 is from bitsize / 2 + 1 to 2314 bitsize - 1, use other direction of rotate with 1 .. bitsize / 2 - 1 2315 amount instead. */ 2316 if (rotate 2317 && CONST_INT_P (op1) 2318 && IN_RANGE (INTVAL (op1), GET_MODE_BITSIZE (scalar_mode) / 2 + left, 2319 GET_MODE_BITSIZE (scalar_mode) - 1)) 2320 { 2321 op1 = GEN_INT (GET_MODE_BITSIZE (scalar_mode) - INTVAL (op1)); 2322 left = !left; 2323 code = left ? LROTATE_EXPR : RROTATE_EXPR; 2324 } 2325 2326 /* Rotation of 16bit values by 8 bits is effectively equivalent to a bswaphi. 2327 Note that this is not the case for bigger values. For instance a rotation 2328 of 0x01020304 by 16 bits gives 0x03040102 which is different from 2329 0x04030201 (bswapsi). */ 2330 if (rotate 2331 && CONST_INT_P (op1) 2332 && INTVAL (op1) == BITS_PER_UNIT 2333 && GET_MODE_SIZE (scalar_mode) == 2 2334 && optab_handler (bswap_optab, mode) != CODE_FOR_nothing) 2335 return expand_unop (mode, bswap_optab, shifted, NULL_RTX, unsignedp); 2336 2337 if (op1 == const0_rtx) 2338 return shifted; 2339 2340 /* Check whether its cheaper to implement a left shift by a constant 2341 bit count by a sequence of additions. 
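For example, x << 2 can be emitted as two self-additions, t = x + x followed by t = t + t, which is a win whenever the shift-by-2 pattern costs more than two additions on this target.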
*/ 2342 if (code == LSHIFT_EXPR 2343 && CONST_INT_P (op1) 2344 && INTVAL (op1) > 0 2345 && INTVAL (op1) < GET_MODE_PRECISION (scalar_mode) 2346 && INTVAL (op1) < MAX_BITS_PER_WORD 2347 && (shift_cost (speed, mode, INTVAL (op1)) 2348 > INTVAL (op1) * add_cost (speed, mode)) 2349 && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST) 2350 { 2351 int i; 2352 for (i = 0; i < INTVAL (op1); i++) 2353 { 2354 temp = force_reg (mode, shifted); 2355 shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX, 2356 unsignedp, OPTAB_LIB_WIDEN); 2357 } 2358 return shifted; 2359 } 2360 2361 for (attempt = 0; temp == 0 && attempt < 3; attempt++) 2362 { 2363 enum optab_methods methods; 2364 2365 if (attempt == 0) 2366 methods = OPTAB_DIRECT; 2367 else if (attempt == 1) 2368 methods = OPTAB_WIDEN; 2369 else 2370 methods = OPTAB_LIB_WIDEN; 2371 2372 if (rotate) 2373 { 2374 /* Widening does not work for rotation. */ 2375 if (methods == OPTAB_WIDEN) 2376 continue; 2377 else if (methods == OPTAB_LIB_WIDEN) 2378 { 2379 /* If we have been unable to open-code this by a rotation, 2380 do it as the IOR of two shifts. I.e., to rotate A 2381 by N bits, compute 2382 (A << N) | ((unsigned) A >> ((-N) & (C - 1))) 2383 where C is the bitsize of A. 2384 2385 It is theoretically possible that the target machine might 2386 not be able to perform either shift and hence we would 2387 be making two libcalls rather than just the one for the 2388 shift (similarly if IOR could not be done). We will allow 2389 this extremely unlikely lossage to avoid complicating the 2390 code below. */ 2391 2392 rtx subtarget = target == shifted ? 0 : target; 2393 rtx new_amount, other_amount; 2394 rtx temp1; 2395 2396 new_amount = op1; 2397 if (op1 == const0_rtx) 2398 return shifted; 2399 else if (CONST_INT_P (op1)) 2400 other_amount = GEN_INT (GET_MODE_BITSIZE (scalar_mode) 2401 - INTVAL (op1)); 2402 else 2403 { 2404 other_amount 2405 = simplify_gen_unary (NEG, GET_MODE (op1), 2406 op1, GET_MODE (op1)); 2407 HOST_WIDE_INT mask = GET_MODE_PRECISION (scalar_mode) - 1; 2408 other_amount 2409 = simplify_gen_binary (AND, GET_MODE (op1), other_amount, 2410 gen_int_mode (mask, GET_MODE (op1))); 2411 } 2412 2413 shifted = force_reg (mode, shifted); 2414 2415 temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR, 2416 mode, shifted, new_amount, 0, 1); 2417 temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR, 2418 mode, shifted, other_amount, 2419 subtarget, 1); 2420 return expand_binop (mode, ior_optab, temp, temp1, target, 2421 unsignedp, methods); 2422 } 2423 2424 temp = expand_binop (mode, 2425 left ? lrotate_optab : rrotate_optab, 2426 shifted, op1, target, unsignedp, methods); 2427 } 2428 else if (unsignedp) 2429 temp = expand_binop (mode, 2430 left ? lshift_optab : rshift_uns_optab, 2431 shifted, op1, target, unsignedp, methods); 2432 2433 /* Do arithmetic shifts. 2434 Also, if we are going to widen the operand, we can just as well 2435 use an arithmetic right-shift instead of a logical one. */ 2436 if (temp == 0 && ! rotate 2437 && (! unsignedp || (! left && methods == OPTAB_WIDEN))) 2438 { 2439 enum optab_methods methods1 = methods; 2440 2441 /* If trying to widen a log shift to an arithmetic shift, 2442 don't accept an arithmetic shift of the same size. */ 2443 if (unsignedp) 2444 methods1 = OPTAB_MUST_WIDEN; 2445 2446 /* Arithmetic shift */ 2447 2448 temp = expand_binop (mode, 2449 left ? 
lshift_optab : rshift_arith_optab, 2450 shifted, op1, target, unsignedp, methods1); 2451 } 2452 2453 /* We used to try extzv here for logical right shifts, but that was 2454 only useful for one machine, the VAX, and caused poor code 2455 generation there for lshrdi3, so the code was deleted and a 2456 define_expand for lshrsi3 was added to vax.md. */ 2457 } 2458 2459 gcc_assert (temp != NULL_RTX || may_fail); 2460 return temp; 2461 } 2462 2463 /* Output a shift instruction for expression code CODE, 2464 with SHIFTED being the rtx for the value to shift, 2465 and AMOUNT the amount to shift by. 2466 Store the result in the rtx TARGET, if that is convenient. 2467 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic. 2468 Return the rtx for where the value is. */ 2469 2470 rtx 2471 expand_shift (enum tree_code code, machine_mode mode, rtx shifted, 2472 int amount, rtx target, int unsignedp) 2473 { 2474 return expand_shift_1 (code, mode, 2475 shifted, GEN_INT (amount), target, unsignedp); 2476 } 2477 2478 /* Likewise, but return 0 if that cannot be done. */ 2479 2480 static rtx 2481 maybe_expand_shift (enum tree_code code, machine_mode mode, rtx shifted, 2482 int amount, rtx target, int unsignedp) 2483 { 2484 return expand_shift_1 (code, mode, 2485 shifted, GEN_INT (amount), target, unsignedp, true); 2486 } 2487 2488 /* Output a shift instruction for expression code CODE, 2489 with SHIFTED being the rtx for the value to shift, 2490 and AMOUNT the tree for the amount to shift by. 2491 Store the result in the rtx TARGET, if that is convenient. 2492 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic. 2493 Return the rtx for where the value is. */ 2494 2495 rtx 2496 expand_variable_shift (enum tree_code code, machine_mode mode, rtx shifted, 2497 tree amount, rtx target, int unsignedp) 2498 { 2499 return expand_shift_1 (code, mode, 2500 shifted, expand_normal (amount), target, unsignedp); 2501 } 2502 2503 2504 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT, 2505 const struct mult_cost *, machine_mode mode); 2506 static rtx expand_mult_const (machine_mode, rtx, HOST_WIDE_INT, rtx, 2507 const struct algorithm *, enum mult_variant); 2508 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int); 2509 static rtx extract_high_half (machine_mode, rtx); 2510 static rtx expmed_mult_highpart (machine_mode, rtx, rtx, rtx, int, int); 2511 static rtx expmed_mult_highpart_optab (machine_mode, rtx, rtx, rtx, 2512 int, int); 2513 /* Compute and return the best algorithm for multiplying by T. 2514 The algorithm must cost less than cost_limit 2515 If retval.cost >= COST_LIMIT, no algorithm was found and all 2516 other field of the returned struct are undefined. 2517 MODE is the machine mode of the multiplication. */ 2518 2519 static void 2520 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t, 2521 const struct mult_cost *cost_limit, machine_mode mode) 2522 { 2523 int m; 2524 struct algorithm *alg_in, *best_alg; 2525 struct mult_cost best_cost; 2526 struct mult_cost new_limit; 2527 int op_cost, op_latency; 2528 unsigned HOST_WIDE_INT orig_t = t; 2529 unsigned HOST_WIDE_INT q; 2530 int maxm, hash_index; 2531 bool cache_hit = false; 2532 enum alg_code cache_alg = alg_zero; 2533 bool speed = optimize_insn_for_speed_p (); 2534 machine_mode imode; 2535 struct alg_hash_entry *entry_ptr; 2536 2537 /* Indicate that no algorithm is yet found. If no algorithm 2538 is found, this value will be returned and indicate failure. 
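The sentinel is simply a cost one greater than COST_LIMIT, so callers can detect failure by comparing the returned cost against the limit they passed in (see choose_mult_variant).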
*/ 2539 alg_out->cost.cost = cost_limit->cost + 1; 2540 alg_out->cost.latency = cost_limit->latency + 1; 2541 2542 if (cost_limit->cost < 0 2543 || (cost_limit->cost == 0 && cost_limit->latency <= 0)) 2544 return; 2545 2546 /* Be prepared for vector modes. */ 2547 imode = GET_MODE_INNER (mode); 2548 2549 maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode)); 2550 2551 /* Restrict the bits of "t" to the multiplication's mode. */ 2552 t &= GET_MODE_MASK (imode); 2553 2554 /* t == 1 can be done in zero cost. */ 2555 if (t == 1) 2556 { 2557 alg_out->ops = 1; 2558 alg_out->cost.cost = 0; 2559 alg_out->cost.latency = 0; 2560 alg_out->op[0] = alg_m; 2561 return; 2562 } 2563 2564 /* t == 0 sometimes has a cost. If it does and it exceeds our limit, 2565 fail now. */ 2566 if (t == 0) 2567 { 2568 if (MULT_COST_LESS (cost_limit, zero_cost (speed))) 2569 return; 2570 else 2571 { 2572 alg_out->ops = 1; 2573 alg_out->cost.cost = zero_cost (speed); 2574 alg_out->cost.latency = zero_cost (speed); 2575 alg_out->op[0] = alg_zero; 2576 return; 2577 } 2578 } 2579 2580 /* We'll be needing a couple extra algorithm structures now. */ 2581 2582 alg_in = XALLOCA (struct algorithm); 2583 best_alg = XALLOCA (struct algorithm); 2584 best_cost = *cost_limit; 2585 2586 /* Compute the hash index. */ 2587 hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES; 2588 2589 /* See if we already know what to do for T. */ 2590 entry_ptr = alg_hash_entry_ptr (hash_index); 2591 if (entry_ptr->t == t 2592 && entry_ptr->mode == mode 2593 && entry_ptr->speed == speed 2594 && entry_ptr->alg != alg_unknown) 2595 { 2596 cache_alg = entry_ptr->alg; 2597 2598 if (cache_alg == alg_impossible) 2599 { 2600 /* The cache tells us that it's impossible to synthesize 2601 multiplication by T within entry_ptr->cost. */ 2602 if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit)) 2603 /* COST_LIMIT is at least as restrictive as the one 2604 recorded in the hash table, in which case we have no 2605 hope of synthesizing a multiplication. Just 2606 return. */ 2607 return; 2608 2609 /* If we get here, COST_LIMIT is less restrictive than the 2610 one recorded in the hash table, so we may be able to 2611 synthesize a multiplication. Proceed as if we didn't 2612 have the cache entry. */ 2613 } 2614 else 2615 { 2616 if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost)) 2617 /* The cached algorithm shows that this multiplication 2618 requires more cost than COST_LIMIT. Just return. This 2619 way, we don't clobber this cache entry with 2620 alg_impossible but retain useful information. */ 2621 return; 2622 2623 cache_hit = true; 2624 2625 switch (cache_alg) 2626 { 2627 case alg_shift: 2628 goto do_alg_shift; 2629 2630 case alg_add_t_m2: 2631 case alg_sub_t_m2: 2632 goto do_alg_addsub_t_m2; 2633 2634 case alg_add_factor: 2635 case alg_sub_factor: 2636 goto do_alg_addsub_factor; 2637 2638 case alg_add_t2_m: 2639 goto do_alg_add_t2_m; 2640 2641 case alg_sub_t2_m: 2642 goto do_alg_sub_t2_m; 2643 2644 default: 2645 gcc_unreachable (); 2646 } 2647 } 2648 } 2649 2650 /* If we have a group of zero bits at the low-order part of T, try 2651 multiplying by the remaining bits and then doing a shift. */ 2652 2653 if ((t & 1) == 0) 2654 { 2655 do_alg_shift: 2656 m = ctz_or_zero (t); /* m = number of low zero bits */ 2657 if (m < maxm) 2658 { 2659 q = t >> m; 2660 /* The function expand_shift will choose between a shift and 2661 a sequence of additions, so the observed cost is given as 2662 MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)). 
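For instance, t = 40 has three low zero bits, so we recurse on q = 5 and charge for either one final shift by 3 or three additions, whichever the target makes cheaper.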
*/ 2663 op_cost = m * add_cost (speed, mode); 2664 if (shift_cost (speed, mode, m) < op_cost) 2665 op_cost = shift_cost (speed, mode, m); 2666 new_limit.cost = best_cost.cost - op_cost; 2667 new_limit.latency = best_cost.latency - op_cost; 2668 synth_mult (alg_in, q, &new_limit, mode); 2669 2670 alg_in->cost.cost += op_cost; 2671 alg_in->cost.latency += op_cost; 2672 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) 2673 { 2674 best_cost = alg_in->cost; 2675 std::swap (alg_in, best_alg); 2676 best_alg->log[best_alg->ops] = m; 2677 best_alg->op[best_alg->ops] = alg_shift; 2678 } 2679 2680 /* See if treating ORIG_T as a signed number yields a better 2681 sequence. Try this sequence only for a negative ORIG_T 2682 as it would be useless for a non-negative ORIG_T. */ 2683 if ((HOST_WIDE_INT) orig_t < 0) 2684 { 2685 /* Shift ORIG_T as follows because a right shift of a 2686 negative-valued signed type is implementation 2687 defined. */ 2688 q = ~(~orig_t >> m); 2689 /* The function expand_shift will choose between a shift 2690 and a sequence of additions, so the observed cost is 2691 given as MIN (m * add_cost(speed, mode), 2692 shift_cost(speed, mode, m)). */ 2693 op_cost = m * add_cost (speed, mode); 2694 if (shift_cost (speed, mode, m) < op_cost) 2695 op_cost = shift_cost (speed, mode, m); 2696 new_limit.cost = best_cost.cost - op_cost; 2697 new_limit.latency = best_cost.latency - op_cost; 2698 synth_mult (alg_in, q, &new_limit, mode); 2699 2700 alg_in->cost.cost += op_cost; 2701 alg_in->cost.latency += op_cost; 2702 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) 2703 { 2704 best_cost = alg_in->cost; 2705 std::swap (alg_in, best_alg); 2706 best_alg->log[best_alg->ops] = m; 2707 best_alg->op[best_alg->ops] = alg_shift; 2708 } 2709 } 2710 } 2711 if (cache_hit) 2712 goto done; 2713 } 2714 2715 /* If we have an odd number, add or subtract one. */ 2716 if ((t & 1) != 0) 2717 { 2718 unsigned HOST_WIDE_INT w; 2719 2720 do_alg_addsub_t_m2: 2721 for (w = 1; (w & t) != 0; w <<= 1) 2722 ; 2723 /* If T was -1, then W will be zero after the loop. This is another 2724 case where T ends with ...111. Handling this with (T + 1) and 2725 subtract 1 produces slightly better code and results in algorithm 2726 selection much faster than treating it like the ...0111 case 2727 below. */ 2728 if (w == 0 2729 || (w > 2 2730 /* Reject the case where t is 3. 2731 Thus we prefer addition in that case. */ 2732 && t != 3)) 2733 { 2734 /* T ends with ...111. Multiply by (T + 1) and subtract T. */ 2735 2736 op_cost = add_cost (speed, mode); 2737 new_limit.cost = best_cost.cost - op_cost; 2738 new_limit.latency = best_cost.latency - op_cost; 2739 synth_mult (alg_in, t + 1, &new_limit, mode); 2740 2741 alg_in->cost.cost += op_cost; 2742 alg_in->cost.latency += op_cost; 2743 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) 2744 { 2745 best_cost = alg_in->cost; 2746 std::swap (alg_in, best_alg); 2747 best_alg->log[best_alg->ops] = 0; 2748 best_alg->op[best_alg->ops] = alg_sub_t_m2; 2749 } 2750 } 2751 else 2752 { 2753 /* T ends with ...01 or ...011. Multiply by (T - 1) and add T. 
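For instance, T = 5 (binary ...101) is synthesized here as x*4 + x, while the ...111 branch above turns T = 7 into x*8 - x.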
*/ 2754 2755 op_cost = add_cost (speed, mode); 2756 new_limit.cost = best_cost.cost - op_cost; 2757 new_limit.latency = best_cost.latency - op_cost; 2758 synth_mult (alg_in, t - 1, &new_limit, mode); 2759 2760 alg_in->cost.cost += op_cost; 2761 alg_in->cost.latency += op_cost; 2762 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) 2763 { 2764 best_cost = alg_in->cost; 2765 std::swap (alg_in, best_alg); 2766 best_alg->log[best_alg->ops] = 0; 2767 best_alg->op[best_alg->ops] = alg_add_t_m2; 2768 } 2769 } 2770 2771 /* We may be able to calculate a * -7, a * -15, a * -31, etc 2772 quickly with a - a * n for some appropriate constant n. */ 2773 m = exact_log2 (-orig_t + 1); 2774 if (m >= 0 && m < maxm) 2775 { 2776 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m); 2777 /* If the target has a cheap shift-and-subtract insn use 2778 that in preference to a shift insn followed by a sub insn. 2779 Assume that the shift-and-sub is "atomic" with a latency 2780 equal to it's cost, otherwise assume that on superscalar 2781 hardware the shift may be executed concurrently with the 2782 earlier steps in the algorithm. */ 2783 if (shiftsub1_cost (speed, mode, m) <= op_cost) 2784 { 2785 op_cost = shiftsub1_cost (speed, mode, m); 2786 op_latency = op_cost; 2787 } 2788 else 2789 op_latency = add_cost (speed, mode); 2790 2791 new_limit.cost = best_cost.cost - op_cost; 2792 new_limit.latency = best_cost.latency - op_latency; 2793 synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m, 2794 &new_limit, mode); 2795 2796 alg_in->cost.cost += op_cost; 2797 alg_in->cost.latency += op_latency; 2798 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) 2799 { 2800 best_cost = alg_in->cost; 2801 std::swap (alg_in, best_alg); 2802 best_alg->log[best_alg->ops] = m; 2803 best_alg->op[best_alg->ops] = alg_sub_t_m2; 2804 } 2805 } 2806 2807 if (cache_hit) 2808 goto done; 2809 } 2810 2811 /* Look for factors of t of the form 2812 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)). 2813 If we find such a factor, we can multiply by t using an algorithm that 2814 multiplies by q, shift the result by m and add/subtract it to itself. 2815 2816 We search for large factors first and loop down, even if large factors 2817 are less probable than small; if we find a large factor we will find a 2818 good sequence quickly, and therefore be able to prune (by decreasing 2819 COST_LIMIT) the search. */ 2820 2821 do_alg_addsub_factor: 2822 for (m = floor_log2 (t - 1); m >= 2; m--) 2823 { 2824 unsigned HOST_WIDE_INT d; 2825 2826 d = (HOST_WIDE_INT_1U << m) + 1; 2827 if (t % d == 0 && t > d && m < maxm 2828 && (!cache_hit || cache_alg == alg_add_factor)) 2829 { 2830 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m); 2831 if (shiftadd_cost (speed, mode, m) <= op_cost) 2832 op_cost = shiftadd_cost (speed, mode, m); 2833 2834 op_latency = op_cost; 2835 2836 2837 new_limit.cost = best_cost.cost - op_cost; 2838 new_limit.latency = best_cost.latency - op_latency; 2839 synth_mult (alg_in, t / d, &new_limit, mode); 2840 2841 alg_in->cost.cost += op_cost; 2842 alg_in->cost.latency += op_latency; 2843 if (alg_in->cost.latency < op_cost) 2844 alg_in->cost.latency = op_cost; 2845 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) 2846 { 2847 best_cost = alg_in->cost; 2848 std::swap (alg_in, best_alg); 2849 best_alg->log[best_alg->ops] = m; 2850 best_alg->op[best_alg->ops] = alg_add_factor; 2851 } 2852 /* Other factors will have been taken care of in the recursion. 
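For example, t = 27 is matched here as 3 * (2**3 + 1); the recursive call then synthesizes the remaining factor 3 on its own, so there is no need to keep scanning smaller values of M for this t.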
*/ 2853 break; 2854 } 2855 2856 d = (HOST_WIDE_INT_1U << m) - 1; 2857 if (t % d == 0 && t > d && m < maxm 2858 && (!cache_hit || cache_alg == alg_sub_factor)) 2859 { 2860 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m); 2861 if (shiftsub0_cost (speed, mode, m) <= op_cost) 2862 op_cost = shiftsub0_cost (speed, mode, m); 2863 2864 op_latency = op_cost; 2865 2866 new_limit.cost = best_cost.cost - op_cost; 2867 new_limit.latency = best_cost.latency - op_latency; 2868 synth_mult (alg_in, t / d, &new_limit, mode); 2869 2870 alg_in->cost.cost += op_cost; 2871 alg_in->cost.latency += op_latency; 2872 if (alg_in->cost.latency < op_cost) 2873 alg_in->cost.latency = op_cost; 2874 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) 2875 { 2876 best_cost = alg_in->cost; 2877 std::swap (alg_in, best_alg); 2878 best_alg->log[best_alg->ops] = m; 2879 best_alg->op[best_alg->ops] = alg_sub_factor; 2880 } 2881 break; 2882 } 2883 } 2884 if (cache_hit) 2885 goto done; 2886 2887 /* Try shift-and-add (load effective address) instructions, 2888 i.e. do a*3, a*5, a*9. */ 2889 if ((t & 1) != 0) 2890 { 2891 do_alg_add_t2_m: 2892 q = t - 1; 2893 m = ctz_hwi (q); 2894 if (q && m < maxm) 2895 { 2896 op_cost = shiftadd_cost (speed, mode, m); 2897 new_limit.cost = best_cost.cost - op_cost; 2898 new_limit.latency = best_cost.latency - op_cost; 2899 synth_mult (alg_in, (t - 1) >> m, &new_limit, mode); 2900 2901 alg_in->cost.cost += op_cost; 2902 alg_in->cost.latency += op_cost; 2903 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) 2904 { 2905 best_cost = alg_in->cost; 2906 std::swap (alg_in, best_alg); 2907 best_alg->log[best_alg->ops] = m; 2908 best_alg->op[best_alg->ops] = alg_add_t2_m; 2909 } 2910 } 2911 if (cache_hit) 2912 goto done; 2913 2914 do_alg_sub_t2_m: 2915 q = t + 1; 2916 m = ctz_hwi (q); 2917 if (q && m < maxm) 2918 { 2919 op_cost = shiftsub0_cost (speed, mode, m); 2920 new_limit.cost = best_cost.cost - op_cost; 2921 new_limit.latency = best_cost.latency - op_cost; 2922 synth_mult (alg_in, (t + 1) >> m, &new_limit, mode); 2923 2924 alg_in->cost.cost += op_cost; 2925 alg_in->cost.latency += op_cost; 2926 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) 2927 { 2928 best_cost = alg_in->cost; 2929 std::swap (alg_in, best_alg); 2930 best_alg->log[best_alg->ops] = m; 2931 best_alg->op[best_alg->ops] = alg_sub_t2_m; 2932 } 2933 } 2934 if (cache_hit) 2935 goto done; 2936 } 2937 2938 done: 2939 /* If best_cost has not decreased, we have not found any algorithm. */ 2940 if (!CHEAPER_MULT_COST (&best_cost, cost_limit)) 2941 { 2942 /* We failed to find an algorithm. Record alg_impossible for 2943 this case (that is, <T, MODE, COST_LIMIT>) so that next time 2944 we are asked to find an algorithm for T within the same or 2945 lower COST_LIMIT, we can immediately return to the 2946 caller. */ 2947 entry_ptr->t = t; 2948 entry_ptr->mode = mode; 2949 entry_ptr->speed = speed; 2950 entry_ptr->alg = alg_impossible; 2951 entry_ptr->cost = *cost_limit; 2952 return; 2953 } 2954 2955 /* Cache the result. */ 2956 if (!cache_hit) 2957 { 2958 entry_ptr->t = t; 2959 entry_ptr->mode = mode; 2960 entry_ptr->speed = speed; 2961 entry_ptr->alg = best_alg->op[best_alg->ops]; 2962 entry_ptr->cost.cost = best_cost.cost; 2963 entry_ptr->cost.latency = best_cost.latency; 2964 } 2965 2966 /* If we are getting a too long sequence for `struct algorithm' 2967 to record, make this search fail. */ 2968 if (best_alg->ops == MAX_BITS_PER_WORD) 2969 return; 2970 2971 /* Copy the algorithm from temporary space to the space at alg_out. 
2972 We avoid using structure assignment because the majority of 2973 best_alg is normally undefined, and this is a critical function. */ 2974 alg_out->ops = best_alg->ops + 1; 2975 alg_out->cost = best_cost; 2976 memcpy (alg_out->op, best_alg->op, 2977 alg_out->ops * sizeof *alg_out->op); 2978 memcpy (alg_out->log, best_alg->log, 2979 alg_out->ops * sizeof *alg_out->log); 2980 } 2981 2982 /* Find the cheapest way of multiplying a value of mode MODE by VAL. 2983 Try three variations: 2984 2985 - a shift/add sequence based on VAL itself 2986 - a shift/add sequence based on -VAL, followed by a negation 2987 - a shift/add sequence based on VAL - 1, followed by an addition. 2988 2989 Return true if the cheapest of these cost less than MULT_COST, 2990 describing the algorithm in *ALG and final fixup in *VARIANT. */ 2991 2992 bool 2993 choose_mult_variant (machine_mode mode, HOST_WIDE_INT val, 2994 struct algorithm *alg, enum mult_variant *variant, 2995 int mult_cost) 2996 { 2997 struct algorithm alg2; 2998 struct mult_cost limit; 2999 int op_cost; 3000 bool speed = optimize_insn_for_speed_p (); 3001 3002 /* Fail quickly for impossible bounds. */ 3003 if (mult_cost < 0) 3004 return false; 3005 3006 /* Ensure that mult_cost provides a reasonable upper bound. 3007 Any constant multiplication can be performed with less 3008 than 2 * bits additions. */ 3009 op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode); 3010 if (mult_cost > op_cost) 3011 mult_cost = op_cost; 3012 3013 *variant = basic_variant; 3014 limit.cost = mult_cost; 3015 limit.latency = mult_cost; 3016 synth_mult (alg, val, &limit, mode); 3017 3018 /* This works only if the inverted value actually fits in an 3019 `unsigned int' */ 3020 if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode)) 3021 { 3022 op_cost = neg_cost (speed, mode); 3023 if (MULT_COST_LESS (&alg->cost, mult_cost)) 3024 { 3025 limit.cost = alg->cost.cost - op_cost; 3026 limit.latency = alg->cost.latency - op_cost; 3027 } 3028 else 3029 { 3030 limit.cost = mult_cost - op_cost; 3031 limit.latency = mult_cost - op_cost; 3032 } 3033 3034 synth_mult (&alg2, -val, &limit, mode); 3035 alg2.cost.cost += op_cost; 3036 alg2.cost.latency += op_cost; 3037 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost)) 3038 *alg = alg2, *variant = negate_variant; 3039 } 3040 3041 /* This proves very useful for division-by-constant. */ 3042 op_cost = add_cost (speed, mode); 3043 if (MULT_COST_LESS (&alg->cost, mult_cost)) 3044 { 3045 limit.cost = alg->cost.cost - op_cost; 3046 limit.latency = alg->cost.latency - op_cost; 3047 } 3048 else 3049 { 3050 limit.cost = mult_cost - op_cost; 3051 limit.latency = mult_cost - op_cost; 3052 } 3053 3054 synth_mult (&alg2, val - 1, &limit, mode); 3055 alg2.cost.cost += op_cost; 3056 alg2.cost.latency += op_cost; 3057 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost)) 3058 *alg = alg2, *variant = add_variant; 3059 3060 return MULT_COST_LESS (&alg->cost, mult_cost); 3061 } 3062 3063 /* A subroutine of expand_mult, used for constant multiplications. 3064 Multiply OP0 by VAL in mode MODE, storing the result in TARGET if 3065 convenient. Use the shift/add sequence described by ALG and apply 3066 the final fixup specified by VARIANT. 
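For example, for VAL = 10 a plausible ALG is { alg_m, alg_add_t2_m (log 2), alg_shift (log 1) }: ACCUM = OP0, then ACCUM = (ACCUM << 2) + OP0, then ACCUM <<= 1, giving OP0 * 10; which sequence is actually chosen depends on the target's cost tables.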
*/ 3067 3068 static rtx 3069 expand_mult_const (machine_mode mode, rtx op0, HOST_WIDE_INT val, 3070 rtx target, const struct algorithm *alg, 3071 enum mult_variant variant) 3072 { 3073 unsigned HOST_WIDE_INT val_so_far; 3074 rtx_insn *insn; 3075 rtx accum, tem; 3076 int opno; 3077 machine_mode nmode; 3078 3079 /* Avoid referencing memory over and over and invalid sharing 3080 on SUBREGs. */ 3081 op0 = force_reg (mode, op0); 3082 3083 /* ACCUM starts out either as OP0 or as a zero, depending on 3084 the first operation. */ 3085 3086 if (alg->op[0] == alg_zero) 3087 { 3088 accum = copy_to_mode_reg (mode, CONST0_RTX (mode)); 3089 val_so_far = 0; 3090 } 3091 else if (alg->op[0] == alg_m) 3092 { 3093 accum = copy_to_mode_reg (mode, op0); 3094 val_so_far = 1; 3095 } 3096 else 3097 gcc_unreachable (); 3098 3099 for (opno = 1; opno < alg->ops; opno++) 3100 { 3101 int log = alg->log[opno]; 3102 rtx shift_subtarget = optimize ? 0 : accum; 3103 rtx add_target 3104 = (opno == alg->ops - 1 && target != 0 && variant != add_variant 3105 && !optimize) 3106 ? target : 0; 3107 rtx accum_target = optimize ? 0 : accum; 3108 rtx accum_inner; 3109 3110 switch (alg->op[opno]) 3111 { 3112 case alg_shift: 3113 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0); 3114 /* REG_EQUAL note will be attached to the following insn. */ 3115 emit_move_insn (accum, tem); 3116 val_so_far <<= log; 3117 break; 3118 3119 case alg_add_t_m2: 3120 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0); 3121 accum = force_operand (gen_rtx_PLUS (mode, accum, tem), 3122 add_target ? add_target : accum_target); 3123 val_so_far += HOST_WIDE_INT_1U << log; 3124 break; 3125 3126 case alg_sub_t_m2: 3127 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0); 3128 accum = force_operand (gen_rtx_MINUS (mode, accum, tem), 3129 add_target ? add_target : accum_target); 3130 val_so_far -= HOST_WIDE_INT_1U << log; 3131 break; 3132 3133 case alg_add_t2_m: 3134 accum = expand_shift (LSHIFT_EXPR, mode, accum, 3135 log, shift_subtarget, 0); 3136 accum = force_operand (gen_rtx_PLUS (mode, accum, op0), 3137 add_target ? add_target : accum_target); 3138 val_so_far = (val_so_far << log) + 1; 3139 break; 3140 3141 case alg_sub_t2_m: 3142 accum = expand_shift (LSHIFT_EXPR, mode, accum, 3143 log, shift_subtarget, 0); 3144 accum = force_operand (gen_rtx_MINUS (mode, accum, op0), 3145 add_target ? add_target : accum_target); 3146 val_so_far = (val_so_far << log) - 1; 3147 break; 3148 3149 case alg_add_factor: 3150 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0); 3151 accum = force_operand (gen_rtx_PLUS (mode, accum, tem), 3152 add_target ? add_target : accum_target); 3153 val_so_far += val_so_far << log; 3154 break; 3155 3156 case alg_sub_factor: 3157 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0); 3158 accum = force_operand (gen_rtx_MINUS (mode, tem, accum), 3159 (add_target 3160 ? add_target : (optimize ? 0 : tem))); 3161 val_so_far = (val_so_far << log) - val_so_far; 3162 break; 3163 3164 default: 3165 gcc_unreachable (); 3166 } 3167 3168 if (SCALAR_INT_MODE_P (mode)) 3169 { 3170 /* Write a REG_EQUAL note on the last insn so that we can cse 3171 multiplication sequences. Note that if ACCUM is a SUBREG, 3172 we've set the inner register and must properly indicate that. 
*/ 3173 tem = op0, nmode = mode; 3174 accum_inner = accum; 3175 if (GET_CODE (accum) == SUBREG) 3176 { 3177 accum_inner = SUBREG_REG (accum); 3178 nmode = GET_MODE (accum_inner); 3179 tem = gen_lowpart (nmode, op0); 3180 } 3181 3182 insn = get_last_insn (); 3183 set_dst_reg_note (insn, REG_EQUAL, 3184 gen_rtx_MULT (nmode, tem, 3185 gen_int_mode (val_so_far, nmode)), 3186 accum_inner); 3187 } 3188 } 3189 3190 if (variant == negate_variant) 3191 { 3192 val_so_far = -val_so_far; 3193 accum = expand_unop (mode, neg_optab, accum, target, 0); 3194 } 3195 else if (variant == add_variant) 3196 { 3197 val_so_far = val_so_far + 1; 3198 accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target); 3199 } 3200 3201 /* Compare only the bits of val and val_so_far that are significant 3202 in the result mode, to avoid sign-/zero-extension confusion. */ 3203 nmode = GET_MODE_INNER (mode); 3204 val &= GET_MODE_MASK (nmode); 3205 val_so_far &= GET_MODE_MASK (nmode); 3206 gcc_assert (val == (HOST_WIDE_INT) val_so_far); 3207 3208 return accum; 3209 } 3210 3211 /* Perform a multiplication and return an rtx for the result. 3212 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's); 3213 TARGET is a suggestion for where to store the result (an rtx). 3214 3215 We check specially for a constant integer as OP1. 3216 If you want this check for OP0 as well, then before calling 3217 you should swap the two operands if OP0 would be constant. */ 3218 3219 rtx 3220 expand_mult (machine_mode mode, rtx op0, rtx op1, rtx target, 3221 int unsignedp) 3222 { 3223 enum mult_variant variant; 3224 struct algorithm algorithm; 3225 rtx scalar_op1; 3226 int max_cost; 3227 bool speed = optimize_insn_for_speed_p (); 3228 bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp; 3229 3230 if (CONSTANT_P (op0)) 3231 std::swap (op0, op1); 3232 3233 /* For vectors, there are several simplifications that can be made if 3234 all elements of the vector constant are identical. */ 3235 scalar_op1 = unwrap_const_vec_duplicate (op1); 3236 3237 if (INTEGRAL_MODE_P (mode)) 3238 { 3239 rtx fake_reg; 3240 HOST_WIDE_INT coeff; 3241 bool is_neg; 3242 int mode_bitsize; 3243 3244 if (op1 == CONST0_RTX (mode)) 3245 return op1; 3246 if (op1 == CONST1_RTX (mode)) 3247 return op0; 3248 if (op1 == CONSTM1_RTX (mode)) 3249 return expand_unop (mode, do_trapv ? negv_optab : neg_optab, 3250 op0, target, 0); 3251 3252 if (do_trapv) 3253 goto skip_synth; 3254 3255 /* If mode is integer vector mode, check if the backend supports 3256 vector lshift (by scalar or vector) at all. If not, we can't use 3257 synthetized multiply. */ 3258 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT 3259 && optab_handler (vashl_optab, mode) == CODE_FOR_nothing 3260 && optab_handler (ashl_optab, mode) == CODE_FOR_nothing) 3261 goto skip_synth; 3262 3263 /* These are the operations that are potentially turned into 3264 a sequence of shifts and additions. */ 3265 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode); 3266 3267 /* synth_mult does an `unsigned int' multiply. As long as the mode is 3268 less than or equal in size to `unsigned int' this doesn't matter. 3269 If the mode is larger than `unsigned int', then synth_mult works 3270 only if the constant value exactly fits in an `unsigned int' without 3271 any truncation. This means that multiplying by negative values does 3272 not work; results are off by 2^32 on a 32 bit machine. 
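This is also why the negative-coefficient case for modes wider than HOST_WIDE_INT is handled separately below: we perform the multiplication by -COEFF (treated as unsigned) and negate the result.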
*/ 3273 if (CONST_INT_P (scalar_op1)) 3274 { 3275 coeff = INTVAL (scalar_op1); 3276 is_neg = coeff < 0; 3277 } 3278 #if TARGET_SUPPORTS_WIDE_INT 3279 else if (CONST_WIDE_INT_P (scalar_op1)) 3280 #else 3281 else if (CONST_DOUBLE_AS_INT_P (scalar_op1)) 3282 #endif 3283 { 3284 int shift = wi::exact_log2 (rtx_mode_t (scalar_op1, mode)); 3285 /* Perfect power of 2 (other than 1, which is handled above). */ 3286 if (shift > 0) 3287 return expand_shift (LSHIFT_EXPR, mode, op0, 3288 shift, target, unsignedp); 3289 else 3290 goto skip_synth; 3291 } 3292 else 3293 goto skip_synth; 3294 3295 /* We used to test optimize here, on the grounds that it's better to 3296 produce a smaller program when -O is not used. But this causes 3297 such a terrible slowdown sometimes that it seems better to always 3298 use synth_mult. */ 3299 3300 /* Special case powers of two. */ 3301 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff) 3302 && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)) 3303 return expand_shift (LSHIFT_EXPR, mode, op0, 3304 floor_log2 (coeff), target, unsignedp); 3305 3306 fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1); 3307 3308 /* Attempt to handle multiplication of DImode values by negative 3309 coefficients, by performing the multiplication by a positive 3310 multiplier and then inverting the result. */ 3311 if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT) 3312 { 3313 /* Its safe to use -coeff even for INT_MIN, as the 3314 result is interpreted as an unsigned coefficient. 3315 Exclude cost of op0 from max_cost to match the cost 3316 calculation of the synth_mult. */ 3317 coeff = -(unsigned HOST_WIDE_INT) coeff; 3318 max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), 3319 mode, speed) 3320 - neg_cost (speed, mode)); 3321 if (max_cost <= 0) 3322 goto skip_synth; 3323 3324 /* Special case powers of two. */ 3325 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)) 3326 { 3327 rtx temp = expand_shift (LSHIFT_EXPR, mode, op0, 3328 floor_log2 (coeff), target, unsignedp); 3329 return expand_unop (mode, neg_optab, temp, target, 0); 3330 } 3331 3332 if (choose_mult_variant (mode, coeff, &algorithm, &variant, 3333 max_cost)) 3334 { 3335 rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX, 3336 &algorithm, variant); 3337 return expand_unop (mode, neg_optab, temp, target, 0); 3338 } 3339 goto skip_synth; 3340 } 3341 3342 /* Exclude cost of op0 from max_cost to match the cost 3343 calculation of the synth_mult. */ 3344 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), mode, speed); 3345 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost)) 3346 return expand_mult_const (mode, op0, coeff, target, 3347 &algorithm, variant); 3348 } 3349 skip_synth: 3350 3351 /* Expand x*2.0 as x+x. */ 3352 if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1) 3353 && real_equal (CONST_DOUBLE_REAL_VALUE (scalar_op1), &dconst2)) 3354 { 3355 op0 = force_reg (GET_MODE (op0), op0); 3356 return expand_binop (mode, add_optab, op0, op0, 3357 target, unsignedp, OPTAB_LIB_WIDEN); 3358 } 3359 3360 /* This used to use umul_optab if unsigned, but for non-widening multiply 3361 there is no difference between signed and unsigned. */ 3362 op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab, 3363 op0, op1, target, unsignedp, OPTAB_LIB_WIDEN); 3364 gcc_assert (op0); 3365 return op0; 3366 } 3367 3368 /* Return a cost estimate for multiplying a register by the given 3369 COEFFicient in the given MODE and SPEED. 
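That is, the cost of the best shift/add sequence when choose_mult_variant finds one cheaper than a hardware multiply, and the rtx cost of the multiply itself otherwise.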
*/ 3370 3371 int 3372 mult_by_coeff_cost (HOST_WIDE_INT coeff, machine_mode mode, bool speed) 3373 { 3374 int max_cost; 3375 struct algorithm algorithm; 3376 enum mult_variant variant; 3377 3378 rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1); 3379 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg), 3380 mode, speed); 3381 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost)) 3382 return algorithm.cost.cost; 3383 else 3384 return max_cost; 3385 } 3386 3387 /* Perform a widening multiplication and return an rtx for the result. 3388 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's); 3389 TARGET is a suggestion for where to store the result (an rtx). 3390 THIS_OPTAB is the optab we should use, it must be either umul_widen_optab 3391 or smul_widen_optab. 3392 3393 We check specially for a constant integer as OP1, comparing the 3394 cost of a widening multiply against the cost of a sequence of shifts 3395 and adds. */ 3396 3397 rtx 3398 expand_widening_mult (machine_mode mode, rtx op0, rtx op1, rtx target, 3399 int unsignedp, optab this_optab) 3400 { 3401 bool speed = optimize_insn_for_speed_p (); 3402 rtx cop1; 3403 3404 if (CONST_INT_P (op1) 3405 && GET_MODE (op0) != VOIDmode 3406 && (cop1 = convert_modes (mode, GET_MODE (op0), op1, 3407 this_optab == umul_widen_optab)) 3408 && CONST_INT_P (cop1) 3409 && (INTVAL (cop1) >= 0 3410 || HWI_COMPUTABLE_MODE_P (mode))) 3411 { 3412 HOST_WIDE_INT coeff = INTVAL (cop1); 3413 int max_cost; 3414 enum mult_variant variant; 3415 struct algorithm algorithm; 3416 3417 if (coeff == 0) 3418 return CONST0_RTX (mode); 3419 3420 /* Special case powers of two. */ 3421 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)) 3422 { 3423 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab); 3424 return expand_shift (LSHIFT_EXPR, mode, op0, 3425 floor_log2 (coeff), target, unsignedp); 3426 } 3427 3428 /* Exclude cost of op0 from max_cost to match the cost 3429 calculation of the synth_mult. */ 3430 max_cost = mul_widen_cost (speed, mode); 3431 if (choose_mult_variant (mode, coeff, &algorithm, &variant, 3432 max_cost)) 3433 { 3434 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab); 3435 return expand_mult_const (mode, op0, coeff, target, 3436 &algorithm, variant); 3437 } 3438 } 3439 return expand_binop (mode, this_optab, op0, op1, target, 3440 unsignedp, OPTAB_LIB_WIDEN); 3441 } 3442 3443 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to 3444 replace division by D, and put the least significant N bits of the result 3445 in *MULTIPLIER_PTR and return the most significant bit. 3446 3447 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the 3448 needed precision is in PRECISION (should be <= N). 3449 3450 PRECISION should be as small as possible so this function can choose 3451 multiplier more freely. 3452 3453 The rounded-up logarithm of D is placed in *lgup_ptr. A shift count that 3454 is to be used for a final right shift is placed in *POST_SHIFT_PTR. 3455 3456 Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR), 3457 where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. 
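For instance, with N = 32, D = 7 and PRECISION = 32 this yields *POST_SHIFT_PTR = 3, *LGUP_PTR = 3 and the 33-bit multiplier 0x124924925, i.e. *MULTIPLIER_PTR = 0x24924925 with the returned most significant bit set.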
*/ 3458 3459 unsigned HOST_WIDE_INT 3460 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision, 3461 unsigned HOST_WIDE_INT *multiplier_ptr, 3462 int *post_shift_ptr, int *lgup_ptr) 3463 { 3464 int lgup, post_shift; 3465 int pow, pow2; 3466 3467 /* lgup = ceil(log2(divisor)); */ 3468 lgup = ceil_log2 (d); 3469 3470 gcc_assert (lgup <= n); 3471 3472 pow = n + lgup; 3473 pow2 = n + lgup - precision; 3474 3475 /* mlow = 2^(N + lgup)/d */ 3476 wide_int val = wi::set_bit_in_zero (pow, HOST_BITS_PER_DOUBLE_INT); 3477 wide_int mlow = wi::udiv_trunc (val, d); 3478 3479 /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */ 3480 val |= wi::set_bit_in_zero (pow2, HOST_BITS_PER_DOUBLE_INT); 3481 wide_int mhigh = wi::udiv_trunc (val, d); 3482 3483 /* If precision == N, then mlow, mhigh exceed 2^N 3484 (but they do not exceed 2^(N+1)). */ 3485 3486 /* Reduce to lowest terms. */ 3487 for (post_shift = lgup; post_shift > 0; post_shift--) 3488 { 3489 unsigned HOST_WIDE_INT ml_lo = wi::extract_uhwi (mlow, 1, 3490 HOST_BITS_PER_WIDE_INT); 3491 unsigned HOST_WIDE_INT mh_lo = wi::extract_uhwi (mhigh, 1, 3492 HOST_BITS_PER_WIDE_INT); 3493 if (ml_lo >= mh_lo) 3494 break; 3495 3496 mlow = wi::uhwi (ml_lo, HOST_BITS_PER_DOUBLE_INT); 3497 mhigh = wi::uhwi (mh_lo, HOST_BITS_PER_DOUBLE_INT); 3498 } 3499 3500 *post_shift_ptr = post_shift; 3501 *lgup_ptr = lgup; 3502 if (n < HOST_BITS_PER_WIDE_INT) 3503 { 3504 unsigned HOST_WIDE_INT mask = (HOST_WIDE_INT_1U << n) - 1; 3505 *multiplier_ptr = mhigh.to_uhwi () & mask; 3506 return mhigh.to_uhwi () >= mask; 3507 } 3508 else 3509 { 3510 *multiplier_ptr = mhigh.to_uhwi (); 3511 return wi::extract_uhwi (mhigh, HOST_BITS_PER_WIDE_INT, 1); 3512 } 3513 } 3514 3515 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is 3516 congruent to 1 (mod 2**N). */ 3517 3518 static unsigned HOST_WIDE_INT 3519 invert_mod2n (unsigned HOST_WIDE_INT x, int n) 3520 { 3521 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */ 3522 3523 /* The algorithm notes that the choice y = x satisfies 3524 x*y == 1 mod 2^3, since x is assumed odd. 3525 Each iteration doubles the number of bits of significance in y. */ 3526 3527 unsigned HOST_WIDE_INT mask; 3528 unsigned HOST_WIDE_INT y = x; 3529 int nbit = 3; 3530 3531 mask = (n == HOST_BITS_PER_WIDE_INT 3532 ? HOST_WIDE_INT_M1U 3533 : (HOST_WIDE_INT_1U << n) - 1); 3534 3535 while (nbit < n) 3536 { 3537 y = y * (2 - x*y) & mask; /* Modulo 2^N */ 3538 nbit *= 2; 3539 } 3540 return y; 3541 } 3542 3543 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness 3544 flavor of OP0 and OP1. ADJ_OPERAND is already the high half of the 3545 product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed product 3546 to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to 3547 become signed. 3548 3549 The result is put in TARGET if that is convenient. 3550 3551 MODE is the mode of operation. */ 3552 3553 rtx 3554 expand_mult_highpart_adjust (machine_mode mode, rtx adj_operand, rtx op0, 3555 rtx op1, rtx target, int unsignedp) 3556 { 3557 rtx tem; 3558 enum rtx_code adj_code = unsignedp ? 
PLUS : MINUS; 3559 3560 tem = expand_shift (RSHIFT_EXPR, mode, op0, 3561 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0); 3562 tem = expand_and (mode, tem, op1, NULL_RTX); 3563 adj_operand 3564 = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem), 3565 adj_operand); 3566 3567 tem = expand_shift (RSHIFT_EXPR, mode, op1, 3568 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0); 3569 tem = expand_and (mode, tem, op0, NULL_RTX); 3570 target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem), 3571 target); 3572 3573 return target; 3574 } 3575 3576 /* Subroutine of expmed_mult_highpart. Return the MODE high part of OP. */ 3577 3578 static rtx 3579 extract_high_half (machine_mode mode, rtx op) 3580 { 3581 machine_mode wider_mode; 3582 3583 if (mode == word_mode) 3584 return gen_highpart (mode, op); 3585 3586 gcc_assert (!SCALAR_FLOAT_MODE_P (mode)); 3587 3588 wider_mode = GET_MODE_WIDER_MODE (mode); 3589 op = expand_shift (RSHIFT_EXPR, wider_mode, op, 3590 GET_MODE_BITSIZE (mode), 0, 1); 3591 return convert_modes (mode, wider_mode, op, 0); 3592 } 3593 3594 /* Like expmed_mult_highpart, but only consider using a multiplication 3595 optab. OP1 is an rtx for the constant operand. */ 3596 3597 static rtx 3598 expmed_mult_highpart_optab (machine_mode mode, rtx op0, rtx op1, 3599 rtx target, int unsignedp, int max_cost) 3600 { 3601 rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode); 3602 machine_mode wider_mode; 3603 optab moptab; 3604 rtx tem; 3605 int size; 3606 bool speed = optimize_insn_for_speed_p (); 3607 3608 gcc_assert (!SCALAR_FLOAT_MODE_P (mode)); 3609 3610 wider_mode = GET_MODE_WIDER_MODE (mode); 3611 size = GET_MODE_BITSIZE (mode); 3612 3613 /* Firstly, try using a multiplication insn that only generates the needed 3614 high part of the product, and in the sign flavor of unsignedp. */ 3615 if (mul_highpart_cost (speed, mode) < max_cost) 3616 { 3617 moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab; 3618 tem = expand_binop (mode, moptab, op0, narrow_op1, target, 3619 unsignedp, OPTAB_DIRECT); 3620 if (tem) 3621 return tem; 3622 } 3623 3624 /* Secondly, same as above, but use sign flavor opposite of unsignedp. 3625 Need to adjust the result after the multiplication. */ 3626 if (size - 1 < BITS_PER_WORD 3627 && (mul_highpart_cost (speed, mode) 3628 + 2 * shift_cost (speed, mode, size-1) 3629 + 4 * add_cost (speed, mode) < max_cost)) 3630 { 3631 moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab; 3632 tem = expand_binop (mode, moptab, op0, narrow_op1, target, 3633 unsignedp, OPTAB_DIRECT); 3634 if (tem) 3635 /* We used the wrong signedness. Adjust the result. */ 3636 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1, 3637 tem, unsignedp); 3638 } 3639 3640 /* Try widening multiplication. */ 3641 moptab = unsignedp ? umul_widen_optab : smul_widen_optab; 3642 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing 3643 && mul_widen_cost (speed, wider_mode) < max_cost) 3644 { 3645 tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0, 3646 unsignedp, OPTAB_WIDEN); 3647 if (tem) 3648 return extract_high_half (mode, tem); 3649 } 3650 3651 /* Try widening the mode and perform a non-widening multiplication. 
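     For instance, if MODE is a 32-bit integer mode and WIDER_MODE is
     64 bits (an illustrative case; the actual modes depend on the
     target), this computes the full 64-bit product with a plain multiply
     and lets extract_high_half recover bits 32..63 by shifting right in
     the wider mode.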
*/ 3652 if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing 3653 && size - 1 < BITS_PER_WORD 3654 && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1) 3655 < max_cost)) 3656 { 3657 rtx_insn *insns; 3658 rtx wop0, wop1; 3659 3660 /* We need to widen the operands, for example to ensure the 3661 constant multiplier is correctly sign or zero extended. 3662 Use a sequence to clean-up any instructions emitted by 3663 the conversions if things don't work out. */ 3664 start_sequence (); 3665 wop0 = convert_modes (wider_mode, mode, op0, unsignedp); 3666 wop1 = convert_modes (wider_mode, mode, op1, unsignedp); 3667 tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0, 3668 unsignedp, OPTAB_WIDEN); 3669 insns = get_insns (); 3670 end_sequence (); 3671 3672 if (tem) 3673 { 3674 emit_insn (insns); 3675 return extract_high_half (mode, tem); 3676 } 3677 } 3678 3679 /* Try widening multiplication of opposite signedness, and adjust. */ 3680 moptab = unsignedp ? smul_widen_optab : umul_widen_optab; 3681 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing 3682 && size - 1 < BITS_PER_WORD 3683 && (mul_widen_cost (speed, wider_mode) 3684 + 2 * shift_cost (speed, mode, size-1) 3685 + 4 * add_cost (speed, mode) < max_cost)) 3686 { 3687 tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 3688 NULL_RTX, ! unsignedp, OPTAB_WIDEN); 3689 if (tem != 0) 3690 { 3691 tem = extract_high_half (mode, tem); 3692 /* We used the wrong signedness. Adjust the result. */ 3693 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1, 3694 target, unsignedp); 3695 } 3696 } 3697 3698 return 0; 3699 } 3700 3701 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant), 3702 putting the high half of the result in TARGET if that is convenient, 3703 and return where the result is. If the operation can not be performed, 3704 0 is returned. 3705 3706 MODE is the mode of operation and result. 3707 3708 UNSIGNEDP nonzero means unsigned multiply. 3709 3710 MAX_COST is the total allowed cost for the expanded RTL. */ 3711 3712 static rtx 3713 expmed_mult_highpart (machine_mode mode, rtx op0, rtx op1, 3714 rtx target, int unsignedp, int max_cost) 3715 { 3716 machine_mode wider_mode = GET_MODE_WIDER_MODE (mode); 3717 unsigned HOST_WIDE_INT cnst1; 3718 int extra_cost; 3719 bool sign_adjust = false; 3720 enum mult_variant variant; 3721 struct algorithm alg; 3722 rtx tem; 3723 bool speed = optimize_insn_for_speed_p (); 3724 3725 gcc_assert (!SCALAR_FLOAT_MODE_P (mode)); 3726 /* We can't support modes wider than HOST_BITS_PER_INT. */ 3727 gcc_assert (HWI_COMPUTABLE_MODE_P (mode)); 3728 3729 cnst1 = INTVAL (op1) & GET_MODE_MASK (mode); 3730 3731 /* We can't optimize modes wider than BITS_PER_WORD. 3732 ??? We might be able to perform double-word arithmetic if 3733 mode == word_mode, however all the cost calculations in 3734 synth_mult etc. assume single-word operations. */ 3735 if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD) 3736 return expmed_mult_highpart_optab (mode, op0, op1, target, 3737 unsignedp, max_cost); 3738 3739 extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1); 3740 3741 /* Check whether we try to multiply by a negative constant. */ 3742 if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1)) 3743 { 3744 sign_adjust = true; 3745 extra_cost += add_cost (speed, mode); 3746 } 3747 3748 /* See whether shift/add multiplication is cheap enough. 
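     As an illustration, in a 32-bit mode a constant of -3 has already
     been reduced to cnst1 = 0xfffffffd above; the shift/add sequence is
     costed for that unsigned value in the wider mode, and for signed
     inputs the sign_adjust step below subtracts op0 from the extracted
     high half to turn the unsigned high product into the signed one.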
*/
  if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
			   max_cost - extra_cost))
    {
      /* See whether the specialized multiplication optabs are
	 cheaper than the shift/add version.  */
      tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
					alg.cost.cost + extra_cost);
      if (tem)
	return tem;

      tem = convert_to_mode (wider_mode, op0, unsignedp);
      tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
      tem = extract_high_half (mode, tem);

      /* Adjust result for signedness.  */
      if (sign_adjust)
	tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);

      return tem;
    }
  return expmed_mult_highpart_optab (mode, op0, op1, target,
				     unsignedp, max_cost);
}

/* Expand signed modulus of OP0 by a power of two D in mode MODE.  */

static rtx
expand_smod_pow2 (machine_mode mode, rtx op0, HOST_WIDE_INT d)
{
  rtx result, temp, shift;
  rtx_code_label *label;
  int logd;
  int prec = GET_MODE_PRECISION (mode);

  logd = floor_log2 (d);
  result = gen_reg_rtx (mode);

  /* Avoid conditional branches when they're expensive.  */
  if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
      && optimize_insn_for_speed_p ())
    {
      rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
				      mode, 0, -1);
      if (signmask)
	{
	  HOST_WIDE_INT masklow = (HOST_WIDE_INT_1 << logd) - 1;
	  signmask = force_reg (mode, signmask);
	  shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);

	  /* Use the rtx_cost of a LSHIFTRT instruction to determine
	     which instruction sequence to use.  If logical right shifts
	     are expensive then use 2 XORs, 2 SUBs and an AND, otherwise
	     use a LSHIFTRT, 1 ADD, 1 SUB and an AND.  */

	  temp = gen_rtx_LSHIFTRT (mode, result, shift);
	  if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
	      || (set_src_cost (temp, mode, optimize_insn_for_speed_p ())
		  > COSTS_N_INSNS (2)))
	    {
	      temp = expand_binop (mode, xor_optab, op0, signmask,
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
	      temp = expand_binop (mode, sub_optab, temp, signmask,
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
	      temp = expand_binop (mode, and_optab, temp,
				   gen_int_mode (masklow, mode),
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
	      temp = expand_binop (mode, xor_optab, temp, signmask,
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
	      temp = expand_binop (mode, sub_optab, temp, signmask,
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
	    }
	  else
	    {
	      signmask = expand_binop (mode, lshr_optab, signmask, shift,
				       NULL_RTX, 1, OPTAB_LIB_WIDEN);
	      signmask = force_reg (mode, signmask);

	      temp = expand_binop (mode, add_optab, op0, signmask,
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
	      temp = expand_binop (mode, and_optab, temp,
				   gen_int_mode (masklow, mode),
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
	      temp = expand_binop (mode, sub_optab, temp, signmask,
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
	    }
	  return temp;
	}
    }

  /* Mask contains the mode's signbit and the significant bits of the
     modulus.  By including the signbit in the operation, many targets
     can avoid an explicit compare operation in the following comparison
     against zero.
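     For example, in a 32-bit mode with D = 8 (LOGD = 3) the mask is
     0x80000007: the AND below keeps the sign bit together with the three
     low bits, so the result can be tested directly against zero.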
*/ 3843 wide_int mask = wi::mask (logd, false, prec); 3844 mask = wi::set_bit (mask, prec - 1); 3845 3846 temp = expand_binop (mode, and_optab, op0, 3847 immed_wide_int_const (mask, mode), 3848 result, 1, OPTAB_LIB_WIDEN); 3849 if (temp != result) 3850 emit_move_insn (result, temp); 3851 3852 label = gen_label_rtx (); 3853 do_cmp_and_jump (result, const0_rtx, GE, mode, label); 3854 3855 temp = expand_binop (mode, sub_optab, result, const1_rtx, result, 3856 0, OPTAB_LIB_WIDEN); 3857 3858 mask = wi::mask (logd, true, prec); 3859 temp = expand_binop (mode, ior_optab, temp, 3860 immed_wide_int_const (mask, mode), 3861 result, 1, OPTAB_LIB_WIDEN); 3862 temp = expand_binop (mode, add_optab, temp, const1_rtx, result, 3863 0, OPTAB_LIB_WIDEN); 3864 if (temp != result) 3865 emit_move_insn (result, temp); 3866 emit_label (label); 3867 return result; 3868 } 3869 3870 /* Expand signed division of OP0 by a power of two D in mode MODE. 3871 This routine is only called for positive values of D. */ 3872 3873 static rtx 3874 expand_sdiv_pow2 (machine_mode mode, rtx op0, HOST_WIDE_INT d) 3875 { 3876 rtx temp; 3877 rtx_code_label *label; 3878 int logd; 3879 3880 logd = floor_log2 (d); 3881 3882 if (d == 2 3883 && BRANCH_COST (optimize_insn_for_speed_p (), 3884 false) >= 1) 3885 { 3886 temp = gen_reg_rtx (mode); 3887 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1); 3888 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX, 3889 0, OPTAB_LIB_WIDEN); 3890 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0); 3891 } 3892 3893 if (HAVE_conditional_move 3894 && BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2) 3895 { 3896 rtx temp2; 3897 3898 start_sequence (); 3899 temp2 = copy_to_mode_reg (mode, op0); 3900 temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode), 3901 NULL_RTX, 0, OPTAB_LIB_WIDEN); 3902 temp = force_reg (mode, temp); 3903 3904 /* Construct "temp2 = (temp2 < 0) ? temp : temp2". */ 3905 temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx, 3906 mode, temp, temp2, mode, 0); 3907 if (temp2) 3908 { 3909 rtx_insn *seq = get_insns (); 3910 end_sequence (); 3911 emit_insn (seq); 3912 return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0); 3913 } 3914 end_sequence (); 3915 } 3916 3917 if (BRANCH_COST (optimize_insn_for_speed_p (), 3918 false) >= 2) 3919 { 3920 int ushift = GET_MODE_BITSIZE (mode) - logd; 3921 3922 temp = gen_reg_rtx (mode); 3923 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1); 3924 if (GET_MODE_BITSIZE (mode) >= BITS_PER_WORD 3925 || shift_cost (optimize_insn_for_speed_p (), mode, ushift) 3926 > COSTS_N_INSNS (1)) 3927 temp = expand_binop (mode, and_optab, temp, gen_int_mode (d - 1, mode), 3928 NULL_RTX, 0, OPTAB_LIB_WIDEN); 3929 else 3930 temp = expand_shift (RSHIFT_EXPR, mode, temp, 3931 ushift, NULL_RTX, 1); 3932 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX, 3933 0, OPTAB_LIB_WIDEN); 3934 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0); 3935 } 3936 3937 label = gen_label_rtx (); 3938 temp = copy_to_mode_reg (mode, op0); 3939 do_cmp_and_jump (temp, const0_rtx, GE, mode, label); 3940 expand_inc (temp, gen_int_mode (d - 1, mode)); 3941 emit_label (label); 3942 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0); 3943 } 3944 3945 /* Emit the code to divide OP0 by OP1, putting the result in TARGET 3946 if that is convenient, and returning where the result is. 
3947 You may request either the quotient or the remainder as the result; 3948 specify REM_FLAG nonzero to get the remainder. 3949 3950 CODE is the expression code for which kind of division this is; 3951 it controls how rounding is done. MODE is the machine mode to use. 3952 UNSIGNEDP nonzero means do unsigned division. */ 3953 3954 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI 3955 and then correct it by or'ing in missing high bits 3956 if result of ANDI is nonzero. 3957 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result. 3958 This could optimize to a bfexts instruction. 3959 But C doesn't use these operations, so their optimizations are 3960 left for later. */ 3961 /* ??? For modulo, we don't actually need the highpart of the first product, 3962 the low part will do nicely. And for small divisors, the second multiply 3963 can also be a low-part only multiply or even be completely left out. 3964 E.g. to calculate the remainder of a division by 3 with a 32 bit 3965 multiply, multiply with 0x55555556 and extract the upper two bits; 3966 the result is exact for inputs up to 0x1fffffff. 3967 The input range can be reduced by using cross-sum rules. 3968 For odd divisors >= 3, the following table gives right shift counts 3969 so that if a number is shifted by an integer multiple of the given 3970 amount, the remainder stays the same: 3971 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20, 3972 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0, 3973 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0, 3974 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33, 3975 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12 3976 3977 Cross-sum rules for even numbers can be derived by leaving as many bits 3978 to the right alone as the divisor has zeros to the right. 3979 E.g. if x is an unsigned 32 bit number: 3980 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28 3981 */ 3982 3983 rtx 3984 expand_divmod (int rem_flag, enum tree_code code, machine_mode mode, 3985 rtx op0, rtx op1, rtx target, int unsignedp) 3986 { 3987 machine_mode compute_mode; 3988 rtx tquotient; 3989 rtx quotient = 0, remainder = 0; 3990 rtx_insn *last; 3991 int size; 3992 rtx_insn *insn; 3993 optab optab1, optab2; 3994 int op1_is_constant, op1_is_pow2 = 0; 3995 int max_cost, extra_cost; 3996 static HOST_WIDE_INT last_div_const = 0; 3997 bool speed = optimize_insn_for_speed_p (); 3998 3999 op1_is_constant = CONST_INT_P (op1); 4000 if (op1_is_constant) 4001 { 4002 wide_int ext_op1 = rtx_mode_t (op1, mode); 4003 op1_is_pow2 = (wi::popcount (ext_op1) == 1 4004 || (! unsignedp 4005 && wi::popcount (wi::neg (ext_op1)) == 1)); 4006 } 4007 4008 /* 4009 This is the structure of expand_divmod: 4010 4011 First comes code to fix up the operands so we can perform the operations 4012 correctly and efficiently. 4013 4014 Second comes a switch statement with code specific for each rounding mode. 4015 For some special operands this code emits all RTL for the desired 4016 operation, for other cases, it generates only a quotient and stores it in 4017 QUOTIENT. The case for trunc division/remainder might leave quotient = 0, 4018 to indicate that it has not done anything. 4019 4020 Last comes code that finishes the operation. If QUOTIENT is set and 4021 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If 4022 QUOTIENT is not set, it is computed using trunc rounding. 
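   As a concrete illustration (using OP0 = -7 and OP1 = 2): the quotient
   is -3 under TRUNC_DIV_EXPR, -4 under FLOOR_DIV_EXPR, -3 under
   CEIL_DIV_EXPR and -4 under ROUND_DIV_EXPR; if the caller asked for the
   FLOOR_MOD_EXPR remainder instead, the finishing code computes it as
   -7 - (-4 * 2) = 1.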
4023 4024 We try to generate special code for division and remainder when OP1 is a 4025 constant. If |OP1| = 2**n we can use shifts and some other fast 4026 operations. For other values of OP1, we compute a carefully selected 4027 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0 4028 by m. 4029 4030 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper 4031 half of the product. Different strategies for generating the product are 4032 implemented in expmed_mult_highpart. 4033 4034 If what we actually want is the remainder, we generate that by another 4035 by-constant multiplication and a subtraction. */ 4036 4037 /* We shouldn't be called with OP1 == const1_rtx, but some of the 4038 code below will malfunction if we are, so check here and handle 4039 the special case if so. */ 4040 if (op1 == const1_rtx) 4041 return rem_flag ? const0_rtx : op0; 4042 4043 /* When dividing by -1, we could get an overflow. 4044 negv_optab can handle overflows. */ 4045 if (! unsignedp && op1 == constm1_rtx) 4046 { 4047 if (rem_flag) 4048 return const0_rtx; 4049 return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT 4050 ? negv_optab : neg_optab, op0, target, 0); 4051 } 4052 4053 if (target 4054 /* Don't use the function value register as a target 4055 since we have to read it as well as write it, 4056 and function-inlining gets confused by this. */ 4057 && ((REG_P (target) && REG_FUNCTION_VALUE_P (target)) 4058 /* Don't clobber an operand while doing a multi-step calculation. */ 4059 || ((rem_flag || op1_is_constant) 4060 && (reg_mentioned_p (target, op0) 4061 || (MEM_P (op0) && MEM_P (target)))) 4062 || reg_mentioned_p (target, op1) 4063 || (MEM_P (op1) && MEM_P (target)))) 4064 target = 0; 4065 4066 /* Get the mode in which to perform this computation. Normally it will 4067 be MODE, but sometimes we can't do the desired operation in MODE. 4068 If so, pick a wider mode in which we can do the operation. Convert 4069 to that mode at the start to avoid repeated conversions. 4070 4071 First see what operations we need. These depend on the expression 4072 we are evaluating. (We assume that divxx3 insns exist under the 4073 same conditions that modxx3 insns and that these insns don't normally 4074 fail. If these assumptions are not correct, we may generate less 4075 efficient code in some cases.) 4076 4077 Then see if we find a mode in which we can open-code that operation 4078 (either a division, modulus, or shift). Finally, check for the smallest 4079 mode for which we can do the operation with a library call. */ 4080 4081 /* We might want to refine this now that we have division-by-constant 4082 optimization. Since expmed_mult_highpart tries so many variants, it is 4083 not straightforward to generalize this. Maybe we should make an array 4084 of possible modes in init_expmed? Save this for GCC 2.7. */ 4085 4086 optab1 = (op1_is_pow2 4087 ? (unsignedp ? lshr_optab : ashr_optab) 4088 : (unsignedp ? udiv_optab : sdiv_optab)); 4089 optab2 = (op1_is_pow2 ? optab1 4090 : (unsignedp ? 
udivmod_optab : sdivmod_optab)); 4091 4092 for (compute_mode = mode; compute_mode != VOIDmode; 4093 compute_mode = GET_MODE_WIDER_MODE (compute_mode)) 4094 if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing 4095 || optab_handler (optab2, compute_mode) != CODE_FOR_nothing) 4096 break; 4097 4098 if (compute_mode == VOIDmode) 4099 for (compute_mode = mode; compute_mode != VOIDmode; 4100 compute_mode = GET_MODE_WIDER_MODE (compute_mode)) 4101 if (optab_libfunc (optab1, compute_mode) 4102 || optab_libfunc (optab2, compute_mode)) 4103 break; 4104 4105 /* If we still couldn't find a mode, use MODE, but expand_binop will 4106 probably die. */ 4107 if (compute_mode == VOIDmode) 4108 compute_mode = mode; 4109 4110 if (target && GET_MODE (target) == compute_mode) 4111 tquotient = target; 4112 else 4113 tquotient = gen_reg_rtx (compute_mode); 4114 4115 size = GET_MODE_BITSIZE (compute_mode); 4116 #if 0 4117 /* It should be possible to restrict the precision to GET_MODE_BITSIZE 4118 (mode), and thereby get better code when OP1 is a constant. Do that 4119 later. It will require going over all usages of SIZE below. */ 4120 size = GET_MODE_BITSIZE (mode); 4121 #endif 4122 4123 /* Only deduct something for a REM if the last divide done was 4124 for a different constant. Then set the constant of the last 4125 divide. */ 4126 max_cost = (unsignedp 4127 ? udiv_cost (speed, compute_mode) 4128 : sdiv_cost (speed, compute_mode)); 4129 if (rem_flag && ! (last_div_const != 0 && op1_is_constant 4130 && INTVAL (op1) == last_div_const)) 4131 max_cost -= (mul_cost (speed, compute_mode) 4132 + add_cost (speed, compute_mode)); 4133 4134 last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0; 4135 4136 /* Now convert to the best mode to use. */ 4137 if (compute_mode != mode) 4138 { 4139 op0 = convert_modes (compute_mode, mode, op0, unsignedp); 4140 op1 = convert_modes (compute_mode, mode, op1, unsignedp); 4141 4142 /* convert_modes may have placed op1 into a register, so we 4143 must recompute the following. */ 4144 op1_is_constant = CONST_INT_P (op1); 4145 if (op1_is_constant) 4146 { 4147 wide_int ext_op1 = rtx_mode_t (op1, compute_mode); 4148 op1_is_pow2 = (wi::popcount (ext_op1) == 1 4149 || (! unsignedp 4150 && wi::popcount (wi::neg (ext_op1)) == 1)); 4151 } 4152 else 4153 op1_is_pow2 = 0; 4154 } 4155 4156 /* If one of the operands is a volatile MEM, copy it into a register. */ 4157 4158 if (MEM_P (op0) && MEM_VOLATILE_P (op0)) 4159 op0 = force_reg (compute_mode, op0); 4160 if (MEM_P (op1) && MEM_VOLATILE_P (op1)) 4161 op1 = force_reg (compute_mode, op1); 4162 4163 /* If we need the remainder or if OP1 is constant, we need to 4164 put OP0 in a register in case it has any queued subexpressions. */ 4165 if (rem_flag || op1_is_constant) 4166 op0 = force_reg (compute_mode, op0); 4167 4168 last = get_last_insn (); 4169 4170 /* Promote floor rounding to trunc rounding for unsigned operations. 
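     This is safe because an unsigned quotient can never be negative, so
     truncating toward zero and flooring toward negative infinity agree;
     and an EXACT_DIV_EXPR by a power of two leaves no remainder by
     definition, so treating it as a truncating division (a single
     logical shift) changes nothing.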
*/ 4171 if (unsignedp) 4172 { 4173 if (code == FLOOR_DIV_EXPR) 4174 code = TRUNC_DIV_EXPR; 4175 if (code == FLOOR_MOD_EXPR) 4176 code = TRUNC_MOD_EXPR; 4177 if (code == EXACT_DIV_EXPR && op1_is_pow2) 4178 code = TRUNC_DIV_EXPR; 4179 } 4180 4181 if (op1 != const0_rtx) 4182 switch (code) 4183 { 4184 case TRUNC_MOD_EXPR: 4185 case TRUNC_DIV_EXPR: 4186 if (op1_is_constant) 4187 { 4188 if (unsignedp) 4189 { 4190 unsigned HOST_WIDE_INT mh, ml; 4191 int pre_shift, post_shift; 4192 int dummy; 4193 wide_int wd = rtx_mode_t (op1, compute_mode); 4194 unsigned HOST_WIDE_INT d = wd.to_uhwi (); 4195 4196 if (wi::popcount (wd) == 1) 4197 { 4198 pre_shift = floor_log2 (d); 4199 if (rem_flag) 4200 { 4201 unsigned HOST_WIDE_INT mask 4202 = (HOST_WIDE_INT_1U << pre_shift) - 1; 4203 remainder 4204 = expand_binop (compute_mode, and_optab, op0, 4205 gen_int_mode (mask, compute_mode), 4206 remainder, 1, 4207 OPTAB_LIB_WIDEN); 4208 if (remainder) 4209 return gen_lowpart (mode, remainder); 4210 } 4211 quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0, 4212 pre_shift, tquotient, 1); 4213 } 4214 else if (size <= HOST_BITS_PER_WIDE_INT) 4215 { 4216 if (d >= (HOST_WIDE_INT_1U << (size - 1))) 4217 { 4218 /* Most significant bit of divisor is set; emit an scc 4219 insn. */ 4220 quotient = emit_store_flag_force (tquotient, GEU, op0, op1, 4221 compute_mode, 1, 1); 4222 } 4223 else 4224 { 4225 /* Find a suitable multiplier and right shift count 4226 instead of multiplying with D. */ 4227 4228 mh = choose_multiplier (d, size, size, 4229 &ml, &post_shift, &dummy); 4230 4231 /* If the suggested multiplier is more than SIZE bits, 4232 we can do better for even divisors, using an 4233 initial right shift. */ 4234 if (mh != 0 && (d & 1) == 0) 4235 { 4236 pre_shift = ctz_or_zero (d); 4237 mh = choose_multiplier (d >> pre_shift, size, 4238 size - pre_shift, 4239 &ml, &post_shift, &dummy); 4240 gcc_assert (!mh); 4241 } 4242 else 4243 pre_shift = 0; 4244 4245 if (mh != 0) 4246 { 4247 rtx t1, t2, t3, t4; 4248 4249 if (post_shift - 1 >= BITS_PER_WORD) 4250 goto fail1; 4251 4252 extra_cost 4253 = (shift_cost (speed, compute_mode, post_shift - 1) 4254 + shift_cost (speed, compute_mode, 1) 4255 + 2 * add_cost (speed, compute_mode)); 4256 t1 = expmed_mult_highpart 4257 (compute_mode, op0, 4258 gen_int_mode (ml, compute_mode), 4259 NULL_RTX, 1, max_cost - extra_cost); 4260 if (t1 == 0) 4261 goto fail1; 4262 t2 = force_operand (gen_rtx_MINUS (compute_mode, 4263 op0, t1), 4264 NULL_RTX); 4265 t3 = expand_shift (RSHIFT_EXPR, compute_mode, 4266 t2, 1, NULL_RTX, 1); 4267 t4 = force_operand (gen_rtx_PLUS (compute_mode, 4268 t1, t3), 4269 NULL_RTX); 4270 quotient = expand_shift 4271 (RSHIFT_EXPR, compute_mode, t4, 4272 post_shift - 1, tquotient, 1); 4273 } 4274 else 4275 { 4276 rtx t1, t2; 4277 4278 if (pre_shift >= BITS_PER_WORD 4279 || post_shift >= BITS_PER_WORD) 4280 goto fail1; 4281 4282 t1 = expand_shift 4283 (RSHIFT_EXPR, compute_mode, op0, 4284 pre_shift, NULL_RTX, 1); 4285 extra_cost 4286 = (shift_cost (speed, compute_mode, pre_shift) 4287 + shift_cost (speed, compute_mode, post_shift)); 4288 t2 = expmed_mult_highpart 4289 (compute_mode, t1, 4290 gen_int_mode (ml, compute_mode), 4291 NULL_RTX, 1, max_cost - extra_cost); 4292 if (t2 == 0) 4293 goto fail1; 4294 quotient = expand_shift 4295 (RSHIFT_EXPR, compute_mode, t2, 4296 post_shift, tquotient, 1); 4297 } 4298 } 4299 } 4300 else /* Too wide mode to use tricky code */ 4301 break; 4302 4303 insn = get_last_insn (); 4304 if (insn != last) 4305 set_dst_reg_note (insn, REG_EQUAL, 4306 
gen_rtx_UDIV (compute_mode, op0, op1), 4307 quotient); 4308 } 4309 else /* TRUNC_DIV, signed */ 4310 { 4311 unsigned HOST_WIDE_INT ml; 4312 int lgup, post_shift; 4313 rtx mlr; 4314 HOST_WIDE_INT d = INTVAL (op1); 4315 unsigned HOST_WIDE_INT abs_d; 4316 4317 /* Not prepared to handle division/remainder by 4318 0xffffffffffffffff8000000000000000 etc. */ 4319 if (d == HOST_WIDE_INT_MIN && size > HOST_BITS_PER_WIDE_INT) 4320 break; 4321 4322 /* Since d might be INT_MIN, we have to cast to 4323 unsigned HOST_WIDE_INT before negating to avoid 4324 undefined signed overflow. */ 4325 abs_d = (d >= 0 4326 ? (unsigned HOST_WIDE_INT) d 4327 : - (unsigned HOST_WIDE_INT) d); 4328 4329 /* n rem d = n rem -d */ 4330 if (rem_flag && d < 0) 4331 { 4332 d = abs_d; 4333 op1 = gen_int_mode (abs_d, compute_mode); 4334 } 4335 4336 if (d == 1) 4337 quotient = op0; 4338 else if (d == -1) 4339 quotient = expand_unop (compute_mode, neg_optab, op0, 4340 tquotient, 0); 4341 else if (size <= HOST_BITS_PER_WIDE_INT 4342 && abs_d == HOST_WIDE_INT_1U << (size - 1)) 4343 { 4344 /* This case is not handled correctly below. */ 4345 quotient = emit_store_flag (tquotient, EQ, op0, op1, 4346 compute_mode, 1, 1); 4347 if (quotient == 0) 4348 goto fail1; 4349 } 4350 else if (EXACT_POWER_OF_2_OR_ZERO_P (d) 4351 && (size <= HOST_BITS_PER_WIDE_INT || d >= 0) 4352 && (rem_flag 4353 ? smod_pow2_cheap (speed, compute_mode) 4354 : sdiv_pow2_cheap (speed, compute_mode)) 4355 /* We assume that cheap metric is true if the 4356 optab has an expander for this mode. */ 4357 && ((optab_handler ((rem_flag ? smod_optab 4358 : sdiv_optab), 4359 compute_mode) 4360 != CODE_FOR_nothing) 4361 || (optab_handler (sdivmod_optab, 4362 compute_mode) 4363 != CODE_FOR_nothing))) 4364 ; 4365 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d)) 4366 { 4367 if (rem_flag) 4368 { 4369 remainder = expand_smod_pow2 (compute_mode, op0, d); 4370 if (remainder) 4371 return gen_lowpart (mode, remainder); 4372 } 4373 4374 if (sdiv_pow2_cheap (speed, compute_mode) 4375 && ((optab_handler (sdiv_optab, compute_mode) 4376 != CODE_FOR_nothing) 4377 || (optab_handler (sdivmod_optab, compute_mode) 4378 != CODE_FOR_nothing))) 4379 quotient = expand_divmod (0, TRUNC_DIV_EXPR, 4380 compute_mode, op0, 4381 gen_int_mode (abs_d, 4382 compute_mode), 4383 NULL_RTX, 0); 4384 else 4385 quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d); 4386 4387 /* We have computed OP0 / abs(OP1). If OP1 is negative, 4388 negate the quotient. 
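		  (Truncating division satisfies x / -y = -(x / y), e.g.
		  7 / -2 = -(7 / 2) = -3, so negating the quotient computed
		  for abs(OP1) gives the desired result.)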
*/ 4389 if (d < 0) 4390 { 4391 insn = get_last_insn (); 4392 if (insn != last 4393 && abs_d < (HOST_WIDE_INT_1U 4394 << (HOST_BITS_PER_WIDE_INT - 1))) 4395 set_dst_reg_note (insn, REG_EQUAL, 4396 gen_rtx_DIV (compute_mode, op0, 4397 gen_int_mode 4398 (abs_d, 4399 compute_mode)), 4400 quotient); 4401 4402 quotient = expand_unop (compute_mode, neg_optab, 4403 quotient, quotient, 0); 4404 } 4405 } 4406 else if (size <= HOST_BITS_PER_WIDE_INT) 4407 { 4408 choose_multiplier (abs_d, size, size - 1, 4409 &ml, &post_shift, &lgup); 4410 if (ml < HOST_WIDE_INT_1U << (size - 1)) 4411 { 4412 rtx t1, t2, t3; 4413 4414 if (post_shift >= BITS_PER_WORD 4415 || size - 1 >= BITS_PER_WORD) 4416 goto fail1; 4417 4418 extra_cost = (shift_cost (speed, compute_mode, post_shift) 4419 + shift_cost (speed, compute_mode, size - 1) 4420 + add_cost (speed, compute_mode)); 4421 t1 = expmed_mult_highpart 4422 (compute_mode, op0, gen_int_mode (ml, compute_mode), 4423 NULL_RTX, 0, max_cost - extra_cost); 4424 if (t1 == 0) 4425 goto fail1; 4426 t2 = expand_shift 4427 (RSHIFT_EXPR, compute_mode, t1, 4428 post_shift, NULL_RTX, 0); 4429 t3 = expand_shift 4430 (RSHIFT_EXPR, compute_mode, op0, 4431 size - 1, NULL_RTX, 0); 4432 if (d < 0) 4433 quotient 4434 = force_operand (gen_rtx_MINUS (compute_mode, 4435 t3, t2), 4436 tquotient); 4437 else 4438 quotient 4439 = force_operand (gen_rtx_MINUS (compute_mode, 4440 t2, t3), 4441 tquotient); 4442 } 4443 else 4444 { 4445 rtx t1, t2, t3, t4; 4446 4447 if (post_shift >= BITS_PER_WORD 4448 || size - 1 >= BITS_PER_WORD) 4449 goto fail1; 4450 4451 ml |= HOST_WIDE_INT_M1U << (size - 1); 4452 mlr = gen_int_mode (ml, compute_mode); 4453 extra_cost = (shift_cost (speed, compute_mode, post_shift) 4454 + shift_cost (speed, compute_mode, size - 1) 4455 + 2 * add_cost (speed, compute_mode)); 4456 t1 = expmed_mult_highpart (compute_mode, op0, mlr, 4457 NULL_RTX, 0, 4458 max_cost - extra_cost); 4459 if (t1 == 0) 4460 goto fail1; 4461 t2 = force_operand (gen_rtx_PLUS (compute_mode, 4462 t1, op0), 4463 NULL_RTX); 4464 t3 = expand_shift 4465 (RSHIFT_EXPR, compute_mode, t2, 4466 post_shift, NULL_RTX, 0); 4467 t4 = expand_shift 4468 (RSHIFT_EXPR, compute_mode, op0, 4469 size - 1, NULL_RTX, 0); 4470 if (d < 0) 4471 quotient 4472 = force_operand (gen_rtx_MINUS (compute_mode, 4473 t4, t3), 4474 tquotient); 4475 else 4476 quotient 4477 = force_operand (gen_rtx_MINUS (compute_mode, 4478 t3, t4), 4479 tquotient); 4480 } 4481 } 4482 else /* Too wide mode to use tricky code */ 4483 break; 4484 4485 insn = get_last_insn (); 4486 if (insn != last) 4487 set_dst_reg_note (insn, REG_EQUAL, 4488 gen_rtx_DIV (compute_mode, op0, op1), 4489 quotient); 4490 } 4491 break; 4492 } 4493 fail1: 4494 delete_insns_since (last); 4495 break; 4496 4497 case FLOOR_DIV_EXPR: 4498 case FLOOR_MOD_EXPR: 4499 /* We will come here only for signed operations. */ 4500 if (op1_is_constant && size <= HOST_BITS_PER_WIDE_INT) 4501 { 4502 unsigned HOST_WIDE_INT mh, ml; 4503 int pre_shift, lgup, post_shift; 4504 HOST_WIDE_INT d = INTVAL (op1); 4505 4506 if (d > 0) 4507 { 4508 /* We could just as easily deal with negative constants here, 4509 but it does not seem worth the trouble for GCC 2.6. 
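		 Note that for a positive power of two the floor quotient
		 is simply an arithmetic right shift, e.g. (illustratively)
		 floor(-7 / 2) = -4 = -7 >> 1, which is why the
		 power-of-two path below needs no quotient fix-up.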
*/ 4510 if (EXACT_POWER_OF_2_OR_ZERO_P (d)) 4511 { 4512 pre_shift = floor_log2 (d); 4513 if (rem_flag) 4514 { 4515 unsigned HOST_WIDE_INT mask 4516 = (HOST_WIDE_INT_1U << pre_shift) - 1; 4517 remainder = expand_binop 4518 (compute_mode, and_optab, op0, 4519 gen_int_mode (mask, compute_mode), 4520 remainder, 0, OPTAB_LIB_WIDEN); 4521 if (remainder) 4522 return gen_lowpart (mode, remainder); 4523 } 4524 quotient = expand_shift 4525 (RSHIFT_EXPR, compute_mode, op0, 4526 pre_shift, tquotient, 0); 4527 } 4528 else 4529 { 4530 rtx t1, t2, t3, t4; 4531 4532 mh = choose_multiplier (d, size, size - 1, 4533 &ml, &post_shift, &lgup); 4534 gcc_assert (!mh); 4535 4536 if (post_shift < BITS_PER_WORD 4537 && size - 1 < BITS_PER_WORD) 4538 { 4539 t1 = expand_shift 4540 (RSHIFT_EXPR, compute_mode, op0, 4541 size - 1, NULL_RTX, 0); 4542 t2 = expand_binop (compute_mode, xor_optab, op0, t1, 4543 NULL_RTX, 0, OPTAB_WIDEN); 4544 extra_cost = (shift_cost (speed, compute_mode, post_shift) 4545 + shift_cost (speed, compute_mode, size - 1) 4546 + 2 * add_cost (speed, compute_mode)); 4547 t3 = expmed_mult_highpart 4548 (compute_mode, t2, gen_int_mode (ml, compute_mode), 4549 NULL_RTX, 1, max_cost - extra_cost); 4550 if (t3 != 0) 4551 { 4552 t4 = expand_shift 4553 (RSHIFT_EXPR, compute_mode, t3, 4554 post_shift, NULL_RTX, 1); 4555 quotient = expand_binop (compute_mode, xor_optab, 4556 t4, t1, tquotient, 0, 4557 OPTAB_WIDEN); 4558 } 4559 } 4560 } 4561 } 4562 else 4563 { 4564 rtx nsign, t1, t2, t3, t4; 4565 t1 = force_operand (gen_rtx_PLUS (compute_mode, 4566 op0, constm1_rtx), NULL_RTX); 4567 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX, 4568 0, OPTAB_WIDEN); 4569 nsign = expand_shift (RSHIFT_EXPR, compute_mode, t2, 4570 size - 1, NULL_RTX, 0); 4571 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign), 4572 NULL_RTX); 4573 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1, 4574 NULL_RTX, 0); 4575 if (t4) 4576 { 4577 rtx t5; 4578 t5 = expand_unop (compute_mode, one_cmpl_optab, nsign, 4579 NULL_RTX, 0); 4580 quotient = force_operand (gen_rtx_PLUS (compute_mode, 4581 t4, t5), 4582 tquotient); 4583 } 4584 } 4585 } 4586 4587 if (quotient != 0) 4588 break; 4589 delete_insns_since (last); 4590 4591 /* Try using an instruction that produces both the quotient and 4592 remainder, using truncation. We can easily compensate the quotient 4593 or remainder to get floor rounding, once we have the remainder. 4594 Notice that we compute also the final remainder value here, 4595 and return the result right away. */ 4596 if (target == 0 || GET_MODE (target) != compute_mode) 4597 target = gen_reg_rtx (compute_mode); 4598 4599 if (rem_flag) 4600 { 4601 remainder 4602 = REG_P (target) ? target : gen_reg_rtx (compute_mode); 4603 quotient = gen_reg_rtx (compute_mode); 4604 } 4605 else 4606 { 4607 quotient 4608 = REG_P (target) ? target : gen_reg_rtx (compute_mode); 4609 remainder = gen_reg_rtx (compute_mode); 4610 } 4611 4612 if (expand_twoval_binop (sdivmod_optab, op0, op1, 4613 quotient, remainder, 0)) 4614 { 4615 /* This could be computed with a branch-less sequence. 4616 Save that for later. 
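	     As an example of the fix-up below, with OP0 = -7 and OP1 = 2
	     the truncating divmod yields quotient -3, remainder -1; the
	     remainder is nonzero and the operands' signs differ, so the
	     quotient is decremented to -4 and OP1 is added to the
	     remainder, giving the floor results -4 and 1.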
*/ 4617 rtx tem; 4618 rtx_code_label *label = gen_label_rtx (); 4619 do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label); 4620 tem = expand_binop (compute_mode, xor_optab, op0, op1, 4621 NULL_RTX, 0, OPTAB_WIDEN); 4622 do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label); 4623 expand_dec (quotient, const1_rtx); 4624 expand_inc (remainder, op1); 4625 emit_label (label); 4626 return gen_lowpart (mode, rem_flag ? remainder : quotient); 4627 } 4628 4629 /* No luck with division elimination or divmod. Have to do it 4630 by conditionally adjusting op0 *and* the result. */ 4631 { 4632 rtx_code_label *label1, *label2, *label3, *label4, *label5; 4633 rtx adjusted_op0; 4634 rtx tem; 4635 4636 quotient = gen_reg_rtx (compute_mode); 4637 adjusted_op0 = copy_to_mode_reg (compute_mode, op0); 4638 label1 = gen_label_rtx (); 4639 label2 = gen_label_rtx (); 4640 label3 = gen_label_rtx (); 4641 label4 = gen_label_rtx (); 4642 label5 = gen_label_rtx (); 4643 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2); 4644 do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1); 4645 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1, 4646 quotient, 0, OPTAB_LIB_WIDEN); 4647 if (tem != quotient) 4648 emit_move_insn (quotient, tem); 4649 emit_jump_insn (targetm.gen_jump (label5)); 4650 emit_barrier (); 4651 emit_label (label1); 4652 expand_inc (adjusted_op0, const1_rtx); 4653 emit_jump_insn (targetm.gen_jump (label4)); 4654 emit_barrier (); 4655 emit_label (label2); 4656 do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3); 4657 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1, 4658 quotient, 0, OPTAB_LIB_WIDEN); 4659 if (tem != quotient) 4660 emit_move_insn (quotient, tem); 4661 emit_jump_insn (targetm.gen_jump (label5)); 4662 emit_barrier (); 4663 emit_label (label3); 4664 expand_dec (adjusted_op0, const1_rtx); 4665 emit_label (label4); 4666 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1, 4667 quotient, 0, OPTAB_LIB_WIDEN); 4668 if (tem != quotient) 4669 emit_move_insn (quotient, tem); 4670 expand_dec (quotient, const1_rtx); 4671 emit_label (label5); 4672 } 4673 break; 4674 4675 case CEIL_DIV_EXPR: 4676 case CEIL_MOD_EXPR: 4677 if (unsignedp) 4678 { 4679 if (op1_is_constant 4680 && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)) 4681 && (size <= HOST_BITS_PER_WIDE_INT 4682 || INTVAL (op1) >= 0)) 4683 { 4684 rtx t1, t2, t3; 4685 unsigned HOST_WIDE_INT d = INTVAL (op1); 4686 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0, 4687 floor_log2 (d), tquotient, 1); 4688 t2 = expand_binop (compute_mode, and_optab, op0, 4689 gen_int_mode (d - 1, compute_mode), 4690 NULL_RTX, 1, OPTAB_LIB_WIDEN); 4691 t3 = gen_reg_rtx (compute_mode); 4692 t3 = emit_store_flag (t3, NE, t2, const0_rtx, 4693 compute_mode, 1, 1); 4694 if (t3 == 0) 4695 { 4696 rtx_code_label *lab; 4697 lab = gen_label_rtx (); 4698 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab); 4699 expand_inc (t1, const1_rtx); 4700 emit_label (lab); 4701 quotient = t1; 4702 } 4703 else 4704 quotient = force_operand (gen_rtx_PLUS (compute_mode, 4705 t1, t3), 4706 tquotient); 4707 break; 4708 } 4709 4710 /* Try using an instruction that produces both the quotient and 4711 remainder, using truncation. We can easily compensate the 4712 quotient or remainder to get ceiling rounding, once we have the 4713 remainder. Notice that we compute also the final remainder 4714 value here, and return the result right away. 
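	     For instance, with OP0 = 7 and OP1 = 2 the truncating divmod
	     yields quotient 3, remainder 1; since the remainder is
	     nonzero the code below bumps the quotient to 4 and subtracts
	     OP1 from the remainder, giving -1, the remainder that matches
	     ceiling rounding.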
*/ 4715 if (target == 0 || GET_MODE (target) != compute_mode) 4716 target = gen_reg_rtx (compute_mode); 4717 4718 if (rem_flag) 4719 { 4720 remainder = (REG_P (target) 4721 ? target : gen_reg_rtx (compute_mode)); 4722 quotient = gen_reg_rtx (compute_mode); 4723 } 4724 else 4725 { 4726 quotient = (REG_P (target) 4727 ? target : gen_reg_rtx (compute_mode)); 4728 remainder = gen_reg_rtx (compute_mode); 4729 } 4730 4731 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, 4732 remainder, 1)) 4733 { 4734 /* This could be computed with a branch-less sequence. 4735 Save that for later. */ 4736 rtx_code_label *label = gen_label_rtx (); 4737 do_cmp_and_jump (remainder, const0_rtx, EQ, 4738 compute_mode, label); 4739 expand_inc (quotient, const1_rtx); 4740 expand_dec (remainder, op1); 4741 emit_label (label); 4742 return gen_lowpart (mode, rem_flag ? remainder : quotient); 4743 } 4744 4745 /* No luck with division elimination or divmod. Have to do it 4746 by conditionally adjusting op0 *and* the result. */ 4747 { 4748 rtx_code_label *label1, *label2; 4749 rtx adjusted_op0, tem; 4750 4751 quotient = gen_reg_rtx (compute_mode); 4752 adjusted_op0 = copy_to_mode_reg (compute_mode, op0); 4753 label1 = gen_label_rtx (); 4754 label2 = gen_label_rtx (); 4755 do_cmp_and_jump (adjusted_op0, const0_rtx, NE, 4756 compute_mode, label1); 4757 emit_move_insn (quotient, const0_rtx); 4758 emit_jump_insn (targetm.gen_jump (label2)); 4759 emit_barrier (); 4760 emit_label (label1); 4761 expand_dec (adjusted_op0, const1_rtx); 4762 tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1, 4763 quotient, 1, OPTAB_LIB_WIDEN); 4764 if (tem != quotient) 4765 emit_move_insn (quotient, tem); 4766 expand_inc (quotient, const1_rtx); 4767 emit_label (label2); 4768 } 4769 } 4770 else /* signed */ 4771 { 4772 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)) 4773 && INTVAL (op1) >= 0) 4774 { 4775 /* This is extremely similar to the code for the unsigned case 4776 above. For 2.7 we should merge these variants, but for 4777 2.6.1 I don't want to touch the code for unsigned since that 4778 get used in C. The signed case will only be used by other 4779 languages (Ada). */ 4780 4781 rtx t1, t2, t3; 4782 unsigned HOST_WIDE_INT d = INTVAL (op1); 4783 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0, 4784 floor_log2 (d), tquotient, 0); 4785 t2 = expand_binop (compute_mode, and_optab, op0, 4786 gen_int_mode (d - 1, compute_mode), 4787 NULL_RTX, 1, OPTAB_LIB_WIDEN); 4788 t3 = gen_reg_rtx (compute_mode); 4789 t3 = emit_store_flag (t3, NE, t2, const0_rtx, 4790 compute_mode, 1, 1); 4791 if (t3 == 0) 4792 { 4793 rtx_code_label *lab; 4794 lab = gen_label_rtx (); 4795 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab); 4796 expand_inc (t1, const1_rtx); 4797 emit_label (lab); 4798 quotient = t1; 4799 } 4800 else 4801 quotient = force_operand (gen_rtx_PLUS (compute_mode, 4802 t1, t3), 4803 tquotient); 4804 break; 4805 } 4806 4807 /* Try using an instruction that produces both the quotient and 4808 remainder, using truncation. We can easily compensate the 4809 quotient or remainder to get ceiling rounding, once we have the 4810 remainder. Notice that we compute also the final remainder 4811 value here, and return the result right away. */ 4812 if (target == 0 || GET_MODE (target) != compute_mode) 4813 target = gen_reg_rtx (compute_mode); 4814 if (rem_flag) 4815 { 4816 remainder= (REG_P (target) 4817 ? 
target : gen_reg_rtx (compute_mode)); 4818 quotient = gen_reg_rtx (compute_mode); 4819 } 4820 else 4821 { 4822 quotient = (REG_P (target) 4823 ? target : gen_reg_rtx (compute_mode)); 4824 remainder = gen_reg_rtx (compute_mode); 4825 } 4826 4827 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, 4828 remainder, 0)) 4829 { 4830 /* This could be computed with a branch-less sequence. 4831 Save that for later. */ 4832 rtx tem; 4833 rtx_code_label *label = gen_label_rtx (); 4834 do_cmp_and_jump (remainder, const0_rtx, EQ, 4835 compute_mode, label); 4836 tem = expand_binop (compute_mode, xor_optab, op0, op1, 4837 NULL_RTX, 0, OPTAB_WIDEN); 4838 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label); 4839 expand_inc (quotient, const1_rtx); 4840 expand_dec (remainder, op1); 4841 emit_label (label); 4842 return gen_lowpart (mode, rem_flag ? remainder : quotient); 4843 } 4844 4845 /* No luck with division elimination or divmod. Have to do it 4846 by conditionally adjusting op0 *and* the result. */ 4847 { 4848 rtx_code_label *label1, *label2, *label3, *label4, *label5; 4849 rtx adjusted_op0; 4850 rtx tem; 4851 4852 quotient = gen_reg_rtx (compute_mode); 4853 adjusted_op0 = copy_to_mode_reg (compute_mode, op0); 4854 label1 = gen_label_rtx (); 4855 label2 = gen_label_rtx (); 4856 label3 = gen_label_rtx (); 4857 label4 = gen_label_rtx (); 4858 label5 = gen_label_rtx (); 4859 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2); 4860 do_cmp_and_jump (adjusted_op0, const0_rtx, GT, 4861 compute_mode, label1); 4862 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1, 4863 quotient, 0, OPTAB_LIB_WIDEN); 4864 if (tem != quotient) 4865 emit_move_insn (quotient, tem); 4866 emit_jump_insn (targetm.gen_jump (label5)); 4867 emit_barrier (); 4868 emit_label (label1); 4869 expand_dec (adjusted_op0, const1_rtx); 4870 emit_jump_insn (targetm.gen_jump (label4)); 4871 emit_barrier (); 4872 emit_label (label2); 4873 do_cmp_and_jump (adjusted_op0, const0_rtx, LT, 4874 compute_mode, label3); 4875 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1, 4876 quotient, 0, OPTAB_LIB_WIDEN); 4877 if (tem != quotient) 4878 emit_move_insn (quotient, tem); 4879 emit_jump_insn (targetm.gen_jump (label5)); 4880 emit_barrier (); 4881 emit_label (label3); 4882 expand_inc (adjusted_op0, const1_rtx); 4883 emit_label (label4); 4884 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1, 4885 quotient, 0, OPTAB_LIB_WIDEN); 4886 if (tem != quotient) 4887 emit_move_insn (quotient, tem); 4888 expand_inc (quotient, const1_rtx); 4889 emit_label (label5); 4890 } 4891 } 4892 break; 4893 4894 case EXACT_DIV_EXPR: 4895 if (op1_is_constant && size <= HOST_BITS_PER_WIDE_INT) 4896 { 4897 HOST_WIDE_INT d = INTVAL (op1); 4898 unsigned HOST_WIDE_INT ml; 4899 int pre_shift; 4900 rtx t1; 4901 4902 pre_shift = ctz_or_zero (d); 4903 ml = invert_mod2n (d >> pre_shift, size); 4904 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0, 4905 pre_shift, NULL_RTX, unsignedp); 4906 quotient = expand_mult (compute_mode, t1, 4907 gen_int_mode (ml, compute_mode), 4908 NULL_RTX, 1); 4909 4910 insn = get_last_insn (); 4911 set_dst_reg_note (insn, REG_EQUAL, 4912 gen_rtx_fmt_ee (unsignedp ? 
UDIV : DIV, 4913 compute_mode, op0, op1), 4914 quotient); 4915 } 4916 break; 4917 4918 case ROUND_DIV_EXPR: 4919 case ROUND_MOD_EXPR: 4920 if (unsignedp) 4921 { 4922 rtx tem; 4923 rtx_code_label *label; 4924 label = gen_label_rtx (); 4925 quotient = gen_reg_rtx (compute_mode); 4926 remainder = gen_reg_rtx (compute_mode); 4927 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0) 4928 { 4929 rtx tem; 4930 quotient = expand_binop (compute_mode, udiv_optab, op0, op1, 4931 quotient, 1, OPTAB_LIB_WIDEN); 4932 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1); 4933 remainder = expand_binop (compute_mode, sub_optab, op0, tem, 4934 remainder, 1, OPTAB_LIB_WIDEN); 4935 } 4936 tem = plus_constant (compute_mode, op1, -1); 4937 tem = expand_shift (RSHIFT_EXPR, compute_mode, tem, 1, NULL_RTX, 1); 4938 do_cmp_and_jump (remainder, tem, LEU, compute_mode, label); 4939 expand_inc (quotient, const1_rtx); 4940 expand_dec (remainder, op1); 4941 emit_label (label); 4942 } 4943 else 4944 { 4945 rtx abs_rem, abs_op1, tem, mask; 4946 rtx_code_label *label; 4947 label = gen_label_rtx (); 4948 quotient = gen_reg_rtx (compute_mode); 4949 remainder = gen_reg_rtx (compute_mode); 4950 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0) 4951 { 4952 rtx tem; 4953 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1, 4954 quotient, 0, OPTAB_LIB_WIDEN); 4955 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0); 4956 remainder = expand_binop (compute_mode, sub_optab, op0, tem, 4957 remainder, 0, OPTAB_LIB_WIDEN); 4958 } 4959 abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0); 4960 abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0); 4961 tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem, 4962 1, NULL_RTX, 1); 4963 do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label); 4964 tem = expand_binop (compute_mode, xor_optab, op0, op1, 4965 NULL_RTX, 0, OPTAB_WIDEN); 4966 mask = expand_shift (RSHIFT_EXPR, compute_mode, tem, 4967 size - 1, NULL_RTX, 0); 4968 tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx, 4969 NULL_RTX, 0, OPTAB_WIDEN); 4970 tem = expand_binop (compute_mode, sub_optab, tem, mask, 4971 NULL_RTX, 0, OPTAB_WIDEN); 4972 expand_inc (quotient, tem); 4973 tem = expand_binop (compute_mode, xor_optab, mask, op1, 4974 NULL_RTX, 0, OPTAB_WIDEN); 4975 tem = expand_binop (compute_mode, sub_optab, tem, mask, 4976 NULL_RTX, 0, OPTAB_WIDEN); 4977 expand_dec (remainder, tem); 4978 emit_label (label); 4979 } 4980 return gen_lowpart (mode, rem_flag ? remainder : quotient); 4981 4982 default: 4983 gcc_unreachable (); 4984 } 4985 4986 if (quotient == 0) 4987 { 4988 if (target && GET_MODE (target) != compute_mode) 4989 target = 0; 4990 4991 if (rem_flag) 4992 { 4993 /* Try to produce the remainder without producing the quotient. 4994 If we seem to have a divmod pattern that does not require widening, 4995 don't try widening here. We should really have a WIDEN argument 4996 to expand_twoval_binop, since what we'd really like to do here is 4997 1) try a mod insn in compute_mode 4998 2) try a divmod insn in compute_mode 4999 3) try a div insn in compute_mode and multiply-subtract to get 5000 remainder 5001 4) try the same things with widening allowed. */ 5002 remainder 5003 = sign_expand_binop (compute_mode, umod_optab, smod_optab, 5004 op0, op1, target, 5005 unsignedp, 5006 ((optab_handler (optab2, compute_mode) 5007 != CODE_FOR_nothing) 5008 ? 
OPTAB_DIRECT : OPTAB_WIDEN)); 5009 if (remainder == 0) 5010 { 5011 /* No luck there. Can we do remainder and divide at once 5012 without a library call? */ 5013 remainder = gen_reg_rtx (compute_mode); 5014 if (! expand_twoval_binop ((unsignedp 5015 ? udivmod_optab 5016 : sdivmod_optab), 5017 op0, op1, 5018 NULL_RTX, remainder, unsignedp)) 5019 remainder = 0; 5020 } 5021 5022 if (remainder) 5023 return gen_lowpart (mode, remainder); 5024 } 5025 5026 /* Produce the quotient. Try a quotient insn, but not a library call. 5027 If we have a divmod in this mode, use it in preference to widening 5028 the div (for this test we assume it will not fail). Note that optab2 5029 is set to the one of the two optabs that the call below will use. */ 5030 quotient 5031 = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab, 5032 op0, op1, rem_flag ? NULL_RTX : target, 5033 unsignedp, 5034 ((optab_handler (optab2, compute_mode) 5035 != CODE_FOR_nothing) 5036 ? OPTAB_DIRECT : OPTAB_WIDEN)); 5037 5038 if (quotient == 0) 5039 { 5040 /* No luck there. Try a quotient-and-remainder insn, 5041 keeping the quotient alone. */ 5042 quotient = gen_reg_rtx (compute_mode); 5043 if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab, 5044 op0, op1, 5045 quotient, NULL_RTX, unsignedp)) 5046 { 5047 quotient = 0; 5048 if (! rem_flag) 5049 /* Still no luck. If we are not computing the remainder, 5050 use a library call for the quotient. */ 5051 quotient = sign_expand_binop (compute_mode, 5052 udiv_optab, sdiv_optab, 5053 op0, op1, target, 5054 unsignedp, OPTAB_LIB_WIDEN); 5055 } 5056 } 5057 } 5058 5059 if (rem_flag) 5060 { 5061 if (target && GET_MODE (target) != compute_mode) 5062 target = 0; 5063 5064 if (quotient == 0) 5065 { 5066 /* No divide instruction either. Use library for remainder. */ 5067 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab, 5068 op0, op1, target, 5069 unsignedp, OPTAB_LIB_WIDEN); 5070 /* No remainder function. Try a quotient-and-remainder 5071 function, keeping the remainder. */ 5072 if (!remainder) 5073 { 5074 remainder = gen_reg_rtx (compute_mode); 5075 if (!expand_twoval_binop_libfunc 5076 (unsignedp ? udivmod_optab : sdivmod_optab, 5077 op0, op1, 5078 NULL_RTX, remainder, 5079 unsignedp ? UMOD : MOD)) 5080 remainder = NULL_RTX; 5081 } 5082 } 5083 else 5084 { 5085 /* We divided. Now finish doing X - Y * (X / Y). */ 5086 remainder = expand_mult (compute_mode, quotient, op1, 5087 NULL_RTX, unsignedp); 5088 remainder = expand_binop (compute_mode, sub_optab, op0, 5089 remainder, target, unsignedp, 5090 OPTAB_LIB_WIDEN); 5091 } 5092 } 5093 5094 return gen_lowpart (mode, rem_flag ? remainder : quotient); 5095 } 5096 5097 /* Return a tree node with data type TYPE, describing the value of X. 5098 Usually this is an VAR_DECL, if there is no obvious better choice. 5099 X may be an expression, however we only support those expressions 5100 generated by loop.c. 
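   For instance (purely illustrative), (plus:SI (reg:SI 100) (const_int 4))
   becomes a PLUS_EXPR whose operands are the tree built for the register
   (a synthetic VAR_DECL from the default case below) and an INTEGER_CST
   of 4.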
*/ 5101 5102 tree 5103 make_tree (tree type, rtx x) 5104 { 5105 tree t; 5106 5107 switch (GET_CODE (x)) 5108 { 5109 case CONST_INT: 5110 case CONST_WIDE_INT: 5111 t = wide_int_to_tree (type, rtx_mode_t (x, TYPE_MODE (type))); 5112 return t; 5113 5114 case CONST_DOUBLE: 5115 STATIC_ASSERT (HOST_BITS_PER_WIDE_INT * 2 <= MAX_BITSIZE_MODE_ANY_INT); 5116 if (TARGET_SUPPORTS_WIDE_INT == 0 && GET_MODE (x) == VOIDmode) 5117 t = wide_int_to_tree (type, 5118 wide_int::from_array (&CONST_DOUBLE_LOW (x), 2, 5119 HOST_BITS_PER_WIDE_INT * 2)); 5120 else 5121 t = build_real (type, *CONST_DOUBLE_REAL_VALUE (x)); 5122 5123 return t; 5124 5125 case CONST_VECTOR: 5126 { 5127 int units = CONST_VECTOR_NUNITS (x); 5128 tree itype = TREE_TYPE (type); 5129 tree *elts; 5130 int i; 5131 5132 /* Build a tree with vector elements. */ 5133 elts = XALLOCAVEC (tree, units); 5134 for (i = units - 1; i >= 0; --i) 5135 { 5136 rtx elt = CONST_VECTOR_ELT (x, i); 5137 elts[i] = make_tree (itype, elt); 5138 } 5139 5140 return build_vector (type, elts); 5141 } 5142 5143 case PLUS: 5144 return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)), 5145 make_tree (type, XEXP (x, 1))); 5146 5147 case MINUS: 5148 return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)), 5149 make_tree (type, XEXP (x, 1))); 5150 5151 case NEG: 5152 return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0))); 5153 5154 case MULT: 5155 return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)), 5156 make_tree (type, XEXP (x, 1))); 5157 5158 case ASHIFT: 5159 return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)), 5160 make_tree (type, XEXP (x, 1))); 5161 5162 case LSHIFTRT: 5163 t = unsigned_type_for (type); 5164 return fold_convert (type, build2 (RSHIFT_EXPR, t, 5165 make_tree (t, XEXP (x, 0)), 5166 make_tree (type, XEXP (x, 1)))); 5167 5168 case ASHIFTRT: 5169 t = signed_type_for (type); 5170 return fold_convert (type, build2 (RSHIFT_EXPR, t, 5171 make_tree (t, XEXP (x, 0)), 5172 make_tree (type, XEXP (x, 1)))); 5173 5174 case DIV: 5175 if (TREE_CODE (type) != REAL_TYPE) 5176 t = signed_type_for (type); 5177 else 5178 t = type; 5179 5180 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t, 5181 make_tree (t, XEXP (x, 0)), 5182 make_tree (t, XEXP (x, 1)))); 5183 case UDIV: 5184 t = unsigned_type_for (type); 5185 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t, 5186 make_tree (t, XEXP (x, 0)), 5187 make_tree (t, XEXP (x, 1)))); 5188 5189 case SIGN_EXTEND: 5190 case ZERO_EXTEND: 5191 t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)), 5192 GET_CODE (x) == ZERO_EXTEND); 5193 return fold_convert (type, make_tree (t, XEXP (x, 0))); 5194 5195 case CONST: 5196 return make_tree (type, XEXP (x, 0)); 5197 5198 case SYMBOL_REF: 5199 t = SYMBOL_REF_DECL (x); 5200 if (t) 5201 return fold_convert (type, build_fold_addr_expr (t)); 5202 /* fall through. */ 5203 5204 default: 5205 t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type); 5206 5207 /* If TYPE is a POINTER_TYPE, we might need to convert X from 5208 address mode to pointer mode. */ 5209 if (POINTER_TYPE_P (type)) 5210 x = convert_memory_address_addr_space 5211 (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type))); 5212 5213 /* Note that we do *not* use SET_DECL_RTL here, because we do not 5214 want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */ 5215 t->decl_with_rtl.rtl = x; 5216 5217 return t; 5218 } 5219 } 5220 5221 /* Compute the logical-and of OP0 and OP1, storing it in TARGET 5222 and returning TARGET. 
5223 5224 If TARGET is 0, a pseudo-register or constant is returned. */ 5225 5226 rtx 5227 expand_and (machine_mode mode, rtx op0, rtx op1, rtx target) 5228 { 5229 rtx tem = 0; 5230 5231 if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode) 5232 tem = simplify_binary_operation (AND, mode, op0, op1); 5233 if (tem == 0) 5234 tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN); 5235 5236 if (target == 0) 5237 target = tem; 5238 else if (tem != target) 5239 emit_move_insn (target, tem); 5240 return target; 5241 } 5242 5243 /* Helper function for emit_store_flag. */ 5244 rtx 5245 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code, 5246 machine_mode mode, machine_mode compare_mode, 5247 int unsignedp, rtx x, rtx y, int normalizep, 5248 machine_mode target_mode) 5249 { 5250 struct expand_operand ops[4]; 5251 rtx op0, comparison, subtarget; 5252 rtx_insn *last; 5253 machine_mode result_mode = targetm.cstore_mode (icode); 5254 5255 last = get_last_insn (); 5256 x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp); 5257 y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp); 5258 if (!x || !y) 5259 { 5260 delete_insns_since (last); 5261 return NULL_RTX; 5262 } 5263 5264 if (target_mode == VOIDmode) 5265 target_mode = result_mode; 5266 if (!target) 5267 target = gen_reg_rtx (target_mode); 5268 5269 comparison = gen_rtx_fmt_ee (code, result_mode, x, y); 5270 5271 create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode); 5272 create_fixed_operand (&ops[1], comparison); 5273 create_fixed_operand (&ops[2], x); 5274 create_fixed_operand (&ops[3], y); 5275 if (!maybe_expand_insn (icode, 4, ops)) 5276 { 5277 delete_insns_since (last); 5278 return NULL_RTX; 5279 } 5280 subtarget = ops[0].value; 5281 5282 /* If we are converting to a wider mode, first convert to 5283 TARGET_MODE, then normalize. This produces better combining 5284 opportunities on machines that have a SIGN_EXTRACT when we are 5285 testing a single bit. This mostly benefits the 68k. 5286 5287 If STORE_FLAG_VALUE does not have the sign bit set when 5288 interpreted in MODE, we can do this conversion as unsigned, which 5289 is usually more efficient. */ 5290 if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode)) 5291 { 5292 convert_move (target, subtarget, 5293 val_signbit_known_clear_p (result_mode, 5294 STORE_FLAG_VALUE)); 5295 op0 = target; 5296 result_mode = target_mode; 5297 } 5298 else 5299 op0 = subtarget; 5300 5301 /* If we want to keep subexpressions around, don't reuse our last 5302 target. */ 5303 if (optimize) 5304 subtarget = 0; 5305 5306 /* Now normalize to the proper value in MODE. Sometimes we don't 5307 have to do anything. */ 5308 if (normalizep == 0 || normalizep == STORE_FLAG_VALUE) 5309 ; 5310 /* STORE_FLAG_VALUE might be the most negative number, so write 5311 the comparison this way to avoid a compiler-time warning. */ 5312 else if (- normalizep == STORE_FLAG_VALUE) 5313 op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0); 5314 5315 /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes 5316 it hard to use a value of just the sign bit due to ANSI integer 5317 constant typing rules. 
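     When STORE_FLAG_VALUE is exactly the sign bit, shifting it right by
     GET_MODE_BITSIZE (result_mode) - 1 brings it down to bit 0: the shift is
     logical when NORMALIZEP is 1 (yielding 0 or 1) and arithmetic otherwise
     (yielding 0 or -1).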
*/ 5318 else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE)) 5319 op0 = expand_shift (RSHIFT_EXPR, result_mode, op0, 5320 GET_MODE_BITSIZE (result_mode) - 1, subtarget, 5321 normalizep == 1); 5322 else 5323 { 5324 gcc_assert (STORE_FLAG_VALUE & 1); 5325 5326 op0 = expand_and (result_mode, op0, const1_rtx, subtarget); 5327 if (normalizep == -1) 5328 op0 = expand_unop (result_mode, neg_optab, op0, op0, 0); 5329 } 5330 5331 /* If we were converting to a smaller mode, do the conversion now. */ 5332 if (target_mode != result_mode) 5333 { 5334 convert_move (target, op0, 0); 5335 return target; 5336 } 5337 else 5338 return op0; 5339 } 5340 5341 5342 /* A subroutine of emit_store_flag only including "tricks" that do not 5343 need a recursive call. These are kept separate to avoid infinite 5344 loops. */ 5345 5346 static rtx 5347 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1, 5348 machine_mode mode, int unsignedp, int normalizep, 5349 machine_mode target_mode) 5350 { 5351 rtx subtarget; 5352 enum insn_code icode; 5353 machine_mode compare_mode; 5354 enum mode_class mclass; 5355 enum rtx_code scode; 5356 5357 if (unsignedp) 5358 code = unsigned_condition (code); 5359 scode = swap_condition (code); 5360 5361 /* If one operand is constant, make it the second one. Only do this 5362 if the other operand is not constant as well. */ 5363 5364 if (swap_commutative_operands_p (op0, op1)) 5365 { 5366 std::swap (op0, op1); 5367 code = swap_condition (code); 5368 } 5369 5370 if (mode == VOIDmode) 5371 mode = GET_MODE (op0); 5372 5373 /* For some comparisons with 1 and -1, we can convert this to 5374 comparisons with zero. This will often produce more opportunities for 5375 store-flag insns. */ 5376 5377 switch (code) 5378 { 5379 case LT: 5380 if (op1 == const1_rtx) 5381 op1 = const0_rtx, code = LE; 5382 break; 5383 case LE: 5384 if (op1 == constm1_rtx) 5385 op1 = const0_rtx, code = LT; 5386 break; 5387 case GE: 5388 if (op1 == const1_rtx) 5389 op1 = const0_rtx, code = GT; 5390 break; 5391 case GT: 5392 if (op1 == constm1_rtx) 5393 op1 = const0_rtx, code = GE; 5394 break; 5395 case GEU: 5396 if (op1 == const1_rtx) 5397 op1 = const0_rtx, code = NE; 5398 break; 5399 case LTU: 5400 if (op1 == const1_rtx) 5401 op1 = const0_rtx, code = EQ; 5402 break; 5403 default: 5404 break; 5405 } 5406 5407 /* If we are comparing a double-word integer with zero or -1, we can 5408 convert the comparison into one involving a single word. */ 5409 if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2 5410 && GET_MODE_CLASS (mode) == MODE_INT 5411 && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0))) 5412 { 5413 rtx tem; 5414 if ((code == EQ || code == NE) 5415 && (op1 == const0_rtx || op1 == constm1_rtx)) 5416 { 5417 rtx op00, op01; 5418 5419 /* Do a logical OR or AND of the two words and compare the 5420 result. */ 5421 op00 = simplify_gen_subreg (word_mode, op0, mode, 0); 5422 op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD); 5423 tem = expand_binop (word_mode, 5424 op1 == const0_rtx ? ior_optab : and_optab, 5425 op00, op01, NULL_RTX, unsignedp, 5426 OPTAB_DIRECT); 5427 5428 if (tem != 0) 5429 tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode, 5430 unsignedp, normalizep); 5431 } 5432 else if ((code == LT || code == GE) && op1 == const0_rtx) 5433 { 5434 rtx op0h; 5435 5436 /* If testing the sign bit, can just test on high word. 
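	     For example, with a 64-bit word a 128-bit value is negative
	     exactly when its most significant word is, so the LT/GE test
	     against zero can be done on that subword alone.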
*/
5437	  op0h = simplify_gen_subreg (word_mode, op0, mode,
5438				      subreg_highpart_offset (word_mode,
5439							      mode));
5440	  tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5441				 unsignedp, normalizep);
5442	}
5443      else
5444	tem = NULL_RTX;
5445
5446      if (tem)
5447	{
5448	  if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5449	    return tem;
5450	  if (!target)
5451	    target = gen_reg_rtx (target_mode);
5452
5453	  convert_move (target, tem,
5454			!val_signbit_known_set_p (word_mode,
5455						  (normalizep ? normalizep
5456						   : STORE_FLAG_VALUE)));
5457	  return target;
5458	}
5459    }
5460
5461  /* If this is A < 0 or A >= 0, we can do this by taking the one's
5462     complement of A (for GE) and shifting the sign bit to the low bit.  */
5463  if (op1 == const0_rtx && (code == LT || code == GE)
5464      && GET_MODE_CLASS (mode) == MODE_INT
5465      && (normalizep || STORE_FLAG_VALUE == 1
5466	  || val_signbit_p (mode, STORE_FLAG_VALUE)))
5467    {
5468      subtarget = target;
5469
5470      if (!target)
5471	target_mode = mode;
5472
5473      /* If the result is to be wider than OP0, it is best to convert it
5474	 first.  If it is to be narrower, it is *incorrect* to convert it
5475	 first.  */
5476      else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5477	{
5478	  op0 = convert_modes (target_mode, mode, op0, 0);
5479	  mode = target_mode;
5480	}
5481
5482      if (target_mode != mode)
5483	subtarget = 0;
5484
5485      if (code == GE)
5486	op0 = expand_unop (mode, one_cmpl_optab, op0,
5487			   ((STORE_FLAG_VALUE == 1 || normalizep)
5488			    ? 0 : subtarget), 0);
5489
5490      if (STORE_FLAG_VALUE == 1 || normalizep)
5491	/* If we are supposed to produce a 0/1 value, we want to do
5492	   a logical shift from the sign bit to the low-order bit; for
5493	   a -1/0 value, we do an arithmetic shift.  */
5494	op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5495			    GET_MODE_BITSIZE (mode) - 1,
5496			    subtarget, normalizep != -1);
5497
5498      if (mode != target_mode)
5499	op0 = convert_modes (target_mode, mode, op0, 0);
5500
5501      return op0;
5502    }
5503
5504  mclass = GET_MODE_CLASS (mode);
5505  for (compare_mode = mode; compare_mode != VOIDmode;
5506       compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5507    {
5508      machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5509      icode = optab_handler (cstore_optab, optab_mode);
5510      if (icode != CODE_FOR_nothing)
5511	{
5512	  do_pending_stack_adjust ();
5513	  rtx tem = emit_cstore (target, icode, code, mode, compare_mode,
5514				 unsignedp, op0, op1, normalizep, target_mode);
5515	  if (tem)
5516	    return tem;
5517
5518	  if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5519	    {
5520	      tem = emit_cstore (target, icode, scode, mode, compare_mode,
5521				 unsignedp, op1, op0, normalizep, target_mode);
5522	      if (tem)
5523		return tem;
5524	    }
5525	  break;
5526	}
5527    }
5528
5529  return 0;
5530 }
5531
5532 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5533    and storing in TARGET.  Normally return TARGET.
5534    Return 0 if that cannot be done.
5535
5536    MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
5537    it is VOIDmode, they cannot both be CONST_INT.
5538
5539    UNSIGNEDP is for the case where we have to widen the operands
5540    to perform the operation.  It says to use zero-extension.
5541
5542    NORMALIZEP is 1 if we should convert the result to be either zero
5543    or one.  NORMALIZEP is -1 if we should convert the result to be
5544    either zero or -1.  If NORMALIZEP is zero, the result will be left
5545    "raw" out of the scc insn.
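   For example, on a target whose scc patterns produce STORE_FLAG_VALUE == 1,
   a NORMALIZEP of 0 or 1 can use the raw result directly, while a NORMALIZEP
   of -1 costs an extra negation.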
*/ 5546 5547 rtx 5548 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1, 5549 machine_mode mode, int unsignedp, int normalizep) 5550 { 5551 machine_mode target_mode = target ? GET_MODE (target) : VOIDmode; 5552 enum rtx_code rcode; 5553 rtx subtarget; 5554 rtx tem, trueval; 5555 rtx_insn *last; 5556 5557 /* If we compare constants, we shouldn't use a store-flag operation, 5558 but a constant load. We can get there via the vanilla route that 5559 usually generates a compare-branch sequence, but will in this case 5560 fold the comparison to a constant, and thus elide the branch. */ 5561 if (CONSTANT_P (op0) && CONSTANT_P (op1)) 5562 return NULL_RTX; 5563 5564 tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep, 5565 target_mode); 5566 if (tem) 5567 return tem; 5568 5569 /* If we reached here, we can't do this with a scc insn, however there 5570 are some comparisons that can be done in other ways. Don't do any 5571 of these cases if branches are very cheap. */ 5572 if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0) 5573 return 0; 5574 5575 /* See what we need to return. We can only return a 1, -1, or the 5576 sign bit. */ 5577 5578 if (normalizep == 0) 5579 { 5580 if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1) 5581 normalizep = STORE_FLAG_VALUE; 5582 5583 else if (val_signbit_p (mode, STORE_FLAG_VALUE)) 5584 ; 5585 else 5586 return 0; 5587 } 5588 5589 last = get_last_insn (); 5590 5591 /* If optimizing, use different pseudo registers for each insn, instead 5592 of reusing the same pseudo. This leads to better CSE, but slows 5593 down the compiler, since there are more pseudos */ 5594 subtarget = (!optimize 5595 && (target_mode == mode)) ? target : NULL_RTX; 5596 trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE); 5597 5598 /* For floating-point comparisons, try the reverse comparison or try 5599 changing the "orderedness" of the comparison. */ 5600 if (GET_MODE_CLASS (mode) == MODE_FLOAT) 5601 { 5602 enum rtx_code first_code; 5603 bool and_them; 5604 5605 rcode = reverse_condition_maybe_unordered (code); 5606 if (can_compare_p (rcode, mode, ccp_store_flag) 5607 && (code == ORDERED || code == UNORDERED 5608 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ)) 5609 || (! HONOR_SNANS (mode) && (code == EQ || code == NE)))) 5610 { 5611 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1) 5612 || (STORE_FLAG_VALUE == -1 && normalizep == 1)); 5613 5614 /* For the reverse comparison, use either an addition or a XOR. */ 5615 if (want_add 5616 && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1, 5617 optimize_insn_for_speed_p ()) == 0) 5618 { 5619 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0, 5620 STORE_FLAG_VALUE, target_mode); 5621 if (tem) 5622 return expand_binop (target_mode, add_optab, tem, 5623 gen_int_mode (normalizep, target_mode), 5624 target, 0, OPTAB_WIDEN); 5625 } 5626 else if (!want_add 5627 && rtx_cost (trueval, mode, XOR, 1, 5628 optimize_insn_for_speed_p ()) == 0) 5629 { 5630 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0, 5631 normalizep, target_mode); 5632 if (tem) 5633 return expand_binop (target_mode, xor_optab, tem, trueval, 5634 target, INTVAL (trueval) >= 0, OPTAB_WIDEN); 5635 } 5636 } 5637 5638 delete_insns_since (last); 5639 5640 /* Cannot split ORDERED and UNORDERED, only try the above trick. 
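     Every other floating-point comparison is decomposed below by
     split_comparison into an ORDERED or UNORDERED test combined with an
     ordered comparison.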
*/ 5641 if (code == ORDERED || code == UNORDERED) 5642 return 0; 5643 5644 and_them = split_comparison (code, mode, &first_code, &code); 5645 5646 /* If there are no NaNs, the first comparison should always fall through. 5647 Effectively change the comparison to the other one. */ 5648 if (!HONOR_NANS (mode)) 5649 { 5650 gcc_assert (first_code == (and_them ? ORDERED : UNORDERED)); 5651 return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep, 5652 target_mode); 5653 } 5654 5655 if (!HAVE_conditional_move) 5656 return 0; 5657 5658 /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a 5659 conditional move. */ 5660 tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0, 5661 normalizep, target_mode); 5662 if (tem == 0) 5663 return 0; 5664 5665 if (and_them) 5666 tem = emit_conditional_move (target, code, op0, op1, mode, 5667 tem, const0_rtx, GET_MODE (tem), 0); 5668 else 5669 tem = emit_conditional_move (target, code, op0, op1, mode, 5670 trueval, tem, GET_MODE (tem), 0); 5671 5672 if (tem == 0) 5673 delete_insns_since (last); 5674 return tem; 5675 } 5676 5677 /* The remaining tricks only apply to integer comparisons. */ 5678 5679 if (GET_MODE_CLASS (mode) != MODE_INT) 5680 return 0; 5681 5682 /* If this is an equality comparison of integers, we can try to exclusive-or 5683 (or subtract) the two operands and use a recursive call to try the 5684 comparison with zero. Don't do any of these cases if branches are 5685 very cheap. */ 5686 5687 if ((code == EQ || code == NE) && op1 != const0_rtx) 5688 { 5689 tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1, 5690 OPTAB_WIDEN); 5691 5692 if (tem == 0) 5693 tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1, 5694 OPTAB_WIDEN); 5695 if (tem != 0) 5696 tem = emit_store_flag (target, code, tem, const0_rtx, 5697 mode, unsignedp, normalizep); 5698 if (tem != 0) 5699 return tem; 5700 5701 delete_insns_since (last); 5702 } 5703 5704 /* For integer comparisons, try the reverse comparison. However, for 5705 small X and if we'd have anyway to extend, implementing "X != 0" 5706 as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0". */ 5707 rcode = reverse_condition (code); 5708 if (can_compare_p (rcode, mode, ccp_store_flag) 5709 && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing 5710 && code == NE 5711 && GET_MODE_SIZE (mode) < UNITS_PER_WORD 5712 && op1 == const0_rtx)) 5713 { 5714 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1) 5715 || (STORE_FLAG_VALUE == -1 && normalizep == 1)); 5716 5717 /* Again, for the reverse comparison, use either an addition or a XOR. */ 5718 if (want_add 5719 && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1, 5720 optimize_insn_for_speed_p ()) == 0) 5721 { 5722 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0, 5723 STORE_FLAG_VALUE, target_mode); 5724 if (tem != 0) 5725 tem = expand_binop (target_mode, add_optab, tem, 5726 gen_int_mode (normalizep, target_mode), 5727 target, 0, OPTAB_WIDEN); 5728 } 5729 else if (!want_add 5730 && rtx_cost (trueval, mode, XOR, 1, 5731 optimize_insn_for_speed_p ()) == 0) 5732 { 5733 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0, 5734 normalizep, target_mode); 5735 if (tem != 0) 5736 tem = expand_binop (target_mode, xor_optab, tem, trueval, target, 5737 INTVAL (trueval) >= 0, OPTAB_WIDEN); 5738 } 5739 5740 if (tem != 0) 5741 return tem; 5742 delete_insns_since (last); 5743 } 5744 5745 /* Some other cases we can do are EQ, NE, LE, and GT comparisons with 5746 the constant zero. 
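     Each of these is implemented by computing a value whose sign bit
     reflects the comparison and then, when NORMALIZEP requires it, shifting
     the sign bit down.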
Reject all other comparisons at this point.  Only
5747     do LE and GT if branches are expensive since they are expensive on
5748     2-operand machines.  */
5749
5750  if (op1 != const0_rtx
5751      || (code != EQ && code != NE
5752	  && (BRANCH_COST (optimize_insn_for_speed_p (),
5753			   false) <= 1 || (code != LE && code != GT))))
5754    return 0;
5755
5756  /* Try to put the result of the comparison in the sign bit.  Assume we can't
5757     do the necessary operation below.  */
5758
5759  tem = 0;
5760
5761  /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
5762     the sign bit set.  */
5763
5764  if (code == LE)
5765    {
5766      /* This is destructive, so SUBTARGET can't be OP0.  */
5767      if (rtx_equal_p (subtarget, op0))
5768	subtarget = 0;
5769
5770      tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5771			  OPTAB_WIDEN);
5772      if (tem)
5773	tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5774			    OPTAB_WIDEN);
5775    }
5776
5777  /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5778     number of bits in the mode of OP0, minus one.  */
5779
5780  if (code == GT)
5781    {
5782      if (rtx_equal_p (subtarget, op0))
5783	subtarget = 0;
5784
5785      tem = maybe_expand_shift (RSHIFT_EXPR, mode, op0,
5786				GET_MODE_BITSIZE (mode) - 1,
5787				subtarget, 0);
5788      if (tem)
5789	tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5790			    OPTAB_WIDEN);
5791    }
5792
5793  if (code == EQ || code == NE)
5794    {
5795      /* For EQ or NE, one way to do the comparison is to apply an operation
5796	 that converts the operand into a positive number if it is nonzero
5797	 or zero if it was originally zero.  Then, for EQ, we subtract 1 and
5798	 for NE we negate.  This puts the result in the sign bit.  Then we
5799	 normalize with a shift, if needed.
5800
5801	 Two operations that can do the above actions are ABS and FFS, so try
5802	 them.  If that doesn't work, and MODE is smaller than a full word,
5803	 we can use zero-extension to the wider mode (an unsigned conversion)
5804	 as the operation.  */
5805
5806      /* Note that ABS doesn't yield a positive number for INT_MIN, but
5807	 that is compensated by the subsequent overflow when subtracting
5808	 one / negating.  */
5809
5810      if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5811	tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5812      else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5813	tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5814      else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5815	{
5816	  tem = convert_modes (word_mode, mode, op0, 1);
5817	  mode = word_mode;
5818	}
5819
5820      if (tem != 0)
5821	{
5822	  if (code == EQ)
5823	    tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5824				0, OPTAB_WIDEN);
5825	  else
5826	    tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5827	}
5828
5829      /* If we couldn't do it that way, for NE we can "or" the two's complement
5830	 of the value with itself.  For EQ, we take the one's complement of
5831	 that "or", which is an extra insn, so we only handle EQ if branches
5832	 are expensive.
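	 For any nonzero A, either A or -A is negative (both are when A is
	 the most negative value), so the sign bit of (-A | A) is precisely
	 the A != 0 condition.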
*/ 5833 5834 if (tem == 0 5835 && (code == NE 5836 || BRANCH_COST (optimize_insn_for_speed_p (), 5837 false) > 1)) 5838 { 5839 if (rtx_equal_p (subtarget, op0)) 5840 subtarget = 0; 5841 5842 tem = expand_unop (mode, neg_optab, op0, subtarget, 0); 5843 tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0, 5844 OPTAB_WIDEN); 5845 5846 if (tem && code == EQ) 5847 tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0); 5848 } 5849 } 5850 5851 if (tem && normalizep) 5852 tem = maybe_expand_shift (RSHIFT_EXPR, mode, tem, 5853 GET_MODE_BITSIZE (mode) - 1, 5854 subtarget, normalizep == 1); 5855 5856 if (tem) 5857 { 5858 if (!target) 5859 ; 5860 else if (GET_MODE (tem) != target_mode) 5861 { 5862 convert_move (target, tem, 0); 5863 tem = target; 5864 } 5865 else if (!subtarget) 5866 { 5867 emit_move_insn (target, tem); 5868 tem = target; 5869 } 5870 } 5871 else 5872 delete_insns_since (last); 5873 5874 return tem; 5875 } 5876 5877 /* Like emit_store_flag, but always succeeds. */ 5878 5879 rtx 5880 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1, 5881 machine_mode mode, int unsignedp, int normalizep) 5882 { 5883 rtx tem; 5884 rtx_code_label *label; 5885 rtx trueval, falseval; 5886 5887 /* First see if emit_store_flag can do the job. */ 5888 tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep); 5889 if (tem != 0) 5890 return tem; 5891 5892 /* If one operand is constant, make it the second one. Only do this 5893 if the other operand is not constant as well. */ 5894 5895 if (swap_commutative_operands_p (op0, op1)) 5896 { 5897 std::swap (op0, op1); 5898 code = swap_condition (code); 5899 } 5900 5901 if (mode == VOIDmode) 5902 mode = GET_MODE (op0); 5903 5904 if (!target) 5905 target = gen_reg_rtx (word_mode); 5906 5907 /* If this failed, we have to do this with set/compare/jump/set code. 5908 For foo != 0, if foo is in OP0, just replace it with 1 if nonzero. */ 5909 trueval = normalizep ? GEN_INT (normalizep) : const1_rtx; 5910 if (code == NE 5911 && GET_MODE_CLASS (mode) == MODE_INT 5912 && REG_P (target) 5913 && op0 == target 5914 && op1 == const0_rtx) 5915 { 5916 label = gen_label_rtx (); 5917 do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp, mode, 5918 NULL_RTX, NULL, label, -1); 5919 emit_move_insn (target, trueval); 5920 emit_label (label); 5921 return target; 5922 } 5923 5924 if (!REG_P (target) 5925 || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1)) 5926 target = gen_reg_rtx (GET_MODE (target)); 5927 5928 /* Jump in the right direction if the target cannot implement CODE 5929 but can jump on its reverse condition. */ 5930 falseval = const0_rtx; 5931 if (! can_compare_p (code, mode, ccp_jump) 5932 && (! FLOAT_MODE_P (mode) 5933 || code == ORDERED || code == UNORDERED 5934 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ)) 5935 || (! HONOR_SNANS (mode) && (code == EQ || code == NE)))) 5936 { 5937 enum rtx_code rcode; 5938 if (FLOAT_MODE_P (mode)) 5939 rcode = reverse_condition_maybe_unordered (code); 5940 else 5941 rcode = reverse_condition (code); 5942 5943 /* Canonicalize to UNORDERED for the libcall. */ 5944 if (can_compare_p (rcode, mode, ccp_jump) 5945 || (code == ORDERED && ! 
can_compare_p (ORDERED, mode, ccp_jump)))
5946	{
5947	  falseval = trueval;
5948	  trueval = const0_rtx;
5949	  code = rcode;
5950	}
5951    }
5952
5953  emit_move_insn (target, trueval);
5954  label = gen_label_rtx ();
5955  do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX, NULL,
5956			   label, -1);
5957
5958  emit_move_insn (target, falseval);
5959  emit_label (label);
5960
5961  return target;
5962 }
5963
5964 /* Perform a possibly multi-word comparison and conditional jump to LABEL
5965    if ARG1 OP ARG2 is true, where ARG1 and ARG2 are of mode MODE.  This is
5966    now a thin wrapper around do_compare_rtx_and_jump.  */
5967
5968 static void
5969 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, machine_mode mode,
5970		  rtx_code_label *label)
5971 {
5972   int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
5973   do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode, NULL_RTX,
5974			    NULL, label, -1);
5975 }
5976
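/* Illustrative sketch only, not part of the expansion logic above: when no
   usable scc pattern exists, the set/compare/jump/set fallback in
   emit_store_flag_force emits code along the lines of

	target = TRUEVAL;
	if (OP0 CODE OP1) goto L;
	target = FALSEVAL;
     L:

   with TRUEVAL and FALSEVAL exchanged (and CODE reversed) when only the
   reverse condition can be branched on.  */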