1 /* Medium-level subroutines: convert bit-field store and extract 2 and shifts, multiplies and divides to rtl instructions. 3 Copyright (C) 1987-2015 Free Software Foundation, Inc. 4 5 This file is part of GCC. 6 7 GCC is free software; you can redistribute it and/or modify it under 8 the terms of the GNU General Public License as published by the Free 9 Software Foundation; either version 3, or (at your option) any later 10 version. 11 12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY 13 WARRANTY; without even the implied warranty of MERCHANTABILITY or 14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 15 for more details. 16 17 You should have received a copy of the GNU General Public License 18 along with GCC; see the file COPYING3. If not see 19 <http://www.gnu.org/licenses/>. */ 20 21 22 #include "config.h" 23 #include "system.h" 24 #include "coretypes.h" 25 #include "tm.h" 26 #include "diagnostic-core.h" 27 #include "rtl.h" 28 #include "hash-set.h" 29 #include "machmode.h" 30 #include "vec.h" 31 #include "double-int.h" 32 #include "input.h" 33 #include "alias.h" 34 #include "symtab.h" 35 #include "wide-int.h" 36 #include "inchash.h" 37 #include "tree.h" 38 #include "fold-const.h" 39 #include "stor-layout.h" 40 #include "tm_p.h" 41 #include "flags.h" 42 #include "insn-config.h" 43 #include "hashtab.h" 44 #include "hard-reg-set.h" 45 #include "function.h" 46 #include "statistics.h" 47 #include "real.h" 48 #include "fixed-value.h" 49 #include "expmed.h" 50 #include "dojump.h" 51 #include "explow.h" 52 #include "calls.h" 53 #include "emit-rtl.h" 54 #include "varasm.h" 55 #include "stmt.h" 56 #include "expr.h" 57 #include "insn-codes.h" 58 #include "optabs.h" 59 #include "recog.h" 60 #include "langhooks.h" 61 #include "predict.h" 62 #include "basic-block.h" 63 #include "df.h" 64 #include "target.h" 65 66 struct target_expmed default_target_expmed; 67 #if SWITCHABLE_TARGET 68 struct target_expmed *this_target_expmed = &default_target_expmed; 69 #endif 70 71 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT, 72 unsigned HOST_WIDE_INT, 73 unsigned HOST_WIDE_INT, 74 unsigned HOST_WIDE_INT, 75 rtx); 76 static void store_fixed_bit_field_1 (rtx, unsigned HOST_WIDE_INT, 77 unsigned HOST_WIDE_INT, 78 rtx); 79 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT, 80 unsigned HOST_WIDE_INT, 81 unsigned HOST_WIDE_INT, 82 unsigned HOST_WIDE_INT, 83 rtx); 84 static rtx extract_fixed_bit_field (machine_mode, rtx, 85 unsigned HOST_WIDE_INT, 86 unsigned HOST_WIDE_INT, rtx, int); 87 static rtx extract_fixed_bit_field_1 (machine_mode, rtx, 88 unsigned HOST_WIDE_INT, 89 unsigned HOST_WIDE_INT, rtx, int); 90 static rtx lshift_value (machine_mode, unsigned HOST_WIDE_INT, int); 91 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT, 92 unsigned HOST_WIDE_INT, int); 93 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, machine_mode, rtx_code_label *); 94 static rtx expand_smod_pow2 (machine_mode, rtx, HOST_WIDE_INT); 95 static rtx expand_sdiv_pow2 (machine_mode, rtx, HOST_WIDE_INT); 96 97 /* Return a constant integer mask value of mode MODE with BITSIZE ones 98 followed by BITPOS zeros, or the complement of that if COMPLEMENT. 99 The mask is truncated if necessary to the width of mode MODE. The 100 mask is zero-extended if BITSIZE+BITPOS is too small for MODE. 
*/ 101 102 static inline rtx 103 mask_rtx (machine_mode mode, int bitpos, int bitsize, bool complement) 104 { 105 return immed_wide_int_const 106 (wi::shifted_mask (bitpos, bitsize, complement, 107 GET_MODE_PRECISION (mode)), mode); 108 } 109 110 /* Test whether a value is zero of a power of two. */ 111 #define EXACT_POWER_OF_2_OR_ZERO_P(x) \ 112 (((x) & ((x) - (unsigned HOST_WIDE_INT) 1)) == 0) 113 114 struct init_expmed_rtl 115 { 116 rtx reg; 117 rtx plus; 118 rtx neg; 119 rtx mult; 120 rtx sdiv; 121 rtx udiv; 122 rtx sdiv_32; 123 rtx smod_32; 124 rtx wide_mult; 125 rtx wide_lshr; 126 rtx wide_trunc; 127 rtx shift; 128 rtx shift_mult; 129 rtx shift_add; 130 rtx shift_sub0; 131 rtx shift_sub1; 132 rtx zext; 133 rtx trunc; 134 135 rtx pow2[MAX_BITS_PER_WORD]; 136 rtx cint[MAX_BITS_PER_WORD]; 137 }; 138 139 static void 140 init_expmed_one_conv (struct init_expmed_rtl *all, machine_mode to_mode, 141 machine_mode from_mode, bool speed) 142 { 143 int to_size, from_size; 144 rtx which; 145 146 to_size = GET_MODE_PRECISION (to_mode); 147 from_size = GET_MODE_PRECISION (from_mode); 148 149 /* Most partial integers have a precision less than the "full" 150 integer it requires for storage. In case one doesn't, for 151 comparison purposes here, reduce the bit size by one in that 152 case. */ 153 if (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT 154 && exact_log2 (to_size) != -1) 155 to_size --; 156 if (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT 157 && exact_log2 (from_size) != -1) 158 from_size --; 159 160 /* Assume cost of zero-extend and sign-extend is the same. */ 161 which = (to_size < from_size ? all->trunc : all->zext); 162 163 PUT_MODE (all->reg, from_mode); 164 set_convert_cost (to_mode, from_mode, speed, set_src_cost (which, speed)); 165 } 166 167 static void 168 init_expmed_one_mode (struct init_expmed_rtl *all, 169 machine_mode mode, int speed) 170 { 171 int m, n, mode_bitsize; 172 machine_mode mode_from; 173 174 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode); 175 176 PUT_MODE (all->reg, mode); 177 PUT_MODE (all->plus, mode); 178 PUT_MODE (all->neg, mode); 179 PUT_MODE (all->mult, mode); 180 PUT_MODE (all->sdiv, mode); 181 PUT_MODE (all->udiv, mode); 182 PUT_MODE (all->sdiv_32, mode); 183 PUT_MODE (all->smod_32, mode); 184 PUT_MODE (all->wide_trunc, mode); 185 PUT_MODE (all->shift, mode); 186 PUT_MODE (all->shift_mult, mode); 187 PUT_MODE (all->shift_add, mode); 188 PUT_MODE (all->shift_sub0, mode); 189 PUT_MODE (all->shift_sub1, mode); 190 PUT_MODE (all->zext, mode); 191 PUT_MODE (all->trunc, mode); 192 193 set_add_cost (speed, mode, set_src_cost (all->plus, speed)); 194 set_neg_cost (speed, mode, set_src_cost (all->neg, speed)); 195 set_mul_cost (speed, mode, set_src_cost (all->mult, speed)); 196 set_sdiv_cost (speed, mode, set_src_cost (all->sdiv, speed)); 197 set_udiv_cost (speed, mode, set_src_cost (all->udiv, speed)); 198 199 set_sdiv_pow2_cheap (speed, mode, (set_src_cost (all->sdiv_32, speed) 200 <= 2 * add_cost (speed, mode))); 201 set_smod_pow2_cheap (speed, mode, (set_src_cost (all->smod_32, speed) 202 <= 4 * add_cost (speed, mode))); 203 204 set_shift_cost (speed, mode, 0, 0); 205 { 206 int cost = add_cost (speed, mode); 207 set_shiftadd_cost (speed, mode, 0, cost); 208 set_shiftsub0_cost (speed, mode, 0, cost); 209 set_shiftsub1_cost (speed, mode, 0, cost); 210 } 211 212 n = MIN (MAX_BITS_PER_WORD, mode_bitsize); 213 for (m = 1; m < n; m++) 214 { 215 XEXP (all->shift, 1) = all->cint[m]; 216 XEXP (all->shift_mult, 1) = all->pow2[m]; 217 218 set_shift_cost (speed, mode, m, 
set_src_cost (all->shift, speed)); 219 set_shiftadd_cost (speed, mode, m, set_src_cost (all->shift_add, speed)); 220 set_shiftsub0_cost (speed, mode, m, set_src_cost (all->shift_sub0, speed)); 221 set_shiftsub1_cost (speed, mode, m, set_src_cost (all->shift_sub1, speed)); 222 } 223 224 if (SCALAR_INT_MODE_P (mode)) 225 { 226 for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT; 227 mode_from = (machine_mode)(mode_from + 1)) 228 init_expmed_one_conv (all, mode, mode_from, speed); 229 } 230 if (GET_MODE_CLASS (mode) == MODE_INT) 231 { 232 machine_mode wider_mode = GET_MODE_WIDER_MODE (mode); 233 if (wider_mode != VOIDmode) 234 { 235 PUT_MODE (all->zext, wider_mode); 236 PUT_MODE (all->wide_mult, wider_mode); 237 PUT_MODE (all->wide_lshr, wider_mode); 238 XEXP (all->wide_lshr, 1) = GEN_INT (mode_bitsize); 239 240 set_mul_widen_cost (speed, wider_mode, 241 set_src_cost (all->wide_mult, speed)); 242 set_mul_highpart_cost (speed, mode, 243 set_src_cost (all->wide_trunc, speed)); 244 } 245 } 246 } 247 248 void 249 init_expmed (void) 250 { 251 struct init_expmed_rtl all; 252 machine_mode mode = QImode; 253 int m, speed; 254 255 memset (&all, 0, sizeof all); 256 for (m = 1; m < MAX_BITS_PER_WORD; m++) 257 { 258 all.pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m); 259 all.cint[m] = GEN_INT (m); 260 } 261 262 /* Avoid using hard regs in ways which may be unsupported. */ 263 all.reg = gen_rtx_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1); 264 all.plus = gen_rtx_PLUS (mode, all.reg, all.reg); 265 all.neg = gen_rtx_NEG (mode, all.reg); 266 all.mult = gen_rtx_MULT (mode, all.reg, all.reg); 267 all.sdiv = gen_rtx_DIV (mode, all.reg, all.reg); 268 all.udiv = gen_rtx_UDIV (mode, all.reg, all.reg); 269 all.sdiv_32 = gen_rtx_DIV (mode, all.reg, all.pow2[5]); 270 all.smod_32 = gen_rtx_MOD (mode, all.reg, all.pow2[5]); 271 all.zext = gen_rtx_ZERO_EXTEND (mode, all.reg); 272 all.wide_mult = gen_rtx_MULT (mode, all.zext, all.zext); 273 all.wide_lshr = gen_rtx_LSHIFTRT (mode, all.wide_mult, all.reg); 274 all.wide_trunc = gen_rtx_TRUNCATE (mode, all.wide_lshr); 275 all.shift = gen_rtx_ASHIFT (mode, all.reg, all.reg); 276 all.shift_mult = gen_rtx_MULT (mode, all.reg, all.reg); 277 all.shift_add = gen_rtx_PLUS (mode, all.shift_mult, all.reg); 278 all.shift_sub0 = gen_rtx_MINUS (mode, all.shift_mult, all.reg); 279 all.shift_sub1 = gen_rtx_MINUS (mode, all.reg, all.shift_mult); 280 all.trunc = gen_rtx_TRUNCATE (mode, all.reg); 281 282 for (speed = 0; speed < 2; speed++) 283 { 284 crtl->maybe_hot_insn_p = speed; 285 set_zero_cost (speed, set_src_cost (const0_rtx, speed)); 286 287 for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT; 288 mode = (machine_mode)(mode + 1)) 289 init_expmed_one_mode (&all, mode, speed); 290 291 if (MIN_MODE_PARTIAL_INT != VOIDmode) 292 for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT; 293 mode = (machine_mode)(mode + 1)) 294 init_expmed_one_mode (&all, mode, speed); 295 296 if (MIN_MODE_VECTOR_INT != VOIDmode) 297 for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT; 298 mode = (machine_mode)(mode + 1)) 299 init_expmed_one_mode (&all, mode, speed); 300 } 301 302 if (alg_hash_used_p ()) 303 { 304 struct alg_hash_entry *p = alg_hash_entry_ptr (0); 305 memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES); 306 } 307 else 308 set_alg_hash_used_p (true); 309 default_rtl_profile (); 310 311 ggc_free (all.trunc); 312 ggc_free (all.shift_sub1); 313 ggc_free (all.shift_sub0); 314 ggc_free (all.shift_add); 315 ggc_free (all.shift_mult); 316 ggc_free (all.shift); 317 ggc_free (all.wide_trunc); 
318 ggc_free (all.wide_lshr); 319 ggc_free (all.wide_mult); 320 ggc_free (all.zext); 321 ggc_free (all.smod_32); 322 ggc_free (all.sdiv_32); 323 ggc_free (all.udiv); 324 ggc_free (all.sdiv); 325 ggc_free (all.mult); 326 ggc_free (all.neg); 327 ggc_free (all.plus); 328 ggc_free (all.reg); 329 } 330 331 /* Return an rtx representing minus the value of X. 332 MODE is the intended mode of the result, 333 useful if X is a CONST_INT. */ 334 335 rtx 336 negate_rtx (machine_mode mode, rtx x) 337 { 338 rtx result = simplify_unary_operation (NEG, mode, x, mode); 339 340 if (result == 0) 341 result = expand_unop (mode, neg_optab, x, NULL_RTX, 0); 342 343 return result; 344 } 345 346 /* Adjust bitfield memory MEM so that it points to the first unit of mode 347 MODE that contains a bitfield of size BITSIZE at bit position BITNUM. 348 If MODE is BLKmode, return a reference to every byte in the bitfield. 349 Set *NEW_BITNUM to the bit position of the field within the new memory. */ 350 351 static rtx 352 narrow_bit_field_mem (rtx mem, machine_mode mode, 353 unsigned HOST_WIDE_INT bitsize, 354 unsigned HOST_WIDE_INT bitnum, 355 unsigned HOST_WIDE_INT *new_bitnum) 356 { 357 if (mode == BLKmode) 358 { 359 *new_bitnum = bitnum % BITS_PER_UNIT; 360 HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT; 361 HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1) 362 / BITS_PER_UNIT); 363 return adjust_bitfield_address_size (mem, mode, offset, size); 364 } 365 else 366 { 367 unsigned int unit = GET_MODE_BITSIZE (mode); 368 *new_bitnum = bitnum % unit; 369 HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT; 370 return adjust_bitfield_address (mem, mode, offset); 371 } 372 } 373 374 /* The caller wants to perform insertion or extraction PATTERN on a 375 bitfield of size BITSIZE at BITNUM bits into memory operand OP0. 376 BITREGION_START and BITREGION_END are as for store_bit_field 377 and FIELDMODE is the natural mode of the field. 378 379 Search for a mode that is compatible with the memory access 380 restrictions and (where applicable) with a register insertion or 381 extraction. Return the new memory on success, storing the adjusted 382 bit position in *NEW_BITNUM. Return null otherwise. */ 383 384 static rtx 385 adjust_bit_field_mem_for_reg (enum extraction_pattern pattern, 386 rtx op0, HOST_WIDE_INT bitsize, 387 HOST_WIDE_INT bitnum, 388 unsigned HOST_WIDE_INT bitregion_start, 389 unsigned HOST_WIDE_INT bitregion_end, 390 machine_mode fieldmode, 391 unsigned HOST_WIDE_INT *new_bitnum) 392 { 393 bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start, 394 bitregion_end, MEM_ALIGN (op0), 395 MEM_VOLATILE_P (op0)); 396 machine_mode best_mode; 397 if (iter.next_mode (&best_mode)) 398 { 399 /* We can use a memory in BEST_MODE. See whether this is true for 400 any wider modes. All other things being equal, we prefer to 401 use the widest mode possible because it tends to expose more 402 CSE opportunities. */ 403 if (!iter.prefer_smaller_modes ()) 404 { 405 /* Limit the search to the mode required by the corresponding 406 register insertion or extraction instruction, if any. 
*/ 407 machine_mode limit_mode = word_mode; 408 extraction_insn insn; 409 if (get_best_reg_extraction_insn (&insn, pattern, 410 GET_MODE_BITSIZE (best_mode), 411 fieldmode)) 412 limit_mode = insn.field_mode; 413 414 machine_mode wider_mode; 415 while (iter.next_mode (&wider_mode) 416 && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode)) 417 best_mode = wider_mode; 418 } 419 return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum, 420 new_bitnum); 421 } 422 return NULL_RTX; 423 } 424 425 /* Return true if a bitfield of size BITSIZE at bit number BITNUM within 426 a structure of mode STRUCT_MODE represents a lowpart subreg. The subreg 427 offset is then BITNUM / BITS_PER_UNIT. */ 428 429 static bool 430 lowpart_bit_field_p (unsigned HOST_WIDE_INT bitnum, 431 unsigned HOST_WIDE_INT bitsize, 432 machine_mode struct_mode) 433 { 434 if (BYTES_BIG_ENDIAN) 435 return (bitnum % BITS_PER_UNIT == 0 436 && (bitnum + bitsize == GET_MODE_BITSIZE (struct_mode) 437 || (bitnum + bitsize) % BITS_PER_WORD == 0)); 438 else 439 return bitnum % BITS_PER_WORD == 0; 440 } 441 442 /* Return true if -fstrict-volatile-bitfields applies to an access of OP0 443 containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE. 444 Return false if the access would touch memory outside the range 445 BITREGION_START to BITREGION_END for conformance to the C++ memory 446 model. */ 447 448 static bool 449 strict_volatile_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize, 450 unsigned HOST_WIDE_INT bitnum, 451 machine_mode fieldmode, 452 unsigned HOST_WIDE_INT bitregion_start, 453 unsigned HOST_WIDE_INT bitregion_end) 454 { 455 unsigned HOST_WIDE_INT modesize = GET_MODE_BITSIZE (fieldmode); 456 457 /* -fstrict-volatile-bitfields must be enabled and we must have a 458 volatile MEM. */ 459 if (!MEM_P (op0) 460 || !MEM_VOLATILE_P (op0) 461 || flag_strict_volatile_bitfields <= 0) 462 return false; 463 464 /* Non-integral modes likely only happen with packed structures. 465 Punt. */ 466 if (!SCALAR_INT_MODE_P (fieldmode)) 467 return false; 468 469 /* The bit size must not be larger than the field mode, and 470 the field mode must not be larger than a word. */ 471 if (bitsize > modesize || modesize > BITS_PER_WORD) 472 return false; 473 474 /* Check for cases of unaligned fields that must be split. */ 475 if (bitnum % modesize + bitsize > modesize) 476 return false; 477 478 /* The memory must be sufficiently aligned for a MODESIZE access. 479 This condition guarantees, that the memory access will not 480 touch anything after the end of the structure. */ 481 if (MEM_ALIGN (op0) < modesize) 482 return false; 483 484 /* Check for cases where the C++ memory model applies. */ 485 if (bitregion_end != 0 486 && (bitnum - bitnum % modesize < bitregion_start 487 || bitnum - bitnum % modesize + modesize - 1 > bitregion_end)) 488 return false; 489 490 return true; 491 } 492 493 /* Return true if OP is a memory and if a bitfield of size BITSIZE at 494 bit number BITNUM can be treated as a simple value of mode MODE. */ 495 496 static bool 497 simple_mem_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize, 498 unsigned HOST_WIDE_INT bitnum, machine_mode mode) 499 { 500 return (MEM_P (op0) 501 && bitnum % BITS_PER_UNIT == 0 502 && bitsize == GET_MODE_BITSIZE (mode) 503 && (!SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0)) 504 || (bitnum % GET_MODE_ALIGNMENT (mode) == 0 505 && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode)))); 506 } 507 508 /* Try to use instruction INSV to store VALUE into a field of OP0. 
509 BITSIZE and BITNUM are as for store_bit_field. */ 510 511 static bool 512 store_bit_field_using_insv (const extraction_insn *insv, rtx op0, 513 unsigned HOST_WIDE_INT bitsize, 514 unsigned HOST_WIDE_INT bitnum, 515 rtx value) 516 { 517 struct expand_operand ops[4]; 518 rtx value1; 519 rtx xop0 = op0; 520 rtx_insn *last = get_last_insn (); 521 bool copy_back = false; 522 523 machine_mode op_mode = insv->field_mode; 524 unsigned int unit = GET_MODE_BITSIZE (op_mode); 525 if (bitsize == 0 || bitsize > unit) 526 return false; 527 528 if (MEM_P (xop0)) 529 /* Get a reference to the first byte of the field. */ 530 xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum, 531 &bitnum); 532 else 533 { 534 /* Convert from counting within OP0 to counting in OP_MODE. */ 535 if (BYTES_BIG_ENDIAN) 536 bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0)); 537 538 /* If xop0 is a register, we need it in OP_MODE 539 to make it acceptable to the format of insv. */ 540 if (GET_CODE (xop0) == SUBREG) 541 /* We can't just change the mode, because this might clobber op0, 542 and we will need the original value of op0 if insv fails. */ 543 xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0)); 544 if (REG_P (xop0) && GET_MODE (xop0) != op_mode) 545 xop0 = gen_lowpart_SUBREG (op_mode, xop0); 546 } 547 548 /* If the destination is a paradoxical subreg such that we need a 549 truncate to the inner mode, perform the insertion on a temporary and 550 truncate the result to the original destination. Note that we can't 551 just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N 552 X) 0)) is (reg:N X). */ 553 if (GET_CODE (xop0) == SUBREG 554 && REG_P (SUBREG_REG (xop0)) 555 && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)), 556 op_mode)) 557 { 558 rtx tem = gen_reg_rtx (op_mode); 559 emit_move_insn (tem, xop0); 560 xop0 = tem; 561 copy_back = true; 562 } 563 564 /* There are similar overflow check at the start of store_bit_field_1, 565 but that only check the situation where the field lies completely 566 outside the register, while there do have situation where the field 567 lies partialy in the register, we need to adjust bitsize for this 568 partial overflow situation. Without this fix, pr48335-2.c on big-endian 569 will broken on those arch support bit insert instruction, like arm, aarch64 570 etc. */ 571 if (bitsize + bitnum > unit && bitnum < unit) 572 { 573 warning (OPT_Wextra, "write of %wu-bit data outside the bound of " 574 "destination object, data truncated into %wu-bit", 575 bitsize, unit - bitnum); 576 bitsize = unit - bitnum; 577 } 578 579 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count 580 "backwards" from the size of the unit we are inserting into. 581 Otherwise, we count bits from the most significant on a 582 BYTES/BITS_BIG_ENDIAN machine. */ 583 584 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN) 585 bitnum = unit - bitsize - bitnum; 586 587 /* Convert VALUE to op_mode (which insv insn wants) in VALUE1. */ 588 value1 = value; 589 if (GET_MODE (value) != op_mode) 590 { 591 if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize) 592 { 593 /* Optimization: Don't bother really extending VALUE 594 if it has all the bits we will actually use. However, 595 if we must narrow it, be sure we do it correctly. */ 596 597 if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode)) 598 { 599 rtx tmp; 600 601 tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0); 602 if (! 
tmp) 603 tmp = simplify_gen_subreg (op_mode, 604 force_reg (GET_MODE (value), 605 value1), 606 GET_MODE (value), 0); 607 value1 = tmp; 608 } 609 else 610 value1 = gen_lowpart (op_mode, value1); 611 } 612 else if (CONST_INT_P (value)) 613 value1 = gen_int_mode (INTVAL (value), op_mode); 614 else 615 /* Parse phase is supposed to make VALUE's data type 616 match that of the component reference, which is a type 617 at least as wide as the field; so VALUE should have 618 a mode that corresponds to that type. */ 619 gcc_assert (CONSTANT_P (value)); 620 } 621 622 create_fixed_operand (&ops[0], xop0); 623 create_integer_operand (&ops[1], bitsize); 624 create_integer_operand (&ops[2], bitnum); 625 create_input_operand (&ops[3], value1, op_mode); 626 if (maybe_expand_insn (insv->icode, 4, ops)) 627 { 628 if (copy_back) 629 convert_move (op0, xop0, true); 630 return true; 631 } 632 delete_insns_since (last); 633 return false; 634 } 635 636 /* A subroutine of store_bit_field, with the same arguments. Return true 637 if the operation could be implemented. 638 639 If FALLBACK_P is true, fall back to store_fixed_bit_field if we have 640 no other way of implementing the operation. If FALLBACK_P is false, 641 return false instead. */ 642 643 static bool 644 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize, 645 unsigned HOST_WIDE_INT bitnum, 646 unsigned HOST_WIDE_INT bitregion_start, 647 unsigned HOST_WIDE_INT bitregion_end, 648 machine_mode fieldmode, 649 rtx value, bool fallback_p) 650 { 651 rtx op0 = str_rtx; 652 rtx orig_value; 653 654 while (GET_CODE (op0) == SUBREG) 655 { 656 /* The following line once was done only if WORDS_BIG_ENDIAN, 657 but I think that is a mistake. WORDS_BIG_ENDIAN is 658 meaningful at a much higher level; when structures are copied 659 between memory and regs, the higher-numbered regs 660 always get higher addresses. */ 661 int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0))); 662 int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0)); 663 int byte_offset = 0; 664 665 /* Paradoxical subregs need special handling on big endian machines. */ 666 if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size) 667 { 668 int difference = inner_mode_size - outer_mode_size; 669 670 if (WORDS_BIG_ENDIAN) 671 byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD; 672 if (BYTES_BIG_ENDIAN) 673 byte_offset += difference % UNITS_PER_WORD; 674 } 675 else 676 byte_offset = SUBREG_BYTE (op0); 677 678 bitnum += byte_offset * BITS_PER_UNIT; 679 op0 = SUBREG_REG (op0); 680 } 681 682 /* No action is needed if the target is a register and if the field 683 lies completely outside that register. This can occur if the source 684 code contains an out-of-bounds access to a small array. */ 685 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0))) 686 return true; 687 688 /* Use vec_set patterns for inserting parts of vectors whenever 689 available. 
*/ 690 if (VECTOR_MODE_P (GET_MODE (op0)) 691 && !MEM_P (op0) 692 && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing 693 && fieldmode == GET_MODE_INNER (GET_MODE (op0)) 694 && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0))) 695 && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0))))) 696 { 697 struct expand_operand ops[3]; 698 machine_mode outermode = GET_MODE (op0); 699 machine_mode innermode = GET_MODE_INNER (outermode); 700 enum insn_code icode = optab_handler (vec_set_optab, outermode); 701 int pos = bitnum / GET_MODE_BITSIZE (innermode); 702 703 create_fixed_operand (&ops[0], op0); 704 create_input_operand (&ops[1], value, innermode); 705 create_integer_operand (&ops[2], pos); 706 if (maybe_expand_insn (icode, 3, ops)) 707 return true; 708 } 709 710 /* If the target is a register, overwriting the entire object, or storing 711 a full-word or multi-word field can be done with just a SUBREG. */ 712 if (!MEM_P (op0) 713 && bitsize == GET_MODE_BITSIZE (fieldmode) 714 && ((bitsize == GET_MODE_BITSIZE (GET_MODE (op0)) && bitnum == 0) 715 || (bitsize % BITS_PER_WORD == 0 && bitnum % BITS_PER_WORD == 0))) 716 { 717 /* Use the subreg machinery either to narrow OP0 to the required 718 words or to cope with mode punning between equal-sized modes. 719 In the latter case, use subreg on the rhs side, not lhs. */ 720 rtx sub; 721 722 if (bitsize == GET_MODE_BITSIZE (GET_MODE (op0))) 723 { 724 sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0); 725 if (sub) 726 { 727 emit_move_insn (op0, sub); 728 return true; 729 } 730 } 731 else 732 { 733 sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0), 734 bitnum / BITS_PER_UNIT); 735 if (sub) 736 { 737 emit_move_insn (sub, value); 738 return true; 739 } 740 } 741 } 742 743 /* If the target is memory, storing any naturally aligned field can be 744 done with a simple store. For targets that support fast unaligned 745 memory, any naturally sized, unit aligned field can be done directly. */ 746 if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode)) 747 { 748 op0 = adjust_bitfield_address (op0, fieldmode, bitnum / BITS_PER_UNIT); 749 emit_move_insn (op0, value); 750 return true; 751 } 752 753 /* Make sure we are playing with integral modes. Pun with subregs 754 if we aren't. This must come after the entire register case above, 755 since that case is valid for any mode. The following cases are only 756 valid for integral modes. */ 757 { 758 machine_mode imode = int_mode_for_mode (GET_MODE (op0)); 759 if (imode != GET_MODE (op0)) 760 { 761 if (MEM_P (op0)) 762 op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0)); 763 else 764 { 765 gcc_assert (imode != BLKmode); 766 op0 = gen_lowpart (imode, op0); 767 } 768 } 769 } 770 771 /* Storing an lsb-aligned field in a register 772 can be done with a movstrict instruction. */ 773 774 if (!MEM_P (op0) 775 && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0)) 776 && bitsize == GET_MODE_BITSIZE (fieldmode) 777 && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing) 778 { 779 struct expand_operand ops[2]; 780 enum insn_code icode = optab_handler (movstrict_optab, fieldmode); 781 rtx arg0 = op0; 782 unsigned HOST_WIDE_INT subreg_off; 783 784 if (GET_CODE (arg0) == SUBREG) 785 { 786 /* Else we've got some float mode source being extracted into 787 a different float mode destination -- this combination of 788 subregs results in Severe Tire Damage. 
*/ 789 gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode 790 || GET_MODE_CLASS (fieldmode) == MODE_INT 791 || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT); 792 arg0 = SUBREG_REG (arg0); 793 } 794 795 subreg_off = bitnum / BITS_PER_UNIT; 796 if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off)) 797 { 798 arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off); 799 800 create_fixed_operand (&ops[0], arg0); 801 /* Shrink the source operand to FIELDMODE. */ 802 create_convert_operand_to (&ops[1], value, fieldmode, false); 803 if (maybe_expand_insn (icode, 2, ops)) 804 return true; 805 } 806 } 807 808 /* Handle fields bigger than a word. */ 809 810 if (bitsize > BITS_PER_WORD) 811 { 812 /* Here we transfer the words of the field 813 in the order least significant first. 814 This is because the most significant word is the one which may 815 be less than full. 816 However, only do that if the value is not BLKmode. */ 817 818 unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode; 819 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD; 820 unsigned int i; 821 rtx_insn *last; 822 823 /* This is the mode we must force value to, so that there will be enough 824 subwords to extract. Note that fieldmode will often (always?) be 825 VOIDmode, because that is what store_field uses to indicate that this 826 is a bit field, but passing VOIDmode to operand_subword_force 827 is not allowed. */ 828 fieldmode = GET_MODE (value); 829 if (fieldmode == VOIDmode) 830 fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT); 831 832 last = get_last_insn (); 833 for (i = 0; i < nwords; i++) 834 { 835 /* If I is 0, use the low-order word in both field and target; 836 if I is 1, use the next to lowest word; and so on. */ 837 unsigned int wordnum = (backwards 838 ? GET_MODE_SIZE (fieldmode) / UNITS_PER_WORD 839 - i - 1 840 : i); 841 unsigned int bit_offset = (backwards 842 ? MAX ((int) bitsize - ((int) i + 1) 843 * BITS_PER_WORD, 844 0) 845 : (int) i * BITS_PER_WORD); 846 rtx value_word = operand_subword_force (value, wordnum, fieldmode); 847 unsigned HOST_WIDE_INT new_bitsize = 848 MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD); 849 850 /* If the remaining chunk doesn't have full wordsize we have 851 to make sure that for big endian machines the higher order 852 bits are used. */ 853 if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards) 854 value_word = simplify_expand_binop (word_mode, lshr_optab, 855 value_word, 856 GEN_INT (BITS_PER_WORD 857 - new_bitsize), 858 NULL_RTX, true, 859 OPTAB_LIB_WIDEN); 860 861 if (!store_bit_field_1 (op0, new_bitsize, 862 bitnum + bit_offset, 863 bitregion_start, bitregion_end, 864 word_mode, 865 value_word, fallback_p)) 866 { 867 delete_insns_since (last); 868 return false; 869 } 870 } 871 return true; 872 } 873 874 /* If VALUE has a floating-point or complex mode, access it as an 875 integer of the corresponding size. This can occur on a machine 876 with 64 bit registers that uses SFmode for float. It can also 877 occur for unaligned float or complex fields. */ 878 orig_value = value; 879 if (GET_MODE (value) != VOIDmode 880 && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT 881 && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT) 882 { 883 value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value))); 884 emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value); 885 } 886 887 /* If OP0 is a multi-word register, narrow it to the affected word. 
888 If the region spans two words, defer to store_split_bit_field. */ 889 if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD) 890 { 891 op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0), 892 bitnum / BITS_PER_WORD * UNITS_PER_WORD); 893 gcc_assert (op0); 894 bitnum %= BITS_PER_WORD; 895 if (bitnum + bitsize > BITS_PER_WORD) 896 { 897 if (!fallback_p) 898 return false; 899 900 store_split_bit_field (op0, bitsize, bitnum, bitregion_start, 901 bitregion_end, value); 902 return true; 903 } 904 } 905 906 /* From here on we can assume that the field to be stored in fits 907 within a word. If the destination is a register, it too fits 908 in a word. */ 909 910 extraction_insn insv; 911 if (!MEM_P (op0) 912 && get_best_reg_extraction_insn (&insv, EP_insv, 913 GET_MODE_BITSIZE (GET_MODE (op0)), 914 fieldmode) 915 && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value)) 916 return true; 917 918 /* If OP0 is a memory, try copying it to a register and seeing if a 919 cheap register alternative is available. */ 920 if (MEM_P (op0)) 921 { 922 if (get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum, 923 fieldmode) 924 && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value)) 925 return true; 926 927 rtx_insn *last = get_last_insn (); 928 929 /* Try loading part of OP0 into a register, inserting the bitfield 930 into that, and then copying the result back to OP0. */ 931 unsigned HOST_WIDE_INT bitpos; 932 rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum, 933 bitregion_start, bitregion_end, 934 fieldmode, &bitpos); 935 if (xop0) 936 { 937 rtx tempreg = copy_to_reg (xop0); 938 if (store_bit_field_1 (tempreg, bitsize, bitpos, 939 bitregion_start, bitregion_end, 940 fieldmode, orig_value, false)) 941 { 942 emit_move_insn (xop0, tempreg); 943 return true; 944 } 945 delete_insns_since (last); 946 } 947 } 948 949 if (!fallback_p) 950 return false; 951 952 store_fixed_bit_field (op0, bitsize, bitnum, bitregion_start, 953 bitregion_end, value); 954 return true; 955 } 956 957 /* Generate code to store value from rtx VALUE 958 into a bit-field within structure STR_RTX 959 containing BITSIZE bits starting at bit BITNUM. 960 961 BITREGION_START is bitpos of the first bitfield in this region. 962 BITREGION_END is the bitpos of the ending bitfield in this region. 963 These two fields are 0, if the C++ memory model does not apply, 964 or we are not interested in keeping track of bitfield regions. 965 966 FIELDMODE is the machine-mode of the FIELD_DECL node for this field. */ 967 968 void 969 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize, 970 unsigned HOST_WIDE_INT bitnum, 971 unsigned HOST_WIDE_INT bitregion_start, 972 unsigned HOST_WIDE_INT bitregion_end, 973 machine_mode fieldmode, 974 rtx value) 975 { 976 /* Handle -fstrict-volatile-bitfields in the cases where it applies. */ 977 if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, fieldmode, 978 bitregion_start, bitregion_end)) 979 { 980 /* Storing of a full word can be done with a simple store. 981 We know here that the field can be accessed with one single 982 instruction. For targets that support unaligned memory, 983 an unaligned access may be necessary. 
*/ 984 if (bitsize == GET_MODE_BITSIZE (fieldmode)) 985 { 986 str_rtx = adjust_bitfield_address (str_rtx, fieldmode, 987 bitnum / BITS_PER_UNIT); 988 gcc_assert (bitnum % BITS_PER_UNIT == 0); 989 emit_move_insn (str_rtx, value); 990 } 991 else 992 { 993 rtx temp; 994 995 str_rtx = narrow_bit_field_mem (str_rtx, fieldmode, bitsize, bitnum, 996 &bitnum); 997 gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (fieldmode)); 998 temp = copy_to_reg (str_rtx); 999 if (!store_bit_field_1 (temp, bitsize, bitnum, 0, 0, 1000 fieldmode, value, true)) 1001 gcc_unreachable (); 1002 1003 emit_move_insn (str_rtx, temp); 1004 } 1005 1006 return; 1007 } 1008 1009 /* Under the C++0x memory model, we must not touch bits outside the 1010 bit region. Adjust the address to start at the beginning of the 1011 bit region. */ 1012 if (MEM_P (str_rtx) && bitregion_start > 0) 1013 { 1014 machine_mode bestmode; 1015 HOST_WIDE_INT offset, size; 1016 1017 gcc_assert ((bitregion_start % BITS_PER_UNIT) == 0); 1018 1019 offset = bitregion_start / BITS_PER_UNIT; 1020 bitnum -= bitregion_start; 1021 size = (bitnum + bitsize + BITS_PER_UNIT - 1) / BITS_PER_UNIT; 1022 bitregion_end -= bitregion_start; 1023 bitregion_start = 0; 1024 bestmode = get_best_mode (bitsize, bitnum, 1025 bitregion_start, bitregion_end, 1026 MEM_ALIGN (str_rtx), VOIDmode, 1027 MEM_VOLATILE_P (str_rtx)); 1028 str_rtx = adjust_bitfield_address_size (str_rtx, bestmode, offset, size); 1029 } 1030 1031 if (!store_bit_field_1 (str_rtx, bitsize, bitnum, 1032 bitregion_start, bitregion_end, 1033 fieldmode, value, true)) 1034 gcc_unreachable (); 1035 } 1036 1037 /* Use shifts and boolean operations to store VALUE into a bit field of 1038 width BITSIZE in OP0, starting at bit BITNUM. */ 1039 1040 static void 1041 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize, 1042 unsigned HOST_WIDE_INT bitnum, 1043 unsigned HOST_WIDE_INT bitregion_start, 1044 unsigned HOST_WIDE_INT bitregion_end, 1045 rtx value) 1046 { 1047 /* There is a case not handled here: 1048 a structure with a known alignment of just a halfword 1049 and a field split across two aligned halfwords within the structure. 1050 Or likewise a structure with a known alignment of just a byte 1051 and a field split across two bytes. 1052 Such cases are not supposed to be able to occur. */ 1053 1054 if (MEM_P (op0)) 1055 { 1056 machine_mode mode = GET_MODE (op0); 1057 if (GET_MODE_BITSIZE (mode) == 0 1058 || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode)) 1059 mode = word_mode; 1060 mode = get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end, 1061 MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0)); 1062 1063 if (mode == VOIDmode) 1064 { 1065 /* The only way this should occur is if the field spans word 1066 boundaries. */ 1067 store_split_bit_field (op0, bitsize, bitnum, bitregion_start, 1068 bitregion_end, value); 1069 return; 1070 } 1071 1072 op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum); 1073 } 1074 1075 store_fixed_bit_field_1 (op0, bitsize, bitnum, value); 1076 } 1077 1078 /* Helper function for store_fixed_bit_field, stores 1079 the bit field always using the MODE of OP0. 
*/ 1080 1081 static void 1082 store_fixed_bit_field_1 (rtx op0, unsigned HOST_WIDE_INT bitsize, 1083 unsigned HOST_WIDE_INT bitnum, 1084 rtx value) 1085 { 1086 machine_mode mode; 1087 rtx temp; 1088 int all_zero = 0; 1089 int all_one = 0; 1090 1091 mode = GET_MODE (op0); 1092 gcc_assert (SCALAR_INT_MODE_P (mode)); 1093 1094 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode) 1095 for invalid input, such as f5 from gcc.dg/pr48335-2.c. */ 1096 1097 if (BYTES_BIG_ENDIAN) 1098 /* BITNUM is the distance between our msb 1099 and that of the containing datum. 1100 Convert it to the distance from the lsb. */ 1101 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum; 1102 1103 /* Now BITNUM is always the distance between our lsb 1104 and that of OP0. */ 1105 1106 /* Shift VALUE left by BITNUM bits. If VALUE is not constant, 1107 we must first convert its mode to MODE. */ 1108 1109 if (CONST_INT_P (value)) 1110 { 1111 unsigned HOST_WIDE_INT v = UINTVAL (value); 1112 1113 if (bitsize < HOST_BITS_PER_WIDE_INT) 1114 v &= ((unsigned HOST_WIDE_INT) 1 << bitsize) - 1; 1115 1116 if (v == 0) 1117 all_zero = 1; 1118 else if ((bitsize < HOST_BITS_PER_WIDE_INT 1119 && v == ((unsigned HOST_WIDE_INT) 1 << bitsize) - 1) 1120 || (bitsize == HOST_BITS_PER_WIDE_INT 1121 && v == (unsigned HOST_WIDE_INT) -1)) 1122 all_one = 1; 1123 1124 value = lshift_value (mode, v, bitnum); 1125 } 1126 else 1127 { 1128 int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize 1129 && bitnum + bitsize != GET_MODE_BITSIZE (mode)); 1130 1131 if (GET_MODE (value) != mode) 1132 value = convert_to_mode (mode, value, 1); 1133 1134 if (must_and) 1135 value = expand_binop (mode, and_optab, value, 1136 mask_rtx (mode, 0, bitsize, 0), 1137 NULL_RTX, 1, OPTAB_LIB_WIDEN); 1138 if (bitnum > 0) 1139 value = expand_shift (LSHIFT_EXPR, mode, value, 1140 bitnum, NULL_RTX, 1); 1141 } 1142 1143 /* Now clear the chosen bits in OP0, 1144 except that if VALUE is -1 we need not bother. */ 1145 /* We keep the intermediates in registers to allow CSE to combine 1146 consecutive bitfield assignments. */ 1147 1148 temp = force_reg (mode, op0); 1149 1150 if (! all_one) 1151 { 1152 temp = expand_binop (mode, and_optab, temp, 1153 mask_rtx (mode, bitnum, bitsize, 1), 1154 NULL_RTX, 1, OPTAB_LIB_WIDEN); 1155 temp = force_reg (mode, temp); 1156 } 1157 1158 /* Now logical-or VALUE into OP0, unless it is zero. */ 1159 1160 if (! all_zero) 1161 { 1162 temp = expand_binop (mode, ior_optab, temp, value, 1163 NULL_RTX, 1, OPTAB_LIB_WIDEN); 1164 temp = force_reg (mode, temp); 1165 } 1166 1167 if (op0 != temp) 1168 { 1169 op0 = copy_rtx (op0); 1170 emit_move_insn (op0, temp); 1171 } 1172 } 1173 1174 /* Store a bit field that is split across multiple accessible memory objects. 1175 1176 OP0 is the REG, SUBREG or MEM rtx for the first of the objects. 1177 BITSIZE is the field width; BITPOS the position of its first bit 1178 (within the word). 1179 VALUE is the value to store. 1180 1181 This does not yet handle fields wider than BITS_PER_WORD. */ 1182 1183 static void 1184 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize, 1185 unsigned HOST_WIDE_INT bitpos, 1186 unsigned HOST_WIDE_INT bitregion_start, 1187 unsigned HOST_WIDE_INT bitregion_end, 1188 rtx value) 1189 { 1190 unsigned int unit; 1191 unsigned int bitsdone = 0; 1192 1193 /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that 1194 much at a time. 
*/ 1195 if (REG_P (op0) || GET_CODE (op0) == SUBREG) 1196 unit = BITS_PER_WORD; 1197 else 1198 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD); 1199 1200 /* If OP0 is a memory with a mode, then UNIT must not be larger than 1201 OP0's mode as well. Otherwise, store_fixed_bit_field will call us 1202 again, and we will mutually recurse forever. */ 1203 if (MEM_P (op0) && GET_MODE_BITSIZE (GET_MODE (op0)) > 0) 1204 unit = MIN (unit, GET_MODE_BITSIZE (GET_MODE (op0))); 1205 1206 /* If VALUE is a constant other than a CONST_INT, get it into a register in 1207 WORD_MODE. If we can do this using gen_lowpart_common, do so. Note 1208 that VALUE might be a floating-point constant. */ 1209 if (CONSTANT_P (value) && !CONST_INT_P (value)) 1210 { 1211 rtx word = gen_lowpart_common (word_mode, value); 1212 1213 if (word && (value != word)) 1214 value = word; 1215 else 1216 value = gen_lowpart_common (word_mode, 1217 force_reg (GET_MODE (value) != VOIDmode 1218 ? GET_MODE (value) 1219 : word_mode, value)); 1220 } 1221 1222 while (bitsdone < bitsize) 1223 { 1224 unsigned HOST_WIDE_INT thissize; 1225 rtx part, word; 1226 unsigned HOST_WIDE_INT thispos; 1227 unsigned HOST_WIDE_INT offset; 1228 1229 offset = (bitpos + bitsdone) / unit; 1230 thispos = (bitpos + bitsdone) % unit; 1231 1232 /* When region of bytes we can touch is restricted, decrease 1233 UNIT close to the end of the region as needed. If op0 is a REG 1234 or SUBREG of REG, don't do this, as there can't be data races 1235 on a register and we can expand shorter code in some cases. */ 1236 if (bitregion_end 1237 && unit > BITS_PER_UNIT 1238 && bitpos + bitsdone - thispos + unit > bitregion_end + 1 1239 && !REG_P (op0) 1240 && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0)))) 1241 { 1242 unit = unit / 2; 1243 continue; 1244 } 1245 1246 /* THISSIZE must not overrun a word boundary. Otherwise, 1247 store_fixed_bit_field will call us again, and we will mutually 1248 recurse forever. */ 1249 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD); 1250 thissize = MIN (thissize, unit - thispos); 1251 1252 if (BYTES_BIG_ENDIAN) 1253 { 1254 /* Fetch successively less significant portions. */ 1255 if (CONST_INT_P (value)) 1256 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value)) 1257 >> (bitsize - bitsdone - thissize)) 1258 & (((HOST_WIDE_INT) 1 << thissize) - 1)); 1259 else 1260 { 1261 int total_bits = GET_MODE_BITSIZE (GET_MODE (value)); 1262 /* The args are chosen so that the last part includes the 1263 lsb. Give extract_bit_field the value it needs (with 1264 endianness compensation) to fetch the piece we want. */ 1265 part = extract_fixed_bit_field (word_mode, value, thissize, 1266 total_bits - bitsize + bitsdone, 1267 NULL_RTX, 1); 1268 } 1269 } 1270 else 1271 { 1272 /* Fetch successively more significant portions. */ 1273 if (CONST_INT_P (value)) 1274 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value)) 1275 >> bitsdone) 1276 & (((HOST_WIDE_INT) 1 << thissize) - 1)); 1277 else 1278 part = extract_fixed_bit_field (word_mode, value, thissize, 1279 bitsdone, NULL_RTX, 1); 1280 } 1281 1282 /* If OP0 is a register, then handle OFFSET here. 1283 1284 When handling multiword bitfields, extract_bit_field may pass 1285 down a word_mode SUBREG of a larger REG for a bitfield that actually 1286 crosses a word boundary. Thus, for a SUBREG, we must find 1287 the current word starting from the base register. 
*/ 1288 if (GET_CODE (op0) == SUBREG) 1289 { 1290 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) 1291 + (offset * unit / BITS_PER_WORD); 1292 machine_mode sub_mode = GET_MODE (SUBREG_REG (op0)); 1293 if (sub_mode != BLKmode && GET_MODE_SIZE (sub_mode) < UNITS_PER_WORD) 1294 word = word_offset ? const0_rtx : op0; 1295 else 1296 word = operand_subword_force (SUBREG_REG (op0), word_offset, 1297 GET_MODE (SUBREG_REG (op0))); 1298 offset &= BITS_PER_WORD / unit - 1; 1299 } 1300 else if (REG_P (op0)) 1301 { 1302 machine_mode op0_mode = GET_MODE (op0); 1303 if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD) 1304 word = offset ? const0_rtx : op0; 1305 else 1306 word = operand_subword_force (op0, offset * unit / BITS_PER_WORD, 1307 GET_MODE (op0)); 1308 offset &= BITS_PER_WORD / unit - 1; 1309 } 1310 else 1311 word = op0; 1312 1313 /* OFFSET is in UNITs, and UNIT is in bits. If WORD is const0_rtx, 1314 it is just an out-of-bounds access. Ignore it. */ 1315 if (word != const0_rtx) 1316 store_fixed_bit_field (word, thissize, offset * unit + thispos, 1317 bitregion_start, bitregion_end, part); 1318 bitsdone += thissize; 1319 } 1320 } 1321 1322 /* A subroutine of extract_bit_field_1 that converts return value X 1323 to either MODE or TMODE. MODE, TMODE and UNSIGNEDP are arguments 1324 to extract_bit_field. */ 1325 1326 static rtx 1327 convert_extracted_bit_field (rtx x, machine_mode mode, 1328 machine_mode tmode, bool unsignedp) 1329 { 1330 if (GET_MODE (x) == tmode || GET_MODE (x) == mode) 1331 return x; 1332 1333 /* If the x mode is not a scalar integral, first convert to the 1334 integer mode of that size and then access it as a floating-point 1335 value via a SUBREG. */ 1336 if (!SCALAR_INT_MODE_P (tmode)) 1337 { 1338 machine_mode smode; 1339 1340 smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0); 1341 x = convert_to_mode (smode, x, unsignedp); 1342 x = force_reg (smode, x); 1343 return gen_lowpart (tmode, x); 1344 } 1345 1346 return convert_to_mode (tmode, x, unsignedp); 1347 } 1348 1349 /* Try to use an ext(z)v pattern to extract a field from OP0. 1350 Return the extracted value on success, otherwise return null. 1351 EXT_MODE is the mode of the extraction and the other arguments 1352 are as for extract_bit_field. */ 1353 1354 static rtx 1355 extract_bit_field_using_extv (const extraction_insn *extv, rtx op0, 1356 unsigned HOST_WIDE_INT bitsize, 1357 unsigned HOST_WIDE_INT bitnum, 1358 int unsignedp, rtx target, 1359 machine_mode mode, machine_mode tmode) 1360 { 1361 struct expand_operand ops[4]; 1362 rtx spec_target = target; 1363 rtx spec_target_subreg = 0; 1364 machine_mode ext_mode = extv->field_mode; 1365 unsigned unit = GET_MODE_BITSIZE (ext_mode); 1366 1367 if (bitsize == 0 || unit < bitsize) 1368 return NULL_RTX; 1369 1370 if (MEM_P (op0)) 1371 /* Get a reference to the first byte of the field. */ 1372 op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum, 1373 &bitnum); 1374 else 1375 { 1376 /* Convert from counting within OP0 to counting in EXT_MODE. */ 1377 if (BYTES_BIG_ENDIAN) 1378 bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0)); 1379 1380 /* If op0 is a register, we need it in EXT_MODE to make it 1381 acceptable to the format of ext(z)v. 
*/ 1382 if (GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode) 1383 return NULL_RTX; 1384 if (REG_P (op0) && GET_MODE (op0) != ext_mode) 1385 op0 = gen_lowpart_SUBREG (ext_mode, op0); 1386 } 1387 1388 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count 1389 "backwards" from the size of the unit we are extracting from. 1390 Otherwise, we count bits from the most significant on a 1391 BYTES/BITS_BIG_ENDIAN machine. */ 1392 1393 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN) 1394 bitnum = unit - bitsize - bitnum; 1395 1396 if (target == 0) 1397 target = spec_target = gen_reg_rtx (tmode); 1398 1399 if (GET_MODE (target) != ext_mode) 1400 { 1401 /* Don't use LHS paradoxical subreg if explicit truncation is needed 1402 between the mode of the extraction (word_mode) and the target 1403 mode. Instead, create a temporary and use convert_move to set 1404 the target. */ 1405 if (REG_P (target) 1406 && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode)) 1407 { 1408 target = gen_lowpart (ext_mode, target); 1409 if (GET_MODE_PRECISION (ext_mode) 1410 > GET_MODE_PRECISION (GET_MODE (spec_target))) 1411 spec_target_subreg = target; 1412 } 1413 else 1414 target = gen_reg_rtx (ext_mode); 1415 } 1416 1417 create_output_operand (&ops[0], target, ext_mode); 1418 create_fixed_operand (&ops[1], op0); 1419 create_integer_operand (&ops[2], bitsize); 1420 create_integer_operand (&ops[3], bitnum); 1421 if (maybe_expand_insn (extv->icode, 4, ops)) 1422 { 1423 target = ops[0].value; 1424 if (target == spec_target) 1425 return target; 1426 if (target == spec_target_subreg) 1427 return spec_target; 1428 return convert_extracted_bit_field (target, mode, tmode, unsignedp); 1429 } 1430 return NULL_RTX; 1431 } 1432 1433 /* A subroutine of extract_bit_field, with the same arguments. 1434 If FALLBACK_P is true, fall back to extract_fixed_bit_field 1435 if we can find no other means of implementing the operation. 1436 if FALLBACK_P is false, return NULL instead. */ 1437 1438 static rtx 1439 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize, 1440 unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target, 1441 machine_mode mode, machine_mode tmode, 1442 bool fallback_p) 1443 { 1444 rtx op0 = str_rtx; 1445 machine_mode int_mode; 1446 machine_mode mode1; 1447 1448 if (tmode == VOIDmode) 1449 tmode = mode; 1450 1451 while (GET_CODE (op0) == SUBREG) 1452 { 1453 bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT; 1454 op0 = SUBREG_REG (op0); 1455 } 1456 1457 /* If we have an out-of-bounds access to a register, just return an 1458 uninitialized register of the required mode. This can occur if the 1459 source code contains an out-of-bounds access to a small array. */ 1460 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0))) 1461 return gen_reg_rtx (tmode); 1462 1463 if (REG_P (op0) 1464 && mode == GET_MODE (op0) 1465 && bitnum == 0 1466 && bitsize == GET_MODE_BITSIZE (GET_MODE (op0))) 1467 { 1468 /* We're trying to extract a full register from itself. */ 1469 return op0; 1470 } 1471 1472 /* See if we can get a better vector mode before extracting. 
*/ 1473 if (VECTOR_MODE_P (GET_MODE (op0)) 1474 && !MEM_P (op0) 1475 && GET_MODE_INNER (GET_MODE (op0)) != tmode) 1476 { 1477 machine_mode new_mode; 1478 1479 if (GET_MODE_CLASS (tmode) == MODE_FLOAT) 1480 new_mode = MIN_MODE_VECTOR_FLOAT; 1481 else if (GET_MODE_CLASS (tmode) == MODE_FRACT) 1482 new_mode = MIN_MODE_VECTOR_FRACT; 1483 else if (GET_MODE_CLASS (tmode) == MODE_UFRACT) 1484 new_mode = MIN_MODE_VECTOR_UFRACT; 1485 else if (GET_MODE_CLASS (tmode) == MODE_ACCUM) 1486 new_mode = MIN_MODE_VECTOR_ACCUM; 1487 else if (GET_MODE_CLASS (tmode) == MODE_UACCUM) 1488 new_mode = MIN_MODE_VECTOR_UACCUM; 1489 else 1490 new_mode = MIN_MODE_VECTOR_INT; 1491 1492 for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode)) 1493 if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0)) 1494 && targetm.vector_mode_supported_p (new_mode)) 1495 break; 1496 if (new_mode != VOIDmode) 1497 op0 = gen_lowpart (new_mode, op0); 1498 } 1499 1500 /* Use vec_extract patterns for extracting parts of vectors whenever 1501 available. */ 1502 if (VECTOR_MODE_P (GET_MODE (op0)) 1503 && !MEM_P (op0) 1504 && optab_handler (vec_extract_optab, GET_MODE (op0)) != CODE_FOR_nothing 1505 && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0))) 1506 == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0))))) 1507 { 1508 struct expand_operand ops[3]; 1509 machine_mode outermode = GET_MODE (op0); 1510 machine_mode innermode = GET_MODE_INNER (outermode); 1511 enum insn_code icode = optab_handler (vec_extract_optab, outermode); 1512 unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode); 1513 1514 create_output_operand (&ops[0], target, innermode); 1515 create_input_operand (&ops[1], op0, outermode); 1516 create_integer_operand (&ops[2], pos); 1517 if (maybe_expand_insn (icode, 3, ops)) 1518 { 1519 target = ops[0].value; 1520 if (GET_MODE (target) != mode) 1521 return gen_lowpart (tmode, target); 1522 return target; 1523 } 1524 } 1525 1526 /* Make sure we are playing with integral modes. Pun with subregs 1527 if we aren't. */ 1528 { 1529 machine_mode imode = int_mode_for_mode (GET_MODE (op0)); 1530 if (imode != GET_MODE (op0)) 1531 { 1532 if (MEM_P (op0)) 1533 op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0)); 1534 else if (imode != BLKmode) 1535 { 1536 op0 = gen_lowpart (imode, op0); 1537 1538 /* If we got a SUBREG, force it into a register since we 1539 aren't going to be able to do another SUBREG on it. */ 1540 if (GET_CODE (op0) == SUBREG) 1541 op0 = force_reg (imode, op0); 1542 } 1543 else if (REG_P (op0)) 1544 { 1545 rtx reg, subreg; 1546 imode = smallest_mode_for_size (GET_MODE_BITSIZE (GET_MODE (op0)), 1547 MODE_INT); 1548 reg = gen_reg_rtx (imode); 1549 subreg = gen_lowpart_SUBREG (GET_MODE (op0), reg); 1550 emit_move_insn (subreg, op0); 1551 op0 = reg; 1552 bitnum += SUBREG_BYTE (subreg) * BITS_PER_UNIT; 1553 } 1554 else 1555 { 1556 HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (op0)); 1557 rtx mem = assign_stack_temp (GET_MODE (op0), size); 1558 emit_move_insn (mem, op0); 1559 op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size); 1560 } 1561 } 1562 } 1563 1564 /* ??? We currently assume TARGET is at least as big as BITSIZE. 1565 If that's wrong, the solution is to test for it and set TARGET to 0 1566 if needed. */ 1567 1568 /* Get the mode of the field to use for atomic access or subreg 1569 conversion. 
*/ 1570 mode1 = mode; 1571 if (SCALAR_INT_MODE_P (tmode)) 1572 { 1573 machine_mode try_mode = mode_for_size (bitsize, 1574 GET_MODE_CLASS (tmode), 0); 1575 if (try_mode != BLKmode) 1576 mode1 = try_mode; 1577 } 1578 gcc_assert (mode1 != BLKmode); 1579 1580 /* Extraction of a full MODE1 value can be done with a subreg as long 1581 as the least significant bit of the value is the least significant 1582 bit of either OP0 or a word of OP0. */ 1583 if (!MEM_P (op0) 1584 && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0)) 1585 && bitsize == GET_MODE_BITSIZE (mode1) 1586 && TRULY_NOOP_TRUNCATION_MODES_P (mode1, GET_MODE (op0))) 1587 { 1588 rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 1589 bitnum / BITS_PER_UNIT); 1590 if (sub) 1591 return convert_extracted_bit_field (sub, mode, tmode, unsignedp); 1592 } 1593 1594 /* Extraction of a full MODE1 value can be done with a load as long as 1595 the field is on a byte boundary and is sufficiently aligned. */ 1596 if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1)) 1597 { 1598 op0 = adjust_bitfield_address (op0, mode1, bitnum / BITS_PER_UNIT); 1599 return convert_extracted_bit_field (op0, mode, tmode, unsignedp); 1600 } 1601 1602 /* Handle fields bigger than a word. */ 1603 1604 if (bitsize > BITS_PER_WORD) 1605 { 1606 /* Here we transfer the words of the field 1607 in the order least significant first. 1608 This is because the most significant word is the one which may 1609 be less than full. */ 1610 1611 unsigned int backwards = WORDS_BIG_ENDIAN; 1612 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD; 1613 unsigned int i; 1614 rtx_insn *last; 1615 1616 if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target)) 1617 target = gen_reg_rtx (mode); 1618 1619 /* Indicate for flow that the entire target reg is being set. */ 1620 emit_clobber (target); 1621 1622 last = get_last_insn (); 1623 for (i = 0; i < nwords; i++) 1624 { 1625 /* If I is 0, use the low-order word in both field and target; 1626 if I is 1, use the next to lowest word; and so on. */ 1627 /* Word number in TARGET to use. */ 1628 unsigned int wordnum 1629 = (backwards 1630 ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1 1631 : i); 1632 /* Offset from start of field in OP0. */ 1633 unsigned int bit_offset = (backwards 1634 ? MAX ((int) bitsize - ((int) i + 1) 1635 * BITS_PER_WORD, 1636 0) 1637 : (int) i * BITS_PER_WORD); 1638 rtx target_part = operand_subword (target, wordnum, 1, VOIDmode); 1639 rtx result_part 1640 = extract_bit_field_1 (op0, MIN (BITS_PER_WORD, 1641 bitsize - i * BITS_PER_WORD), 1642 bitnum + bit_offset, 1, target_part, 1643 mode, word_mode, fallback_p); 1644 1645 gcc_assert (target_part); 1646 if (!result_part) 1647 { 1648 delete_insns_since (last); 1649 return NULL; 1650 } 1651 1652 if (result_part != target_part) 1653 emit_move_insn (target_part, result_part); 1654 } 1655 1656 if (unsignedp) 1657 { 1658 /* Unless we've filled TARGET, the upper regs in a multi-reg value 1659 need to be zero'd out. */ 1660 if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD) 1661 { 1662 unsigned int i, total_words; 1663 1664 total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD; 1665 for (i = nwords; i < total_words; i++) 1666 emit_move_insn 1667 (operand_subword (target, 1668 backwards ? total_words - i - 1 : i, 1669 1, VOIDmode), 1670 const0_rtx); 1671 } 1672 return target; 1673 } 1674 1675 /* Signed bit field: sign-extend with two arithmetic shifts. 
*/ 1676 target = expand_shift (LSHIFT_EXPR, mode, target, 1677 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0); 1678 return expand_shift (RSHIFT_EXPR, mode, target, 1679 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0); 1680 } 1681 1682 /* If OP0 is a multi-word register, narrow it to the affected word. 1683 If the region spans two words, defer to extract_split_bit_field. */ 1684 if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD) 1685 { 1686 op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0), 1687 bitnum / BITS_PER_WORD * UNITS_PER_WORD); 1688 bitnum %= BITS_PER_WORD; 1689 if (bitnum + bitsize > BITS_PER_WORD) 1690 { 1691 if (!fallback_p) 1692 return NULL_RTX; 1693 target = extract_split_bit_field (op0, bitsize, bitnum, unsignedp); 1694 return convert_extracted_bit_field (target, mode, tmode, unsignedp); 1695 } 1696 } 1697 1698 /* From here on we know the desired field is smaller than a word. 1699 If OP0 is a register, it too fits within a word. */ 1700 enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv; 1701 extraction_insn extv; 1702 if (!MEM_P (op0) 1703 /* ??? We could limit the structure size to the part of OP0 that 1704 contains the field, with appropriate checks for endianness 1705 and TRULY_NOOP_TRUNCATION. */ 1706 && get_best_reg_extraction_insn (&extv, pattern, 1707 GET_MODE_BITSIZE (GET_MODE (op0)), 1708 tmode)) 1709 { 1710 rtx result = extract_bit_field_using_extv (&extv, op0, bitsize, bitnum, 1711 unsignedp, target, mode, 1712 tmode); 1713 if (result) 1714 return result; 1715 } 1716 1717 /* If OP0 is a memory, try copying it to a register and seeing if a 1718 cheap register alternative is available. */ 1719 if (MEM_P (op0)) 1720 { 1721 if (get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum, 1722 tmode)) 1723 { 1724 rtx result = extract_bit_field_using_extv (&extv, op0, bitsize, 1725 bitnum, unsignedp, 1726 target, mode, 1727 tmode); 1728 if (result) 1729 return result; 1730 } 1731 1732 rtx_insn *last = get_last_insn (); 1733 1734 /* Try loading part of OP0 into a register and extracting the 1735 bitfield from that. */ 1736 unsigned HOST_WIDE_INT bitpos; 1737 rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum, 1738 0, 0, tmode, &bitpos); 1739 if (xop0) 1740 { 1741 xop0 = copy_to_reg (xop0); 1742 rtx result = extract_bit_field_1 (xop0, bitsize, bitpos, 1743 unsignedp, target, 1744 mode, tmode, false); 1745 if (result) 1746 return result; 1747 delete_insns_since (last); 1748 } 1749 } 1750 1751 if (!fallback_p) 1752 return NULL; 1753 1754 /* Find a correspondingly-sized integer field, so we can apply 1755 shifts and masks to it. */ 1756 int_mode = int_mode_for_mode (tmode); 1757 if (int_mode == BLKmode) 1758 int_mode = int_mode_for_mode (mode); 1759 /* Should probably push op0 out to memory and then do a load. */ 1760 gcc_assert (int_mode != BLKmode); 1761 1762 target = extract_fixed_bit_field (int_mode, op0, bitsize, bitnum, 1763 target, unsignedp); 1764 return convert_extracted_bit_field (target, mode, tmode, unsignedp); 1765 } 1766 1767 /* Generate code to extract a byte-field from STR_RTX 1768 containing BITSIZE bits, starting at BITNUM, 1769 and put it in TARGET if possible (if TARGET is nonzero). 1770 Regardless of TARGET, we return the rtx for where the value is placed. 1771 1772 STR_RTX is the structure containing the byte (a REG or MEM). 1773 UNSIGNEDP is nonzero if this is an unsigned bit field. 1774 MODE is the natural mode of the field value once extracted. 
1775 TMODE is the mode the caller would like the value to have; 1776 but the value may be returned with type MODE instead. 1777 1778 If a TARGET is specified and we can store in it at no extra cost, 1779 we do so, and return TARGET. 1780 Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred 1781 if they are equally easy. */ 1782 1783 rtx 1784 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize, 1785 unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target, 1786 machine_mode mode, machine_mode tmode) 1787 { 1788 machine_mode mode1; 1789 1790 /* Handle -fstrict-volatile-bitfields in the cases where it applies. */ 1791 if (GET_MODE_BITSIZE (GET_MODE (str_rtx)) > 0) 1792 mode1 = GET_MODE (str_rtx); 1793 else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0) 1794 mode1 = GET_MODE (target); 1795 else 1796 mode1 = tmode; 1797 1798 if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, mode1, 0, 0)) 1799 { 1800 /* Extraction of a full MODE1 value can be done with a simple load. 1801 We know here that the field can be accessed with one single 1802 instruction. For targets that support unaligned memory, 1803 an unaligned access may be necessary. */ 1804 if (bitsize == GET_MODE_BITSIZE (mode1)) 1805 { 1806 rtx result = adjust_bitfield_address (str_rtx, mode1, 1807 bitnum / BITS_PER_UNIT); 1808 gcc_assert (bitnum % BITS_PER_UNIT == 0); 1809 return convert_extracted_bit_field (result, mode, tmode, unsignedp); 1810 } 1811 1812 str_rtx = narrow_bit_field_mem (str_rtx, mode1, bitsize, bitnum, 1813 &bitnum); 1814 gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (mode1)); 1815 str_rtx = copy_to_reg (str_rtx); 1816 } 1817 1818 return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp, 1819 target, mode, tmode, true); 1820 } 1821 1822 /* Use shifts and boolean operations to extract a field of BITSIZE bits 1823 from bit BITNUM of OP0. 1824 1825 UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value). 1826 If TARGET is nonzero, attempts to store the value there 1827 and return TARGET, but this is not guaranteed. 1828 If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */ 1829 1830 static rtx 1831 extract_fixed_bit_field (machine_mode tmode, rtx op0, 1832 unsigned HOST_WIDE_INT bitsize, 1833 unsigned HOST_WIDE_INT bitnum, rtx target, 1834 int unsignedp) 1835 { 1836 if (MEM_P (op0)) 1837 { 1838 machine_mode mode 1839 = get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0), word_mode, 1840 MEM_VOLATILE_P (op0)); 1841 1842 if (mode == VOIDmode) 1843 /* The only way this should occur is if the field spans word 1844 boundaries. */ 1845 return extract_split_bit_field (op0, bitsize, bitnum, unsignedp); 1846 1847 op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum); 1848 } 1849 1850 return extract_fixed_bit_field_1 (tmode, op0, bitsize, bitnum, 1851 target, unsignedp); 1852 } 1853 1854 /* Helper function for extract_fixed_bit_field, extracts 1855 the bit field always using the MODE of OP0. */ 1856 1857 static rtx 1858 extract_fixed_bit_field_1 (machine_mode tmode, rtx op0, 1859 unsigned HOST_WIDE_INT bitsize, 1860 unsigned HOST_WIDE_INT bitnum, rtx target, 1861 int unsignedp) 1862 { 1863 machine_mode mode = GET_MODE (op0); 1864 gcc_assert (SCALAR_INT_MODE_P (mode)); 1865 1866 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode) 1867 for invalid input, such as extract equivalent of f5 from 1868 gcc.dg/pr48335-2.c. 
*/ 1869 1870 if (BYTES_BIG_ENDIAN) 1871 /* BITNUM is the distance between our msb and that of OP0. 1872 Convert it to the distance from the lsb. */ 1873 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum; 1874 1875 /* Now BITNUM is always the distance between the field's lsb and that of OP0. 1876 We have reduced the big-endian case to the little-endian case. */ 1877 1878 if (unsignedp) 1879 { 1880 if (bitnum) 1881 { 1882 /* If the field does not already start at the lsb, 1883 shift it so it does. */ 1884 /* Maybe propagate the target for the shift. */ 1885 rtx subtarget = (target != 0 && REG_P (target) ? target : 0); 1886 if (tmode != mode) 1887 subtarget = 0; 1888 op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1); 1889 } 1890 /* Convert the value to the desired mode. */ 1891 if (mode != tmode) 1892 op0 = convert_to_mode (tmode, op0, 1); 1893 1894 /* Unless the msb of the field used to be the msb when we shifted, 1895 mask out the upper bits. */ 1896 1897 if (GET_MODE_BITSIZE (mode) != bitnum + bitsize) 1898 return expand_binop (GET_MODE (op0), and_optab, op0, 1899 mask_rtx (GET_MODE (op0), 0, bitsize, 0), 1900 target, 1, OPTAB_LIB_WIDEN); 1901 return op0; 1902 } 1903 1904 /* To extract a signed bit-field, first shift its msb to the msb of the word, 1905 then arithmetic-shift its lsb to the lsb of the word. */ 1906 op0 = force_reg (mode, op0); 1907 1908 /* Find the narrowest integer mode that contains the field. */ 1909 1910 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode; 1911 mode = GET_MODE_WIDER_MODE (mode)) 1912 if (GET_MODE_BITSIZE (mode) >= bitsize + bitnum) 1913 { 1914 op0 = convert_to_mode (mode, op0, 0); 1915 break; 1916 } 1917 1918 if (mode != tmode) 1919 target = 0; 1920 1921 if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum)) 1922 { 1923 int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum); 1924 /* Maybe propagate the target for the shift. */ 1925 rtx subtarget = (target != 0 && REG_P (target) ? target : 0); 1926 op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1); 1927 } 1928 1929 return expand_shift (RSHIFT_EXPR, mode, op0, 1930 GET_MODE_BITSIZE (mode) - bitsize, target, 0); 1931 } 1932 1933 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value 1934 VALUE << BITPOS. */ 1935 1936 static rtx 1937 lshift_value (machine_mode mode, unsigned HOST_WIDE_INT value, 1938 int bitpos) 1939 { 1940 return immed_wide_int_const (wi::lshift (value, bitpos), mode); 1941 } 1942 1943 /* Extract a bit field that is split across two words 1944 and return an RTX for the result. 1945 1946 OP0 is the REG, SUBREG or MEM rtx for the first of the two words. 1947 BITSIZE is the field width; BITPOS, position of its first bit, in the word. 1948 UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend. */ 1949 1950 static rtx 1951 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize, 1952 unsigned HOST_WIDE_INT bitpos, int unsignedp) 1953 { 1954 unsigned int unit; 1955 unsigned int bitsdone = 0; 1956 rtx result = NULL_RTX; 1957 int first = 1; 1958 1959 /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that 1960 much at a time. 
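For a REG or SUBREG, UNIT is simply BITS_PER_WORD; for a MEM it is further
     capped by the alignment, so a 16-bit-aligned MEM, for instance, is read in
     pieces of at most 16 bits (illustrative figure).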
*/ 1961 if (REG_P (op0) || GET_CODE (op0) == SUBREG) 1962 unit = BITS_PER_WORD; 1963 else 1964 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD); 1965 1966 while (bitsdone < bitsize) 1967 { 1968 unsigned HOST_WIDE_INT thissize; 1969 rtx part, word; 1970 unsigned HOST_WIDE_INT thispos; 1971 unsigned HOST_WIDE_INT offset; 1972 1973 offset = (bitpos + bitsdone) / unit; 1974 thispos = (bitpos + bitsdone) % unit; 1975 1976 /* THISSIZE must not overrun a word boundary. Otherwise, 1977 extract_fixed_bit_field will call us again, and we will mutually 1978 recurse forever. */ 1979 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD); 1980 thissize = MIN (thissize, unit - thispos); 1981 1982 /* If OP0 is a register, then handle OFFSET here. 1983 1984 When handling multiword bitfields, extract_bit_field may pass 1985 down a word_mode SUBREG of a larger REG for a bitfield that actually 1986 crosses a word boundary. Thus, for a SUBREG, we must find 1987 the current word starting from the base register. */ 1988 if (GET_CODE (op0) == SUBREG) 1989 { 1990 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset; 1991 word = operand_subword_force (SUBREG_REG (op0), word_offset, 1992 GET_MODE (SUBREG_REG (op0))); 1993 offset = 0; 1994 } 1995 else if (REG_P (op0)) 1996 { 1997 word = operand_subword_force (op0, offset, GET_MODE (op0)); 1998 offset = 0; 1999 } 2000 else 2001 word = op0; 2002 2003 /* Extract the parts in bit-counting order, 2004 whose meaning is determined by BYTES_PER_UNIT. 2005 OFFSET is in UNITs, and UNIT is in bits. */ 2006 part = extract_fixed_bit_field (word_mode, word, thissize, 2007 offset * unit + thispos, 0, 1); 2008 bitsdone += thissize; 2009 2010 /* Shift this part into place for the result. */ 2011 if (BYTES_BIG_ENDIAN) 2012 { 2013 if (bitsize != bitsdone) 2014 part = expand_shift (LSHIFT_EXPR, word_mode, part, 2015 bitsize - bitsdone, 0, 1); 2016 } 2017 else 2018 { 2019 if (bitsdone != thissize) 2020 part = expand_shift (LSHIFT_EXPR, word_mode, part, 2021 bitsdone - thissize, 0, 1); 2022 } 2023 2024 if (first) 2025 result = part; 2026 else 2027 /* Combine the parts with bitwise or. This works 2028 because we extracted each part as an unsigned bit field. */ 2029 result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1, 2030 OPTAB_LIB_WIDEN); 2031 2032 first = 0; 2033 } 2034 2035 /* Unsigned bit field: we are done. */ 2036 if (unsignedp) 2037 return result; 2038 /* Signed bit field: sign-extend with two arithmetic shifts. */ 2039 result = expand_shift (LSHIFT_EXPR, word_mode, result, 2040 BITS_PER_WORD - bitsize, NULL_RTX, 0); 2041 return expand_shift (RSHIFT_EXPR, word_mode, result, 2042 BITS_PER_WORD - bitsize, NULL_RTX, 0); 2043 } 2044 2045 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving 2046 the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than 2047 MODE, fill the upper bits with zeros. Fail if the layout of either 2048 mode is unknown (as for CC modes) or if the extraction would involve 2049 unprofitable mode punning. Return the value on success, otherwise 2050 return null. 2051 2052 This is different from gen_lowpart* in these respects: 2053 2054 - the returned value must always be considered an rvalue 2055 2056 - when MODE is wider than SRC_MODE, the extraction involves 2057 a zero extension 2058 2059 - when MODE is smaller than SRC_MODE, the extraction involves 2060 a truncation (and is thus subject to TRULY_NOOP_TRUNCATION). 
2061 2062 In other words, this routine performs a computation, whereas the 2063 gen_lowpart* routines are conceptually lvalue or rvalue subreg 2064 operations. */ 2065 2066 rtx 2067 extract_low_bits (machine_mode mode, machine_mode src_mode, rtx src) 2068 { 2069 machine_mode int_mode, src_int_mode; 2070 2071 if (mode == src_mode) 2072 return src; 2073 2074 if (CONSTANT_P (src)) 2075 { 2076 /* simplify_gen_subreg can't be used here, as if simplify_subreg 2077 fails, it will happily create (subreg (symbol_ref)) or similar 2078 invalid SUBREGs. */ 2079 unsigned int byte = subreg_lowpart_offset (mode, src_mode); 2080 rtx ret = simplify_subreg (mode, src, src_mode, byte); 2081 if (ret) 2082 return ret; 2083 2084 if (GET_MODE (src) == VOIDmode 2085 || !validate_subreg (mode, src_mode, src, byte)) 2086 return NULL_RTX; 2087 2088 src = force_reg (GET_MODE (src), src); 2089 return gen_rtx_SUBREG (mode, src, byte); 2090 } 2091 2092 if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC) 2093 return NULL_RTX; 2094 2095 if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode) 2096 && MODES_TIEABLE_P (mode, src_mode)) 2097 { 2098 rtx x = gen_lowpart_common (mode, src); 2099 if (x) 2100 return x; 2101 } 2102 2103 src_int_mode = int_mode_for_mode (src_mode); 2104 int_mode = int_mode_for_mode (mode); 2105 if (src_int_mode == BLKmode || int_mode == BLKmode) 2106 return NULL_RTX; 2107 2108 if (!MODES_TIEABLE_P (src_int_mode, src_mode)) 2109 return NULL_RTX; 2110 if (!MODES_TIEABLE_P (int_mode, mode)) 2111 return NULL_RTX; 2112 2113 src = gen_lowpart (src_int_mode, src); 2114 src = convert_modes (int_mode, src_int_mode, src, true); 2115 src = gen_lowpart (mode, src); 2116 return src; 2117 } 2118 2119 /* Add INC into TARGET. */ 2120 2121 void 2122 expand_inc (rtx target, rtx inc) 2123 { 2124 rtx value = expand_binop (GET_MODE (target), add_optab, 2125 target, inc, 2126 target, 0, OPTAB_LIB_WIDEN); 2127 if (value != target) 2128 emit_move_insn (target, value); 2129 } 2130 2131 /* Subtract DEC from TARGET. */ 2132 2133 void 2134 expand_dec (rtx target, rtx dec) 2135 { 2136 rtx value = expand_binop (GET_MODE (target), sub_optab, 2137 target, dec, 2138 target, 0, OPTAB_LIB_WIDEN); 2139 if (value != target) 2140 emit_move_insn (target, value); 2141 } 2142 2143 /* Output a shift instruction for expression code CODE, 2144 with SHIFTED being the rtx for the value to shift, 2145 and AMOUNT the rtx for the amount to shift by. 2146 Store the result in the rtx TARGET, if that is convenient. 2147 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic. 2148 Return the rtx for where the value is. */ 2149 2150 static rtx 2151 expand_shift_1 (enum tree_code code, machine_mode mode, rtx shifted, 2152 rtx amount, rtx target, int unsignedp) 2153 { 2154 rtx op1, temp = 0; 2155 int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR); 2156 int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR); 2157 optab lshift_optab = ashl_optab; 2158 optab rshift_arith_optab = ashr_optab; 2159 optab rshift_uns_optab = lshr_optab; 2160 optab lrotate_optab = rotl_optab; 2161 optab rrotate_optab = rotr_optab; 2162 machine_mode op1_mode; 2163 machine_mode scalar_mode = mode; 2164 int attempt; 2165 bool speed = optimize_insn_for_speed_p (); 2166 2167 if (VECTOR_MODE_P (mode)) 2168 scalar_mode = GET_MODE_INNER (mode); 2169 op1 = amount; 2170 op1_mode = GET_MODE (op1); 2171 2172 /* Determine whether the shift/rotate amount is a vector, or scalar. 
If the 2173 shift amount is a vector, use the vector/vector shift patterns. */ 2174 if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode)) 2175 { 2176 lshift_optab = vashl_optab; 2177 rshift_arith_optab = vashr_optab; 2178 rshift_uns_optab = vlshr_optab; 2179 lrotate_optab = vrotl_optab; 2180 rrotate_optab = vrotr_optab; 2181 } 2182 2183 /* Previously detected shift-counts computed by NEGATE_EXPR 2184 and shifted in the other direction; but that does not work 2185 on all machines. */ 2186 2187 if (SHIFT_COUNT_TRUNCATED) 2188 { 2189 if (CONST_INT_P (op1) 2190 && ((unsigned HOST_WIDE_INT) INTVAL (op1) >= 2191 (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (scalar_mode))) 2192 op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1) 2193 % GET_MODE_BITSIZE (scalar_mode)); 2194 else if (GET_CODE (op1) == SUBREG 2195 && subreg_lowpart_p (op1) 2196 && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1))) 2197 && SCALAR_INT_MODE_P (GET_MODE (op1))) 2198 op1 = SUBREG_REG (op1); 2199 } 2200 2201 /* Canonicalize rotates by constant amount. If op1 is bitsize / 2, 2202 prefer left rotation, if op1 is from bitsize / 2 + 1 to 2203 bitsize - 1, use other direction of rotate with 1 .. bitsize / 2 - 1 2204 amount instead. */ 2205 if (rotate 2206 && CONST_INT_P (op1) 2207 && IN_RANGE (INTVAL (op1), GET_MODE_BITSIZE (scalar_mode) / 2 + left, 2208 GET_MODE_BITSIZE (scalar_mode) - 1)) 2209 { 2210 op1 = GEN_INT (GET_MODE_BITSIZE (scalar_mode) - INTVAL (op1)); 2211 left = !left; 2212 code = left ? LROTATE_EXPR : RROTATE_EXPR; 2213 } 2214 2215 /* Rotation of 16bit values by 8 bits is effectively equivalent to a bswaphi. 2216 Note that this is not the case for bigger values. For instance a rotation 2217 of 0x01020304 by 16 bits gives 0x03040102 which is different from 2218 0x04030201 (bswapsi). */ 2219 if (rotate 2220 && CONST_INT_P (op1) 2221 && INTVAL (op1) == BITS_PER_UNIT 2222 && GET_MODE_SIZE (scalar_mode) == 2 2223 && optab_handler (bswap_optab, HImode) != CODE_FOR_nothing) 2224 return expand_unop (HImode, bswap_optab, shifted, NULL_RTX, 2225 unsignedp); 2226 2227 if (op1 == const0_rtx) 2228 return shifted; 2229 2230 /* Check whether its cheaper to implement a left shift by a constant 2231 bit count by a sequence of additions. */ 2232 if (code == LSHIFT_EXPR 2233 && CONST_INT_P (op1) 2234 && INTVAL (op1) > 0 2235 && INTVAL (op1) < GET_MODE_PRECISION (scalar_mode) 2236 && INTVAL (op1) < MAX_BITS_PER_WORD 2237 && (shift_cost (speed, mode, INTVAL (op1)) 2238 > INTVAL (op1) * add_cost (speed, mode)) 2239 && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST) 2240 { 2241 int i; 2242 for (i = 0; i < INTVAL (op1); i++) 2243 { 2244 temp = force_reg (mode, shifted); 2245 shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX, 2246 unsignedp, OPTAB_LIB_WIDEN); 2247 } 2248 return shifted; 2249 } 2250 2251 for (attempt = 0; temp == 0 && attempt < 3; attempt++) 2252 { 2253 enum optab_methods methods; 2254 2255 if (attempt == 0) 2256 methods = OPTAB_DIRECT; 2257 else if (attempt == 1) 2258 methods = OPTAB_WIDEN; 2259 else 2260 methods = OPTAB_LIB_WIDEN; 2261 2262 if (rotate) 2263 { 2264 /* Widening does not work for rotation. */ 2265 if (methods == OPTAB_WIDEN) 2266 continue; 2267 else if (methods == OPTAB_LIB_WIDEN) 2268 { 2269 /* If we have been unable to open-code this by a rotation, 2270 do it as the IOR of two shifts. I.e., to rotate A 2271 by N bits, compute 2272 (A << N) | ((unsigned) A >> ((-N) & (C - 1))) 2273 where C is the bitsize of A. 
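(Illustration: for C == 32 and a left rotate, a constant N == 8 gives
	       (A << 8) | ((unsigned) A >> 24); for a non-constant N the second count
	       is computed as (-N) & 31, which keeps it within 0..31 even when N is
	       zero at run time, whereas a shift by the full 32 would be undefined.)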
2274 2275 It is theoretically possible that the target machine might 2276 not be able to perform either shift and hence we would 2277 be making two libcalls rather than just the one for the 2278 shift (similarly if IOR could not be done). We will allow 2279 this extremely unlikely lossage to avoid complicating the 2280 code below. */ 2281 2282 rtx subtarget = target == shifted ? 0 : target; 2283 rtx new_amount, other_amount; 2284 rtx temp1; 2285 2286 new_amount = op1; 2287 if (op1 == const0_rtx) 2288 return shifted; 2289 else if (CONST_INT_P (op1)) 2290 other_amount = GEN_INT (GET_MODE_BITSIZE (scalar_mode) 2291 - INTVAL (op1)); 2292 else 2293 { 2294 other_amount 2295 = simplify_gen_unary (NEG, GET_MODE (op1), 2296 op1, GET_MODE (op1)); 2297 HOST_WIDE_INT mask = GET_MODE_PRECISION (scalar_mode) - 1; 2298 other_amount 2299 = simplify_gen_binary (AND, GET_MODE (op1), other_amount, 2300 gen_int_mode (mask, GET_MODE (op1))); 2301 } 2302 2303 shifted = force_reg (mode, shifted); 2304 2305 temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR, 2306 mode, shifted, new_amount, 0, 1); 2307 temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR, 2308 mode, shifted, other_amount, 2309 subtarget, 1); 2310 return expand_binop (mode, ior_optab, temp, temp1, target, 2311 unsignedp, methods); 2312 } 2313 2314 temp = expand_binop (mode, 2315 left ? lrotate_optab : rrotate_optab, 2316 shifted, op1, target, unsignedp, methods); 2317 } 2318 else if (unsignedp) 2319 temp = expand_binop (mode, 2320 left ? lshift_optab : rshift_uns_optab, 2321 shifted, op1, target, unsignedp, methods); 2322 2323 /* Do arithmetic shifts. 2324 Also, if we are going to widen the operand, we can just as well 2325 use an arithmetic right-shift instead of a logical one. */ 2326 if (temp == 0 && ! rotate 2327 && (! unsignedp || (! left && methods == OPTAB_WIDEN))) 2328 { 2329 enum optab_methods methods1 = methods; 2330 2331 /* If trying to widen a log shift to an arithmetic shift, 2332 don't accept an arithmetic shift of the same size. */ 2333 if (unsignedp) 2334 methods1 = OPTAB_MUST_WIDEN; 2335 2336 /* Arithmetic shift */ 2337 2338 temp = expand_binop (mode, 2339 left ? lshift_optab : rshift_arith_optab, 2340 shifted, op1, target, unsignedp, methods1); 2341 } 2342 2343 /* We used to try extzv here for logical right shifts, but that was 2344 only useful for one machine, the VAX, and caused poor code 2345 generation there for lshrdi3, so the code was deleted and a 2346 define_expand for lshrsi3 was added to vax.md. */ 2347 } 2348 2349 gcc_assert (temp); 2350 return temp; 2351 } 2352 2353 /* Output a shift instruction for expression code CODE, 2354 with SHIFTED being the rtx for the value to shift, 2355 and AMOUNT the amount to shift by. 2356 Store the result in the rtx TARGET, if that is convenient. 2357 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic. 2358 Return the rtx for where the value is. */ 2359 2360 rtx 2361 expand_shift (enum tree_code code, machine_mode mode, rtx shifted, 2362 int amount, rtx target, int unsignedp) 2363 { 2364 return expand_shift_1 (code, mode, 2365 shifted, GEN_INT (amount), target, unsignedp); 2366 } 2367 2368 /* Output a shift instruction for expression code CODE, 2369 with SHIFTED being the rtx for the value to shift, 2370 and AMOUNT the tree for the amount to shift by. 2371 Store the result in the rtx TARGET, if that is convenient. 2372 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic. 2373 Return the rtx for where the value is. 
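Unlike expand_shift above, which takes the amount as a plain int, here AMOUNT
   is a tree; it is expanded with expand_normal and handed to the common worker
   expand_shift_1.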
*/ 2374 2375 rtx 2376 expand_variable_shift (enum tree_code code, machine_mode mode, rtx shifted, 2377 tree amount, rtx target, int unsignedp) 2378 { 2379 return expand_shift_1 (code, mode, 2380 shifted, expand_normal (amount), target, unsignedp); 2381 } 2382 2383 2384 /* Indicates the type of fixup needed after a constant multiplication. 2385 BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that 2386 the result should be negated, and ADD_VARIANT means that the 2387 multiplicand should be added to the result. */ 2388 enum mult_variant {basic_variant, negate_variant, add_variant}; 2389 2390 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT, 2391 const struct mult_cost *, machine_mode mode); 2392 static bool choose_mult_variant (machine_mode, HOST_WIDE_INT, 2393 struct algorithm *, enum mult_variant *, int); 2394 static rtx expand_mult_const (machine_mode, rtx, HOST_WIDE_INT, rtx, 2395 const struct algorithm *, enum mult_variant); 2396 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int); 2397 static rtx extract_high_half (machine_mode, rtx); 2398 static rtx expmed_mult_highpart (machine_mode, rtx, rtx, rtx, int, int); 2399 static rtx expmed_mult_highpart_optab (machine_mode, rtx, rtx, rtx, 2400 int, int); 2401 /* Compute and return the best algorithm for multiplying by T. 2402 The algorithm must cost less than cost_limit 2403 If retval.cost >= COST_LIMIT, no algorithm was found and all 2404 other field of the returned struct are undefined. 2405 MODE is the machine mode of the multiplication. */ 2406 2407 static void 2408 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t, 2409 const struct mult_cost *cost_limit, machine_mode mode) 2410 { 2411 int m; 2412 struct algorithm *alg_in, *best_alg; 2413 struct mult_cost best_cost; 2414 struct mult_cost new_limit; 2415 int op_cost, op_latency; 2416 unsigned HOST_WIDE_INT orig_t = t; 2417 unsigned HOST_WIDE_INT q; 2418 int maxm, hash_index; 2419 bool cache_hit = false; 2420 enum alg_code cache_alg = alg_zero; 2421 bool speed = optimize_insn_for_speed_p (); 2422 machine_mode imode; 2423 struct alg_hash_entry *entry_ptr; 2424 2425 /* Indicate that no algorithm is yet found. If no algorithm 2426 is found, this value will be returned and indicate failure. */ 2427 alg_out->cost.cost = cost_limit->cost + 1; 2428 alg_out->cost.latency = cost_limit->latency + 1; 2429 2430 if (cost_limit->cost < 0 2431 || (cost_limit->cost == 0 && cost_limit->latency <= 0)) 2432 return; 2433 2434 /* Be prepared for vector modes. */ 2435 imode = GET_MODE_INNER (mode); 2436 if (imode == VOIDmode) 2437 imode = mode; 2438 2439 maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode)); 2440 2441 /* Restrict the bits of "t" to the multiplication's mode. */ 2442 t &= GET_MODE_MASK (imode); 2443 2444 /* t == 1 can be done in zero cost. */ 2445 if (t == 1) 2446 { 2447 alg_out->ops = 1; 2448 alg_out->cost.cost = 0; 2449 alg_out->cost.latency = 0; 2450 alg_out->op[0] = alg_m; 2451 return; 2452 } 2453 2454 /* t == 0 sometimes has a cost. If it does and it exceeds our limit, 2455 fail now. */ 2456 if (t == 0) 2457 { 2458 if (MULT_COST_LESS (cost_limit, zero_cost (speed))) 2459 return; 2460 else 2461 { 2462 alg_out->ops = 1; 2463 alg_out->cost.cost = zero_cost (speed); 2464 alg_out->cost.latency = zero_cost (speed); 2465 alg_out->op[0] = alg_zero; 2466 return; 2467 } 2468 } 2469 2470 /* We'll be needing a couple extra algorithm structures now. 
*/

  alg_in = XALLOCA (struct algorithm);
  best_alg = XALLOCA (struct algorithm);
  best_cost = *cost_limit;

  /* Compute the hash index.  */
  hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;

  /* See if we already know what to do for T.  */
  entry_ptr = alg_hash_entry_ptr (hash_index);
  if (entry_ptr->t == t
      && entry_ptr->mode == mode
      && entry_ptr->speed == speed
      && entry_ptr->alg != alg_unknown)
    {
      cache_alg = entry_ptr->alg;

      if (cache_alg == alg_impossible)
        {
          /* The cache tells us that it's impossible to synthesize
             multiplication by T within entry_ptr->cost.  */
          if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
            /* COST_LIMIT is at least as restrictive as the one
               recorded in the hash table, in which case we have no
               hope of synthesizing a multiplication.  Just
               return.  */
            return;

          /* If we get here, COST_LIMIT is less restrictive than the
             one recorded in the hash table, so we may be able to
             synthesize a multiplication.  Proceed as if we didn't
             have the cache entry.  */
        }
      else
        {
          if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
            /* The cached algorithm shows that this multiplication
               requires more cost than COST_LIMIT.  Just return.  This
               way, we don't clobber this cache entry with
               alg_impossible but retain useful information.  */
            return;

          cache_hit = true;

          switch (cache_alg)
            {
            case alg_shift:
              goto do_alg_shift;

            case alg_add_t_m2:
            case alg_sub_t_m2:
              goto do_alg_addsub_t_m2;

            case alg_add_factor:
            case alg_sub_factor:
              goto do_alg_addsub_factor;

            case alg_add_t2_m:
              goto do_alg_add_t2_m;

            case alg_sub_t2_m:
              goto do_alg_sub_t2_m;

            default:
              gcc_unreachable ();
            }
        }
    }

  /* If we have a group of zero bits at the low-order part of T, try
     multiplying by the remaining bits and then doing a shift.  */

  if ((t & 1) == 0)
    {
    do_alg_shift:
      m = floor_log2 (t & -t);  /* m = number of low zero bits */
      if (m < maxm)
        {
          q = t >> m;
          /* The function expand_shift will choose between a shift and
             a sequence of additions, so the observed cost is given as
             MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)).  */
          op_cost = m * add_cost (speed, mode);
          if (shift_cost (speed, mode, m) < op_cost)
            op_cost = shift_cost (speed, mode, m);
          new_limit.cost = best_cost.cost - op_cost;
          new_limit.latency = best_cost.latency - op_cost;
          synth_mult (alg_in, q, &new_limit, mode);

          alg_in->cost.cost += op_cost;
          alg_in->cost.latency += op_cost;
          if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
            {
              best_cost = alg_in->cost;
              std::swap (alg_in, best_alg);
              best_alg->log[best_alg->ops] = m;
              best_alg->op[best_alg->ops] = alg_shift;
            }

          /* See if treating ORIG_T as a signed number yields a better
             sequence.  Try this sequence only for a negative ORIG_T
             as it would be useless for a non-negative ORIG_T.  */
          if ((HOST_WIDE_INT) orig_t < 0)
            {
              /* Shift ORIG_T as follows because a right shift of a
                 negative-valued signed type is implementation
                 defined.
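For instance (illustrative values), with ORIG_T == -12 and m == 2:
                 ~ORIG_T is 11, 11 >> 2 is 2, and ~2 is -3, which is -12 divided by 4;
                 the double complement thus gets the effect of an arithmetic right shift
                 without relying on the implementation-defined behaviour.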
*/ 2579 q = ~(~orig_t >> m); 2580 /* The function expand_shift will choose between a shift 2581 and a sequence of additions, so the observed cost is 2582 given as MIN (m * add_cost(speed, mode), 2583 shift_cost(speed, mode, m)). */ 2584 op_cost = m * add_cost (speed, mode); 2585 if (shift_cost (speed, mode, m) < op_cost) 2586 op_cost = shift_cost (speed, mode, m); 2587 new_limit.cost = best_cost.cost - op_cost; 2588 new_limit.latency = best_cost.latency - op_cost; 2589 synth_mult (alg_in, q, &new_limit, mode); 2590 2591 alg_in->cost.cost += op_cost; 2592 alg_in->cost.latency += op_cost; 2593 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) 2594 { 2595 best_cost = alg_in->cost; 2596 std::swap (alg_in, best_alg); 2597 best_alg->log[best_alg->ops] = m; 2598 best_alg->op[best_alg->ops] = alg_shift; 2599 } 2600 } 2601 } 2602 if (cache_hit) 2603 goto done; 2604 } 2605 2606 /* If we have an odd number, add or subtract one. */ 2607 if ((t & 1) != 0) 2608 { 2609 unsigned HOST_WIDE_INT w; 2610 2611 do_alg_addsub_t_m2: 2612 for (w = 1; (w & t) != 0; w <<= 1) 2613 ; 2614 /* If T was -1, then W will be zero after the loop. This is another 2615 case where T ends with ...111. Handling this with (T + 1) and 2616 subtract 1 produces slightly better code and results in algorithm 2617 selection much faster than treating it like the ...0111 case 2618 below. */ 2619 if (w == 0 2620 || (w > 2 2621 /* Reject the case where t is 3. 2622 Thus we prefer addition in that case. */ 2623 && t != 3)) 2624 { 2625 /* T ends with ...111. Multiply by (T + 1) and subtract T. */ 2626 2627 op_cost = add_cost (speed, mode); 2628 new_limit.cost = best_cost.cost - op_cost; 2629 new_limit.latency = best_cost.latency - op_cost; 2630 synth_mult (alg_in, t + 1, &new_limit, mode); 2631 2632 alg_in->cost.cost += op_cost; 2633 alg_in->cost.latency += op_cost; 2634 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) 2635 { 2636 best_cost = alg_in->cost; 2637 std::swap (alg_in, best_alg); 2638 best_alg->log[best_alg->ops] = 0; 2639 best_alg->op[best_alg->ops] = alg_sub_t_m2; 2640 } 2641 } 2642 else 2643 { 2644 /* T ends with ...01 or ...011. Multiply by (T - 1) and add T. */ 2645 2646 op_cost = add_cost (speed, mode); 2647 new_limit.cost = best_cost.cost - op_cost; 2648 new_limit.latency = best_cost.latency - op_cost; 2649 synth_mult (alg_in, t - 1, &new_limit, mode); 2650 2651 alg_in->cost.cost += op_cost; 2652 alg_in->cost.latency += op_cost; 2653 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) 2654 { 2655 best_cost = alg_in->cost; 2656 std::swap (alg_in, best_alg); 2657 best_alg->log[best_alg->ops] = 0; 2658 best_alg->op[best_alg->ops] = alg_add_t_m2; 2659 } 2660 } 2661 2662 /* We may be able to calculate a * -7, a * -15, a * -31, etc 2663 quickly with a - a * n for some appropriate constant n. 
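For example, a * -7 is a - a * 8, i.e. a - (a << 3), so a single
         shift-and-subtract can do the whole multiplication when the target has
         such a pattern (example values only; the cost comparison below decides
         whether this is actually used).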
*/ 2664 m = exact_log2 (-orig_t + 1); 2665 if (m >= 0 && m < maxm) 2666 { 2667 op_cost = shiftsub1_cost (speed, mode, m); 2668 new_limit.cost = best_cost.cost - op_cost; 2669 new_limit.latency = best_cost.latency - op_cost; 2670 synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m, 2671 &new_limit, mode); 2672 2673 alg_in->cost.cost += op_cost; 2674 alg_in->cost.latency += op_cost; 2675 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) 2676 { 2677 best_cost = alg_in->cost; 2678 std::swap (alg_in, best_alg); 2679 best_alg->log[best_alg->ops] = m; 2680 best_alg->op[best_alg->ops] = alg_sub_t_m2; 2681 } 2682 } 2683 2684 if (cache_hit) 2685 goto done; 2686 } 2687 2688 /* Look for factors of t of the form 2689 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)). 2690 If we find such a factor, we can multiply by t using an algorithm that 2691 multiplies by q, shift the result by m and add/subtract it to itself. 2692 2693 We search for large factors first and loop down, even if large factors 2694 are less probable than small; if we find a large factor we will find a 2695 good sequence quickly, and therefore be able to prune (by decreasing 2696 COST_LIMIT) the search. */ 2697 2698 do_alg_addsub_factor: 2699 for (m = floor_log2 (t - 1); m >= 2; m--) 2700 { 2701 unsigned HOST_WIDE_INT d; 2702 2703 d = ((unsigned HOST_WIDE_INT) 1 << m) + 1; 2704 if (t % d == 0 && t > d && m < maxm 2705 && (!cache_hit || cache_alg == alg_add_factor)) 2706 { 2707 /* If the target has a cheap shift-and-add instruction use 2708 that in preference to a shift insn followed by an add insn. 2709 Assume that the shift-and-add is "atomic" with a latency 2710 equal to its cost, otherwise assume that on superscalar 2711 hardware the shift may be executed concurrently with the 2712 earlier steps in the algorithm. */ 2713 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m); 2714 if (shiftadd_cost (speed, mode, m) < op_cost) 2715 { 2716 op_cost = shiftadd_cost (speed, mode, m); 2717 op_latency = op_cost; 2718 } 2719 else 2720 op_latency = add_cost (speed, mode); 2721 2722 new_limit.cost = best_cost.cost - op_cost; 2723 new_limit.latency = best_cost.latency - op_latency; 2724 synth_mult (alg_in, t / d, &new_limit, mode); 2725 2726 alg_in->cost.cost += op_cost; 2727 alg_in->cost.latency += op_latency; 2728 if (alg_in->cost.latency < op_cost) 2729 alg_in->cost.latency = op_cost; 2730 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) 2731 { 2732 best_cost = alg_in->cost; 2733 std::swap (alg_in, best_alg); 2734 best_alg->log[best_alg->ops] = m; 2735 best_alg->op[best_alg->ops] = alg_add_factor; 2736 } 2737 /* Other factors will have been taken care of in the recursion. */ 2738 break; 2739 } 2740 2741 d = ((unsigned HOST_WIDE_INT) 1 << m) - 1; 2742 if (t % d == 0 && t > d && m < maxm 2743 && (!cache_hit || cache_alg == alg_sub_factor)) 2744 { 2745 /* If the target has a cheap shift-and-subtract insn use 2746 that in preference to a shift insn followed by a sub insn. 2747 Assume that the shift-and-sub is "atomic" with a latency 2748 equal to it's cost, otherwise assume that on superscalar 2749 hardware the shift may be executed concurrently with the 2750 earlier steps in the algorithm. 
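As an illustration of this factoring, t == 28 is 4 * (2^3 - 1): the
             recursive call synthesizes x * 4 and this step then forms
             ((x * 4) << 3) - x * 4, i.e. x * 28 (example value only).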
*/ 2751 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m); 2752 if (shiftsub0_cost (speed, mode, m) < op_cost) 2753 { 2754 op_cost = shiftsub0_cost (speed, mode, m); 2755 op_latency = op_cost; 2756 } 2757 else 2758 op_latency = add_cost (speed, mode); 2759 2760 new_limit.cost = best_cost.cost - op_cost; 2761 new_limit.latency = best_cost.latency - op_latency; 2762 synth_mult (alg_in, t / d, &new_limit, mode); 2763 2764 alg_in->cost.cost += op_cost; 2765 alg_in->cost.latency += op_latency; 2766 if (alg_in->cost.latency < op_cost) 2767 alg_in->cost.latency = op_cost; 2768 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) 2769 { 2770 best_cost = alg_in->cost; 2771 std::swap (alg_in, best_alg); 2772 best_alg->log[best_alg->ops] = m; 2773 best_alg->op[best_alg->ops] = alg_sub_factor; 2774 } 2775 break; 2776 } 2777 } 2778 if (cache_hit) 2779 goto done; 2780 2781 /* Try shift-and-add (load effective address) instructions, 2782 i.e. do a*3, a*5, a*9. */ 2783 if ((t & 1) != 0) 2784 { 2785 do_alg_add_t2_m: 2786 q = t - 1; 2787 q = q & -q; 2788 m = exact_log2 (q); 2789 if (m >= 0 && m < maxm) 2790 { 2791 op_cost = shiftadd_cost (speed, mode, m); 2792 new_limit.cost = best_cost.cost - op_cost; 2793 new_limit.latency = best_cost.latency - op_cost; 2794 synth_mult (alg_in, (t - 1) >> m, &new_limit, mode); 2795 2796 alg_in->cost.cost += op_cost; 2797 alg_in->cost.latency += op_cost; 2798 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) 2799 { 2800 best_cost = alg_in->cost; 2801 std::swap (alg_in, best_alg); 2802 best_alg->log[best_alg->ops] = m; 2803 best_alg->op[best_alg->ops] = alg_add_t2_m; 2804 } 2805 } 2806 if (cache_hit) 2807 goto done; 2808 2809 do_alg_sub_t2_m: 2810 q = t + 1; 2811 q = q & -q; 2812 m = exact_log2 (q); 2813 if (m >= 0 && m < maxm) 2814 { 2815 op_cost = shiftsub0_cost (speed, mode, m); 2816 new_limit.cost = best_cost.cost - op_cost; 2817 new_limit.latency = best_cost.latency - op_cost; 2818 synth_mult (alg_in, (t + 1) >> m, &new_limit, mode); 2819 2820 alg_in->cost.cost += op_cost; 2821 alg_in->cost.latency += op_cost; 2822 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) 2823 { 2824 best_cost = alg_in->cost; 2825 std::swap (alg_in, best_alg); 2826 best_alg->log[best_alg->ops] = m; 2827 best_alg->op[best_alg->ops] = alg_sub_t2_m; 2828 } 2829 } 2830 if (cache_hit) 2831 goto done; 2832 } 2833 2834 done: 2835 /* If best_cost has not decreased, we have not found any algorithm. */ 2836 if (!CHEAPER_MULT_COST (&best_cost, cost_limit)) 2837 { 2838 /* We failed to find an algorithm. Record alg_impossible for 2839 this case (that is, <T, MODE, COST_LIMIT>) so that next time 2840 we are asked to find an algorithm for T within the same or 2841 lower COST_LIMIT, we can immediately return to the 2842 caller. */ 2843 entry_ptr->t = t; 2844 entry_ptr->mode = mode; 2845 entry_ptr->speed = speed; 2846 entry_ptr->alg = alg_impossible; 2847 entry_ptr->cost = *cost_limit; 2848 return; 2849 } 2850 2851 /* Cache the result. */ 2852 if (!cache_hit) 2853 { 2854 entry_ptr->t = t; 2855 entry_ptr->mode = mode; 2856 entry_ptr->speed = speed; 2857 entry_ptr->alg = best_alg->op[best_alg->ops]; 2858 entry_ptr->cost.cost = best_cost.cost; 2859 entry_ptr->cost.latency = best_cost.latency; 2860 } 2861 2862 /* If we are getting a too long sequence for `struct algorithm' 2863 to record, make this search fail. */ 2864 if (best_alg->ops == MAX_BITS_PER_WORD) 2865 return; 2866 2867 /* Copy the algorithm from temporary space to the space at alg_out. 
2868 We avoid using structure assignment because the majority of 2869 best_alg is normally undefined, and this is a critical function. */ 2870 alg_out->ops = best_alg->ops + 1; 2871 alg_out->cost = best_cost; 2872 memcpy (alg_out->op, best_alg->op, 2873 alg_out->ops * sizeof *alg_out->op); 2874 memcpy (alg_out->log, best_alg->log, 2875 alg_out->ops * sizeof *alg_out->log); 2876 } 2877 2878 /* Find the cheapest way of multiplying a value of mode MODE by VAL. 2879 Try three variations: 2880 2881 - a shift/add sequence based on VAL itself 2882 - a shift/add sequence based on -VAL, followed by a negation 2883 - a shift/add sequence based on VAL - 1, followed by an addition. 2884 2885 Return true if the cheapest of these cost less than MULT_COST, 2886 describing the algorithm in *ALG and final fixup in *VARIANT. */ 2887 2888 static bool 2889 choose_mult_variant (machine_mode mode, HOST_WIDE_INT val, 2890 struct algorithm *alg, enum mult_variant *variant, 2891 int mult_cost) 2892 { 2893 struct algorithm alg2; 2894 struct mult_cost limit; 2895 int op_cost; 2896 bool speed = optimize_insn_for_speed_p (); 2897 2898 /* Fail quickly for impossible bounds. */ 2899 if (mult_cost < 0) 2900 return false; 2901 2902 /* Ensure that mult_cost provides a reasonable upper bound. 2903 Any constant multiplication can be performed with less 2904 than 2 * bits additions. */ 2905 op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode); 2906 if (mult_cost > op_cost) 2907 mult_cost = op_cost; 2908 2909 *variant = basic_variant; 2910 limit.cost = mult_cost; 2911 limit.latency = mult_cost; 2912 synth_mult (alg, val, &limit, mode); 2913 2914 /* This works only if the inverted value actually fits in an 2915 `unsigned int' */ 2916 if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode)) 2917 { 2918 op_cost = neg_cost (speed, mode); 2919 if (MULT_COST_LESS (&alg->cost, mult_cost)) 2920 { 2921 limit.cost = alg->cost.cost - op_cost; 2922 limit.latency = alg->cost.latency - op_cost; 2923 } 2924 else 2925 { 2926 limit.cost = mult_cost - op_cost; 2927 limit.latency = mult_cost - op_cost; 2928 } 2929 2930 synth_mult (&alg2, -val, &limit, mode); 2931 alg2.cost.cost += op_cost; 2932 alg2.cost.latency += op_cost; 2933 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost)) 2934 *alg = alg2, *variant = negate_variant; 2935 } 2936 2937 /* This proves very useful for division-by-constant. */ 2938 op_cost = add_cost (speed, mode); 2939 if (MULT_COST_LESS (&alg->cost, mult_cost)) 2940 { 2941 limit.cost = alg->cost.cost - op_cost; 2942 limit.latency = alg->cost.latency - op_cost; 2943 } 2944 else 2945 { 2946 limit.cost = mult_cost - op_cost; 2947 limit.latency = mult_cost - op_cost; 2948 } 2949 2950 synth_mult (&alg2, val - 1, &limit, mode); 2951 alg2.cost.cost += op_cost; 2952 alg2.cost.latency += op_cost; 2953 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost)) 2954 *alg = alg2, *variant = add_variant; 2955 2956 return MULT_COST_LESS (&alg->cost, mult_cost); 2957 } 2958 2959 /* A subroutine of expand_mult, used for constant multiplications. 2960 Multiply OP0 by VAL in mode MODE, storing the result in TARGET if 2961 convenient. Use the shift/add sequence described by ALG and apply 2962 the final fixup specified by VARIANT. 
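For instance, VAL == 10 might (depending on the target's cost model) be
   realized as ACCUM = OP0; ACCUM = (ACCUM << 2) + OP0; ACCUM = ACCUM << 1,
   i.e. OP0 * 5 doubled.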
*/ 2963 2964 static rtx 2965 expand_mult_const (machine_mode mode, rtx op0, HOST_WIDE_INT val, 2966 rtx target, const struct algorithm *alg, 2967 enum mult_variant variant) 2968 { 2969 HOST_WIDE_INT val_so_far; 2970 rtx_insn *insn; 2971 rtx accum, tem; 2972 int opno; 2973 machine_mode nmode; 2974 2975 /* Avoid referencing memory over and over and invalid sharing 2976 on SUBREGs. */ 2977 op0 = force_reg (mode, op0); 2978 2979 /* ACCUM starts out either as OP0 or as a zero, depending on 2980 the first operation. */ 2981 2982 if (alg->op[0] == alg_zero) 2983 { 2984 accum = copy_to_mode_reg (mode, CONST0_RTX (mode)); 2985 val_so_far = 0; 2986 } 2987 else if (alg->op[0] == alg_m) 2988 { 2989 accum = copy_to_mode_reg (mode, op0); 2990 val_so_far = 1; 2991 } 2992 else 2993 gcc_unreachable (); 2994 2995 for (opno = 1; opno < alg->ops; opno++) 2996 { 2997 int log = alg->log[opno]; 2998 rtx shift_subtarget = optimize ? 0 : accum; 2999 rtx add_target 3000 = (opno == alg->ops - 1 && target != 0 && variant != add_variant 3001 && !optimize) 3002 ? target : 0; 3003 rtx accum_target = optimize ? 0 : accum; 3004 rtx accum_inner; 3005 3006 switch (alg->op[opno]) 3007 { 3008 case alg_shift: 3009 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0); 3010 /* REG_EQUAL note will be attached to the following insn. */ 3011 emit_move_insn (accum, tem); 3012 val_so_far <<= log; 3013 break; 3014 3015 case alg_add_t_m2: 3016 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0); 3017 accum = force_operand (gen_rtx_PLUS (mode, accum, tem), 3018 add_target ? add_target : accum_target); 3019 val_so_far += (HOST_WIDE_INT) 1 << log; 3020 break; 3021 3022 case alg_sub_t_m2: 3023 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0); 3024 accum = force_operand (gen_rtx_MINUS (mode, accum, tem), 3025 add_target ? add_target : accum_target); 3026 val_so_far -= (HOST_WIDE_INT) 1 << log; 3027 break; 3028 3029 case alg_add_t2_m: 3030 accum = expand_shift (LSHIFT_EXPR, mode, accum, 3031 log, shift_subtarget, 0); 3032 accum = force_operand (gen_rtx_PLUS (mode, accum, op0), 3033 add_target ? add_target : accum_target); 3034 val_so_far = (val_so_far << log) + 1; 3035 break; 3036 3037 case alg_sub_t2_m: 3038 accum = expand_shift (LSHIFT_EXPR, mode, accum, 3039 log, shift_subtarget, 0); 3040 accum = force_operand (gen_rtx_MINUS (mode, accum, op0), 3041 add_target ? add_target : accum_target); 3042 val_so_far = (val_so_far << log) - 1; 3043 break; 3044 3045 case alg_add_factor: 3046 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0); 3047 accum = force_operand (gen_rtx_PLUS (mode, accum, tem), 3048 add_target ? add_target : accum_target); 3049 val_so_far += val_so_far << log; 3050 break; 3051 3052 case alg_sub_factor: 3053 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0); 3054 accum = force_operand (gen_rtx_MINUS (mode, tem, accum), 3055 (add_target 3056 ? add_target : (optimize ? 0 : tem))); 3057 val_so_far = (val_so_far << log) - val_so_far; 3058 break; 3059 3060 default: 3061 gcc_unreachable (); 3062 } 3063 3064 if (SCALAR_INT_MODE_P (mode)) 3065 { 3066 /* Write a REG_EQUAL note on the last insn so that we can cse 3067 multiplication sequences. Note that if ACCUM is a SUBREG, 3068 we've set the inner register and must properly indicate that. 
*/ 3069 tem = op0, nmode = mode; 3070 accum_inner = accum; 3071 if (GET_CODE (accum) == SUBREG) 3072 { 3073 accum_inner = SUBREG_REG (accum); 3074 nmode = GET_MODE (accum_inner); 3075 tem = gen_lowpart (nmode, op0); 3076 } 3077 3078 insn = get_last_insn (); 3079 set_dst_reg_note (insn, REG_EQUAL, 3080 gen_rtx_MULT (nmode, tem, 3081 gen_int_mode (val_so_far, nmode)), 3082 accum_inner); 3083 } 3084 } 3085 3086 if (variant == negate_variant) 3087 { 3088 val_so_far = -val_so_far; 3089 accum = expand_unop (mode, neg_optab, accum, target, 0); 3090 } 3091 else if (variant == add_variant) 3092 { 3093 val_so_far = val_so_far + 1; 3094 accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target); 3095 } 3096 3097 /* Compare only the bits of val and val_so_far that are significant 3098 in the result mode, to avoid sign-/zero-extension confusion. */ 3099 nmode = GET_MODE_INNER (mode); 3100 if (nmode == VOIDmode) 3101 nmode = mode; 3102 val &= GET_MODE_MASK (nmode); 3103 val_so_far &= GET_MODE_MASK (nmode); 3104 gcc_assert (val == val_so_far); 3105 3106 return accum; 3107 } 3108 3109 /* Perform a multiplication and return an rtx for the result. 3110 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's); 3111 TARGET is a suggestion for where to store the result (an rtx). 3112 3113 We check specially for a constant integer as OP1. 3114 If you want this check for OP0 as well, then before calling 3115 you should swap the two operands if OP0 would be constant. */ 3116 3117 rtx 3118 expand_mult (machine_mode mode, rtx op0, rtx op1, rtx target, 3119 int unsignedp) 3120 { 3121 enum mult_variant variant; 3122 struct algorithm algorithm; 3123 rtx scalar_op1; 3124 int max_cost; 3125 bool speed = optimize_insn_for_speed_p (); 3126 bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp; 3127 3128 if (CONSTANT_P (op0)) 3129 std::swap (op0, op1); 3130 3131 /* For vectors, there are several simplifications that can be made if 3132 all elements of the vector constant are identical. */ 3133 scalar_op1 = op1; 3134 if (GET_CODE (op1) == CONST_VECTOR) 3135 { 3136 int i, n = CONST_VECTOR_NUNITS (op1); 3137 scalar_op1 = CONST_VECTOR_ELT (op1, 0); 3138 for (i = 1; i < n; ++i) 3139 if (!rtx_equal_p (scalar_op1, CONST_VECTOR_ELT (op1, i))) 3140 goto skip_scalar; 3141 } 3142 3143 if (INTEGRAL_MODE_P (mode)) 3144 { 3145 rtx fake_reg; 3146 HOST_WIDE_INT coeff; 3147 bool is_neg; 3148 int mode_bitsize; 3149 3150 if (op1 == CONST0_RTX (mode)) 3151 return op1; 3152 if (op1 == CONST1_RTX (mode)) 3153 return op0; 3154 if (op1 == CONSTM1_RTX (mode)) 3155 return expand_unop (mode, do_trapv ? negv_optab : neg_optab, 3156 op0, target, 0); 3157 3158 if (do_trapv) 3159 goto skip_synth; 3160 3161 /* If mode is integer vector mode, check if the backend supports 3162 vector lshift (by scalar or vector) at all. If not, we can't use 3163 synthetized multiply. */ 3164 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT 3165 && optab_handler (vashl_optab, mode) == CODE_FOR_nothing 3166 && optab_handler (ashl_optab, mode) == CODE_FOR_nothing) 3167 goto skip_synth; 3168 3169 /* These are the operations that are potentially turned into 3170 a sequence of shifts and additions. */ 3171 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode); 3172 3173 /* synth_mult does an `unsigned int' multiply. As long as the mode is 3174 less than or equal in size to `unsigned int' this doesn't matter. 
3175 If the mode is larger than `unsigned int', then synth_mult works 3176 only if the constant value exactly fits in an `unsigned int' without 3177 any truncation. This means that multiplying by negative values does 3178 not work; results are off by 2^32 on a 32 bit machine. */ 3179 if (CONST_INT_P (scalar_op1)) 3180 { 3181 coeff = INTVAL (scalar_op1); 3182 is_neg = coeff < 0; 3183 } 3184 #if TARGET_SUPPORTS_WIDE_INT 3185 else if (CONST_WIDE_INT_P (scalar_op1)) 3186 #else 3187 else if (CONST_DOUBLE_AS_INT_P (scalar_op1)) 3188 #endif 3189 { 3190 int shift = wi::exact_log2 (std::make_pair (scalar_op1, mode)); 3191 /* Perfect power of 2 (other than 1, which is handled above). */ 3192 if (shift > 0) 3193 return expand_shift (LSHIFT_EXPR, mode, op0, 3194 shift, target, unsignedp); 3195 else 3196 goto skip_synth; 3197 } 3198 else 3199 goto skip_synth; 3200 3201 /* We used to test optimize here, on the grounds that it's better to 3202 produce a smaller program when -O is not used. But this causes 3203 such a terrible slowdown sometimes that it seems better to always 3204 use synth_mult. */ 3205 3206 /* Special case powers of two. */ 3207 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff) 3208 && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)) 3209 return expand_shift (LSHIFT_EXPR, mode, op0, 3210 floor_log2 (coeff), target, unsignedp); 3211 3212 fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1); 3213 3214 /* Attempt to handle multiplication of DImode values by negative 3215 coefficients, by performing the multiplication by a positive 3216 multiplier and then inverting the result. */ 3217 if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT) 3218 { 3219 /* Its safe to use -coeff even for INT_MIN, as the 3220 result is interpreted as an unsigned coefficient. 3221 Exclude cost of op0 from max_cost to match the cost 3222 calculation of the synth_mult. */ 3223 coeff = -(unsigned HOST_WIDE_INT) coeff; 3224 max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed) 3225 - neg_cost (speed, mode)); 3226 if (max_cost <= 0) 3227 goto skip_synth; 3228 3229 /* Special case powers of two. */ 3230 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)) 3231 { 3232 rtx temp = expand_shift (LSHIFT_EXPR, mode, op0, 3233 floor_log2 (coeff), target, unsignedp); 3234 return expand_unop (mode, neg_optab, temp, target, 0); 3235 } 3236 3237 if (choose_mult_variant (mode, coeff, &algorithm, &variant, 3238 max_cost)) 3239 { 3240 rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX, 3241 &algorithm, variant); 3242 return expand_unop (mode, neg_optab, temp, target, 0); 3243 } 3244 goto skip_synth; 3245 } 3246 3247 /* Exclude cost of op0 from max_cost to match the cost 3248 calculation of the synth_mult. */ 3249 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed); 3250 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost)) 3251 return expand_mult_const (mode, op0, coeff, target, 3252 &algorithm, variant); 3253 } 3254 skip_synth: 3255 3256 /* Expand x*2.0 as x+x. */ 3257 if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1)) 3258 { 3259 REAL_VALUE_TYPE d; 3260 REAL_VALUE_FROM_CONST_DOUBLE (d, scalar_op1); 3261 3262 if (REAL_VALUES_EQUAL (d, dconst2)) 3263 { 3264 op0 = force_reg (GET_MODE (op0), op0); 3265 return expand_binop (mode, add_optab, op0, op0, 3266 target, unsignedp, OPTAB_LIB_WIDEN); 3267 } 3268 } 3269 skip_scalar: 3270 3271 /* This used to use umul_optab if unsigned, but for non-widening multiply 3272 there is no difference between signed and unsigned. */ 3273 op0 = expand_binop (mode, do_trapv ? 
smulv_optab : smul_optab, 3274 op0, op1, target, unsignedp, OPTAB_LIB_WIDEN); 3275 gcc_assert (op0); 3276 return op0; 3277 } 3278 3279 /* Return a cost estimate for multiplying a register by the given 3280 COEFFicient in the given MODE and SPEED. */ 3281 3282 int 3283 mult_by_coeff_cost (HOST_WIDE_INT coeff, machine_mode mode, bool speed) 3284 { 3285 int max_cost; 3286 struct algorithm algorithm; 3287 enum mult_variant variant; 3288 3289 rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1); 3290 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg), speed); 3291 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost)) 3292 return algorithm.cost.cost; 3293 else 3294 return max_cost; 3295 } 3296 3297 /* Perform a widening multiplication and return an rtx for the result. 3298 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's); 3299 TARGET is a suggestion for where to store the result (an rtx). 3300 THIS_OPTAB is the optab we should use, it must be either umul_widen_optab 3301 or smul_widen_optab. 3302 3303 We check specially for a constant integer as OP1, comparing the 3304 cost of a widening multiply against the cost of a sequence of shifts 3305 and adds. */ 3306 3307 rtx 3308 expand_widening_mult (machine_mode mode, rtx op0, rtx op1, rtx target, 3309 int unsignedp, optab this_optab) 3310 { 3311 bool speed = optimize_insn_for_speed_p (); 3312 rtx cop1; 3313 3314 if (CONST_INT_P (op1) 3315 && GET_MODE (op0) != VOIDmode 3316 && (cop1 = convert_modes (mode, GET_MODE (op0), op1, 3317 this_optab == umul_widen_optab)) 3318 && CONST_INT_P (cop1) 3319 && (INTVAL (cop1) >= 0 3320 || HWI_COMPUTABLE_MODE_P (mode))) 3321 { 3322 HOST_WIDE_INT coeff = INTVAL (cop1); 3323 int max_cost; 3324 enum mult_variant variant; 3325 struct algorithm algorithm; 3326 3327 if (coeff == 0) 3328 return CONST0_RTX (mode); 3329 3330 /* Special case powers of two. */ 3331 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)) 3332 { 3333 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab); 3334 return expand_shift (LSHIFT_EXPR, mode, op0, 3335 floor_log2 (coeff), target, unsignedp); 3336 } 3337 3338 /* Exclude cost of op0 from max_cost to match the cost 3339 calculation of the synth_mult. */ 3340 max_cost = mul_widen_cost (speed, mode); 3341 if (choose_mult_variant (mode, coeff, &algorithm, &variant, 3342 max_cost)) 3343 { 3344 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab); 3345 return expand_mult_const (mode, op0, coeff, target, 3346 &algorithm, variant); 3347 } 3348 } 3349 return expand_binop (mode, this_optab, op0, op1, target, 3350 unsignedp, OPTAB_LIB_WIDEN); 3351 } 3352 3353 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to 3354 replace division by D, and put the least significant N bits of the result 3355 in *MULTIPLIER_PTR and return the most significant bit. 3356 3357 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the 3358 needed precision is in PRECISION (should be <= N). 3359 3360 PRECISION should be as small as possible so this function can choose 3361 multiplier more freely. 3362 3363 The rounded-up logarithm of D is placed in *lgup_ptr. A shift count that 3364 is to be used for a final right shift is placed in *POST_SHIFT_PTR. 3365 3366 Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR), 3367 where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. 
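Worked example (figures computed for illustration): for D == 7 with
   N == PRECISION == 32, lgup is 3 and mhigh is (2^35 + 2^3)/7 == 0x124924925,
   so the low 32 bits 0x24924925 go into *MULTIPLIER_PTR, *POST_SHIFT_PTR gets 3,
   and the function returns 1 for the extra top bit of the multiplier.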
*/ 3368 3369 unsigned HOST_WIDE_INT 3370 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision, 3371 unsigned HOST_WIDE_INT *multiplier_ptr, 3372 int *post_shift_ptr, int *lgup_ptr) 3373 { 3374 int lgup, post_shift; 3375 int pow, pow2; 3376 3377 /* lgup = ceil(log2(divisor)); */ 3378 lgup = ceil_log2 (d); 3379 3380 gcc_assert (lgup <= n); 3381 3382 pow = n + lgup; 3383 pow2 = n + lgup - precision; 3384 3385 /* mlow = 2^(N + lgup)/d */ 3386 wide_int val = wi::set_bit_in_zero (pow, HOST_BITS_PER_DOUBLE_INT); 3387 wide_int mlow = wi::udiv_trunc (val, d); 3388 3389 /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */ 3390 val |= wi::set_bit_in_zero (pow2, HOST_BITS_PER_DOUBLE_INT); 3391 wide_int mhigh = wi::udiv_trunc (val, d); 3392 3393 /* If precision == N, then mlow, mhigh exceed 2^N 3394 (but they do not exceed 2^(N+1)). */ 3395 3396 /* Reduce to lowest terms. */ 3397 for (post_shift = lgup; post_shift > 0; post_shift--) 3398 { 3399 unsigned HOST_WIDE_INT ml_lo = wi::extract_uhwi (mlow, 1, 3400 HOST_BITS_PER_WIDE_INT); 3401 unsigned HOST_WIDE_INT mh_lo = wi::extract_uhwi (mhigh, 1, 3402 HOST_BITS_PER_WIDE_INT); 3403 if (ml_lo >= mh_lo) 3404 break; 3405 3406 mlow = wi::uhwi (ml_lo, HOST_BITS_PER_DOUBLE_INT); 3407 mhigh = wi::uhwi (mh_lo, HOST_BITS_PER_DOUBLE_INT); 3408 } 3409 3410 *post_shift_ptr = post_shift; 3411 *lgup_ptr = lgup; 3412 if (n < HOST_BITS_PER_WIDE_INT) 3413 { 3414 unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1; 3415 *multiplier_ptr = mhigh.to_uhwi () & mask; 3416 return mhigh.to_uhwi () >= mask; 3417 } 3418 else 3419 { 3420 *multiplier_ptr = mhigh.to_uhwi (); 3421 return wi::extract_uhwi (mhigh, HOST_BITS_PER_WIDE_INT, 1); 3422 } 3423 } 3424 3425 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is 3426 congruent to 1 (mod 2**N). */ 3427 3428 static unsigned HOST_WIDE_INT 3429 invert_mod2n (unsigned HOST_WIDE_INT x, int n) 3430 { 3431 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */ 3432 3433 /* The algorithm notes that the choice y = x satisfies 3434 x*y == 1 mod 2^3, since x is assumed odd. 3435 Each iteration doubles the number of bits of significance in y. */ 3436 3437 unsigned HOST_WIDE_INT mask; 3438 unsigned HOST_WIDE_INT y = x; 3439 int nbit = 3; 3440 3441 mask = (n == HOST_BITS_PER_WIDE_INT 3442 ? ~(unsigned HOST_WIDE_INT) 0 3443 : ((unsigned HOST_WIDE_INT) 1 << n) - 1); 3444 3445 while (nbit < n) 3446 { 3447 y = y * (2 - x*y) & mask; /* Modulo 2^N */ 3448 nbit *= 2; 3449 } 3450 return y; 3451 } 3452 3453 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness 3454 flavor of OP0 and OP1. ADJ_OPERAND is already the high half of the 3455 product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed product 3456 to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to 3457 become signed. 3458 3459 The result is put in TARGET if that is convenient. 3460 3461 MODE is the mode of operation. */ 3462 3463 rtx 3464 expand_mult_highpart_adjust (machine_mode mode, rtx adj_operand, rtx op0, 3465 rtx op1, rtx target, int unsignedp) 3466 { 3467 rtx tem; 3468 enum rtx_code adj_code = unsignedp ? 
PLUS : MINUS; 3469 3470 tem = expand_shift (RSHIFT_EXPR, mode, op0, 3471 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0); 3472 tem = expand_and (mode, tem, op1, NULL_RTX); 3473 adj_operand 3474 = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem), 3475 adj_operand); 3476 3477 tem = expand_shift (RSHIFT_EXPR, mode, op1, 3478 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0); 3479 tem = expand_and (mode, tem, op0, NULL_RTX); 3480 target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem), 3481 target); 3482 3483 return target; 3484 } 3485 3486 /* Subroutine of expmed_mult_highpart. Return the MODE high part of OP. */ 3487 3488 static rtx 3489 extract_high_half (machine_mode mode, rtx op) 3490 { 3491 machine_mode wider_mode; 3492 3493 if (mode == word_mode) 3494 return gen_highpart (mode, op); 3495 3496 gcc_assert (!SCALAR_FLOAT_MODE_P (mode)); 3497 3498 wider_mode = GET_MODE_WIDER_MODE (mode); 3499 op = expand_shift (RSHIFT_EXPR, wider_mode, op, 3500 GET_MODE_BITSIZE (mode), 0, 1); 3501 return convert_modes (mode, wider_mode, op, 0); 3502 } 3503 3504 /* Like expmed_mult_highpart, but only consider using a multiplication 3505 optab. OP1 is an rtx for the constant operand. */ 3506 3507 static rtx 3508 expmed_mult_highpart_optab (machine_mode mode, rtx op0, rtx op1, 3509 rtx target, int unsignedp, int max_cost) 3510 { 3511 rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode); 3512 machine_mode wider_mode; 3513 optab moptab; 3514 rtx tem; 3515 int size; 3516 bool speed = optimize_insn_for_speed_p (); 3517 3518 gcc_assert (!SCALAR_FLOAT_MODE_P (mode)); 3519 3520 wider_mode = GET_MODE_WIDER_MODE (mode); 3521 size = GET_MODE_BITSIZE (mode); 3522 3523 /* Firstly, try using a multiplication insn that only generates the needed 3524 high part of the product, and in the sign flavor of unsignedp. */ 3525 if (mul_highpart_cost (speed, mode) < max_cost) 3526 { 3527 moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab; 3528 tem = expand_binop (mode, moptab, op0, narrow_op1, target, 3529 unsignedp, OPTAB_DIRECT); 3530 if (tem) 3531 return tem; 3532 } 3533 3534 /* Secondly, same as above, but use sign flavor opposite of unsignedp. 3535 Need to adjust the result after the multiplication. */ 3536 if (size - 1 < BITS_PER_WORD 3537 && (mul_highpart_cost (speed, mode) 3538 + 2 * shift_cost (speed, mode, size-1) 3539 + 4 * add_cost (speed, mode) < max_cost)) 3540 { 3541 moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab; 3542 tem = expand_binop (mode, moptab, op0, narrow_op1, target, 3543 unsignedp, OPTAB_DIRECT); 3544 if (tem) 3545 /* We used the wrong signedness. Adjust the result. */ 3546 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1, 3547 tem, unsignedp); 3548 } 3549 3550 /* Try widening multiplication. */ 3551 moptab = unsignedp ? umul_widen_optab : smul_widen_optab; 3552 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing 3553 && mul_widen_cost (speed, wider_mode) < max_cost) 3554 { 3555 tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0, 3556 unsignedp, OPTAB_WIDEN); 3557 if (tem) 3558 return extract_high_half (mode, tem); 3559 } 3560 3561 /* Try widening the mode and perform a non-widening multiplication. 
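     For example (an illustrative case, not specific to any target): to get
     the SImode high part on a machine that only provides a full DImode
     multiply, both operands are extended to DImode, multiplied there, and
     the upper half is recovered by extract_high_half, i.e. a right shift by
     GET_MODE_BITSIZE (SImode) followed by truncation back to SImode.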
*/ 3562 if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing 3563 && size - 1 < BITS_PER_WORD 3564 && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1) 3565 < max_cost)) 3566 { 3567 rtx_insn *insns; 3568 rtx wop0, wop1; 3569 3570 /* We need to widen the operands, for example to ensure the 3571 constant multiplier is correctly sign or zero extended. 3572 Use a sequence to clean-up any instructions emitted by 3573 the conversions if things don't work out. */ 3574 start_sequence (); 3575 wop0 = convert_modes (wider_mode, mode, op0, unsignedp); 3576 wop1 = convert_modes (wider_mode, mode, op1, unsignedp); 3577 tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0, 3578 unsignedp, OPTAB_WIDEN); 3579 insns = get_insns (); 3580 end_sequence (); 3581 3582 if (tem) 3583 { 3584 emit_insn (insns); 3585 return extract_high_half (mode, tem); 3586 } 3587 } 3588 3589 /* Try widening multiplication of opposite signedness, and adjust. */ 3590 moptab = unsignedp ? smul_widen_optab : umul_widen_optab; 3591 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing 3592 && size - 1 < BITS_PER_WORD 3593 && (mul_widen_cost (speed, wider_mode) 3594 + 2 * shift_cost (speed, mode, size-1) 3595 + 4 * add_cost (speed, mode) < max_cost)) 3596 { 3597 tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 3598 NULL_RTX, ! unsignedp, OPTAB_WIDEN); 3599 if (tem != 0) 3600 { 3601 tem = extract_high_half (mode, tem); 3602 /* We used the wrong signedness. Adjust the result. */ 3603 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1, 3604 target, unsignedp); 3605 } 3606 } 3607 3608 return 0; 3609 } 3610 3611 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant), 3612 putting the high half of the result in TARGET if that is convenient, 3613 and return where the result is. If the operation can not be performed, 3614 0 is returned. 3615 3616 MODE is the mode of operation and result. 3617 3618 UNSIGNEDP nonzero means unsigned multiply. 3619 3620 MAX_COST is the total allowed cost for the expanded RTL. */ 3621 3622 static rtx 3623 expmed_mult_highpart (machine_mode mode, rtx op0, rtx op1, 3624 rtx target, int unsignedp, int max_cost) 3625 { 3626 machine_mode wider_mode = GET_MODE_WIDER_MODE (mode); 3627 unsigned HOST_WIDE_INT cnst1; 3628 int extra_cost; 3629 bool sign_adjust = false; 3630 enum mult_variant variant; 3631 struct algorithm alg; 3632 rtx tem; 3633 bool speed = optimize_insn_for_speed_p (); 3634 3635 gcc_assert (!SCALAR_FLOAT_MODE_P (mode)); 3636 /* We can't support modes wider than HOST_BITS_PER_INT. */ 3637 gcc_assert (HWI_COMPUTABLE_MODE_P (mode)); 3638 3639 cnst1 = INTVAL (op1) & GET_MODE_MASK (mode); 3640 3641 /* We can't optimize modes wider than BITS_PER_WORD. 3642 ??? We might be able to perform double-word arithmetic if 3643 mode == word_mode, however all the cost calculations in 3644 synth_mult etc. assume single-word operations. */ 3645 if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD) 3646 return expmed_mult_highpart_optab (mode, op0, op1, target, 3647 unsignedp, max_cost); 3648 3649 extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1); 3650 3651 /* Check whether we try to multiply by a negative constant. */ 3652 if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1)) 3653 { 3654 sign_adjust = true; 3655 extra_cost += add_cost (speed, mode); 3656 } 3657 3658 /* See whether shift/add multiplication is cheap enough. 
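     For instance, if CNST1 happened to be 0x80000001, the wider-mode
     product is just (OP0 << 31) + OP0; choose_mult_variant costs that
     shift/add sequence against the budget, and only if it fits do we go on
     to ask whether a specialized high-part multiply is cheaper still.
     (Purely an illustration of when the synthetic route can win.)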
*/ 3659 if (choose_mult_variant (wider_mode, cnst1, &alg, &variant, 3660 max_cost - extra_cost)) 3661 { 3662 /* See whether the specialized multiplication optabs are 3663 cheaper than the shift/add version. */ 3664 tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp, 3665 alg.cost.cost + extra_cost); 3666 if (tem) 3667 return tem; 3668 3669 tem = convert_to_mode (wider_mode, op0, unsignedp); 3670 tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant); 3671 tem = extract_high_half (mode, tem); 3672 3673 /* Adjust result for signedness. */ 3674 if (sign_adjust) 3675 tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem); 3676 3677 return tem; 3678 } 3679 return expmed_mult_highpart_optab (mode, op0, op1, target, 3680 unsignedp, max_cost); 3681 } 3682 3683 3684 /* Expand signed modulus of OP0 by a power of two D in mode MODE. */ 3685 3686 static rtx 3687 expand_smod_pow2 (machine_mode mode, rtx op0, HOST_WIDE_INT d) 3688 { 3689 rtx result, temp, shift; 3690 rtx_code_label *label; 3691 int logd; 3692 int prec = GET_MODE_PRECISION (mode); 3693 3694 logd = floor_log2 (d); 3695 result = gen_reg_rtx (mode); 3696 3697 /* Avoid conditional branches when they're expensive. */ 3698 if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2 3699 && optimize_insn_for_speed_p ()) 3700 { 3701 rtx signmask = emit_store_flag (result, LT, op0, const0_rtx, 3702 mode, 0, -1); 3703 if (signmask) 3704 { 3705 HOST_WIDE_INT masklow = ((HOST_WIDE_INT) 1 << logd) - 1; 3706 signmask = force_reg (mode, signmask); 3707 shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd); 3708 3709 /* Use the rtx_cost of a LSHIFTRT instruction to determine 3710 which instruction sequence to use. If logical right shifts 3711 are expensive the use 2 XORs, 2 SUBs and an AND, otherwise 3712 use a LSHIFTRT, 1 ADD, 1 SUB and an AND. */ 3713 3714 temp = gen_rtx_LSHIFTRT (mode, result, shift); 3715 if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing 3716 || (set_src_cost (temp, optimize_insn_for_speed_p ()) 3717 > COSTS_N_INSNS (2))) 3718 { 3719 temp = expand_binop (mode, xor_optab, op0, signmask, 3720 NULL_RTX, 1, OPTAB_LIB_WIDEN); 3721 temp = expand_binop (mode, sub_optab, temp, signmask, 3722 NULL_RTX, 1, OPTAB_LIB_WIDEN); 3723 temp = expand_binop (mode, and_optab, temp, 3724 gen_int_mode (masklow, mode), 3725 NULL_RTX, 1, OPTAB_LIB_WIDEN); 3726 temp = expand_binop (mode, xor_optab, temp, signmask, 3727 NULL_RTX, 1, OPTAB_LIB_WIDEN); 3728 temp = expand_binop (mode, sub_optab, temp, signmask, 3729 NULL_RTX, 1, OPTAB_LIB_WIDEN); 3730 } 3731 else 3732 { 3733 signmask = expand_binop (mode, lshr_optab, signmask, shift, 3734 NULL_RTX, 1, OPTAB_LIB_WIDEN); 3735 signmask = force_reg (mode, signmask); 3736 3737 temp = expand_binop (mode, add_optab, op0, signmask, 3738 NULL_RTX, 1, OPTAB_LIB_WIDEN); 3739 temp = expand_binop (mode, and_optab, temp, 3740 gen_int_mode (masklow, mode), 3741 NULL_RTX, 1, OPTAB_LIB_WIDEN); 3742 temp = expand_binop (mode, sub_optab, temp, signmask, 3743 NULL_RTX, 1, OPTAB_LIB_WIDEN); 3744 } 3745 return temp; 3746 } 3747 } 3748 3749 /* Mask contains the mode's signbit and the significant bits of the 3750 modulus. By including the signbit in the operation, many targets 3751 can avoid an explicit compare operation in the following comparison 3752 against zero. 
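     As a concrete, hand-worked illustration for a 32-bit mode and D = 16:
     LOGD is 4 and the mask is 0x8000000F, so OP0 & 0x8000000F is already
     the final remainder whenever it comes out non-negative (e.g. 25 gives
     9), while a negative result (e.g. -7 gives 0x80000009) falls through to
     the fixup below, which turns it back into the sign-extended
     remainder -7.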
*/ 3753 wide_int mask = wi::mask (logd, false, prec); 3754 mask = wi::set_bit (mask, prec - 1); 3755 3756 temp = expand_binop (mode, and_optab, op0, 3757 immed_wide_int_const (mask, mode), 3758 result, 1, OPTAB_LIB_WIDEN); 3759 if (temp != result) 3760 emit_move_insn (result, temp); 3761 3762 label = gen_label_rtx (); 3763 do_cmp_and_jump (result, const0_rtx, GE, mode, label); 3764 3765 temp = expand_binop (mode, sub_optab, result, const1_rtx, result, 3766 0, OPTAB_LIB_WIDEN); 3767 3768 mask = wi::mask (logd, true, prec); 3769 temp = expand_binop (mode, ior_optab, temp, 3770 immed_wide_int_const (mask, mode), 3771 result, 1, OPTAB_LIB_WIDEN); 3772 temp = expand_binop (mode, add_optab, temp, const1_rtx, result, 3773 0, OPTAB_LIB_WIDEN); 3774 if (temp != result) 3775 emit_move_insn (result, temp); 3776 emit_label (label); 3777 return result; 3778 } 3779 3780 /* Expand signed division of OP0 by a power of two D in mode MODE. 3781 This routine is only called for positive values of D. */ 3782 3783 static rtx 3784 expand_sdiv_pow2 (machine_mode mode, rtx op0, HOST_WIDE_INT d) 3785 { 3786 rtx temp; 3787 rtx_code_label *label; 3788 int logd; 3789 3790 logd = floor_log2 (d); 3791 3792 if (d == 2 3793 && BRANCH_COST (optimize_insn_for_speed_p (), 3794 false) >= 1) 3795 { 3796 temp = gen_reg_rtx (mode); 3797 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1); 3798 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX, 3799 0, OPTAB_LIB_WIDEN); 3800 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0); 3801 } 3802 3803 #ifdef HAVE_conditional_move 3804 if (BRANCH_COST (optimize_insn_for_speed_p (), false) 3805 >= 2) 3806 { 3807 rtx temp2; 3808 3809 start_sequence (); 3810 temp2 = copy_to_mode_reg (mode, op0); 3811 temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode), 3812 NULL_RTX, 0, OPTAB_LIB_WIDEN); 3813 temp = force_reg (mode, temp); 3814 3815 /* Construct "temp2 = (temp2 < 0) ? temp : temp2". */ 3816 temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx, 3817 mode, temp, temp2, mode, 0); 3818 if (temp2) 3819 { 3820 rtx_insn *seq = get_insns (); 3821 end_sequence (); 3822 emit_insn (seq); 3823 return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0); 3824 } 3825 end_sequence (); 3826 } 3827 #endif 3828 3829 if (BRANCH_COST (optimize_insn_for_speed_p (), 3830 false) >= 2) 3831 { 3832 int ushift = GET_MODE_BITSIZE (mode) - logd; 3833 3834 temp = gen_reg_rtx (mode); 3835 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1); 3836 if (GET_MODE_BITSIZE (mode) >= BITS_PER_WORD 3837 || shift_cost (optimize_insn_for_speed_p (), mode, ushift) 3838 > COSTS_N_INSNS (1)) 3839 temp = expand_binop (mode, and_optab, temp, gen_int_mode (d - 1, mode), 3840 NULL_RTX, 0, OPTAB_LIB_WIDEN); 3841 else 3842 temp = expand_shift (RSHIFT_EXPR, mode, temp, 3843 ushift, NULL_RTX, 1); 3844 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX, 3845 0, OPTAB_LIB_WIDEN); 3846 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0); 3847 } 3848 3849 label = gen_label_rtx (); 3850 temp = copy_to_mode_reg (mode, op0); 3851 do_cmp_and_jump (temp, const0_rtx, GE, mode, label); 3852 expand_inc (temp, gen_int_mode (d - 1, mode)); 3853 emit_label (label); 3854 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0); 3855 } 3856 3857 /* Emit the code to divide OP0 by OP1, putting the result in TARGET 3858 if that is convenient, and returning where the result is. 
3859 You may request either the quotient or the remainder as the result; 3860 specify REM_FLAG nonzero to get the remainder. 3861 3862 CODE is the expression code for which kind of division this is; 3863 it controls how rounding is done. MODE is the machine mode to use. 3864 UNSIGNEDP nonzero means do unsigned division. */ 3865 3866 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI 3867 and then correct it by or'ing in missing high bits 3868 if result of ANDI is nonzero. 3869 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result. 3870 This could optimize to a bfexts instruction. 3871 But C doesn't use these operations, so their optimizations are 3872 left for later. */ 3873 /* ??? For modulo, we don't actually need the highpart of the first product, 3874 the low part will do nicely. And for small divisors, the second multiply 3875 can also be a low-part only multiply or even be completely left out. 3876 E.g. to calculate the remainder of a division by 3 with a 32 bit 3877 multiply, multiply with 0x55555556 and extract the upper two bits; 3878 the result is exact for inputs up to 0x1fffffff. 3879 The input range can be reduced by using cross-sum rules. 3880 For odd divisors >= 3, the following table gives right shift counts 3881 so that if a number is shifted by an integer multiple of the given 3882 amount, the remainder stays the same: 3883 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20, 3884 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0, 3885 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0, 3886 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33, 3887 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12 3888 3889 Cross-sum rules for even numbers can be derived by leaving as many bits 3890 to the right alone as the divisor has zeros to the right. 3891 E.g. if x is an unsigned 32 bit number: 3892 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28 3893 */ 3894 3895 rtx 3896 expand_divmod (int rem_flag, enum tree_code code, machine_mode mode, 3897 rtx op0, rtx op1, rtx target, int unsignedp) 3898 { 3899 machine_mode compute_mode; 3900 rtx tquotient; 3901 rtx quotient = 0, remainder = 0; 3902 rtx_insn *last; 3903 int size; 3904 rtx_insn *insn; 3905 optab optab1, optab2; 3906 int op1_is_constant, op1_is_pow2 = 0; 3907 int max_cost, extra_cost; 3908 static HOST_WIDE_INT last_div_const = 0; 3909 bool speed = optimize_insn_for_speed_p (); 3910 3911 op1_is_constant = CONST_INT_P (op1); 3912 if (op1_is_constant) 3913 { 3914 wide_int ext_op1 = rtx_mode_t (op1, mode); 3915 op1_is_pow2 = (wi::popcount (ext_op1) == 1 3916 || (! unsignedp 3917 && wi::popcount (wi::neg (ext_op1)) == 1)); 3918 } 3919 3920 /* 3921 This is the structure of expand_divmod: 3922 3923 First comes code to fix up the operands so we can perform the operations 3924 correctly and efficiently. 3925 3926 Second comes a switch statement with code specific for each rounding mode. 3927 For some special operands this code emits all RTL for the desired 3928 operation, for other cases, it generates only a quotient and stores it in 3929 QUOTIENT. The case for trunc division/remainder might leave quotient = 0, 3930 to indicate that it has not done anything. 3931 3932 Last comes code that finishes the operation. If QUOTIENT is set and 3933 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If 3934 QUOTIENT is not set, it is computed using trunc rounding. 
3935 3936 We try to generate special code for division and remainder when OP1 is a 3937 constant. If |OP1| = 2**n we can use shifts and some other fast 3938 operations. For other values of OP1, we compute a carefully selected 3939 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0 3940 by m. 3941 3942 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper 3943 half of the product. Different strategies for generating the product are 3944 implemented in expmed_mult_highpart. 3945 3946 If what we actually want is the remainder, we generate that by another 3947 by-constant multiplication and a subtraction. */ 3948 3949 /* We shouldn't be called with OP1 == const1_rtx, but some of the 3950 code below will malfunction if we are, so check here and handle 3951 the special case if so. */ 3952 if (op1 == const1_rtx) 3953 return rem_flag ? const0_rtx : op0; 3954 3955 /* When dividing by -1, we could get an overflow. 3956 negv_optab can handle overflows. */ 3957 if (! unsignedp && op1 == constm1_rtx) 3958 { 3959 if (rem_flag) 3960 return const0_rtx; 3961 return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT 3962 ? negv_optab : neg_optab, op0, target, 0); 3963 } 3964 3965 if (target 3966 /* Don't use the function value register as a target 3967 since we have to read it as well as write it, 3968 and function-inlining gets confused by this. */ 3969 && ((REG_P (target) && REG_FUNCTION_VALUE_P (target)) 3970 /* Don't clobber an operand while doing a multi-step calculation. */ 3971 || ((rem_flag || op1_is_constant) 3972 && (reg_mentioned_p (target, op0) 3973 || (MEM_P (op0) && MEM_P (target)))) 3974 || reg_mentioned_p (target, op1) 3975 || (MEM_P (op1) && MEM_P (target)))) 3976 target = 0; 3977 3978 /* Get the mode in which to perform this computation. Normally it will 3979 be MODE, but sometimes we can't do the desired operation in MODE. 3980 If so, pick a wider mode in which we can do the operation. Convert 3981 to that mode at the start to avoid repeated conversions. 3982 3983 First see what operations we need. These depend on the expression 3984 we are evaluating. (We assume that divxx3 insns exist under the 3985 same conditions that modxx3 insns and that these insns don't normally 3986 fail. If these assumptions are not correct, we may generate less 3987 efficient code in some cases.) 3988 3989 Then see if we find a mode in which we can open-code that operation 3990 (either a division, modulus, or shift). Finally, check for the smallest 3991 mode for which we can do the operation with a library call. */ 3992 3993 /* We might want to refine this now that we have division-by-constant 3994 optimization. Since expmed_mult_highpart tries so many variants, it is 3995 not straightforward to generalize this. Maybe we should make an array 3996 of possible modes in init_expmed? Save this for GCC 2.7. */ 3997 3998 optab1 = (op1_is_pow2 3999 ? (unsignedp ? lshr_optab : ashr_optab) 4000 : (unsignedp ? udiv_optab : sdiv_optab)); 4001 optab2 = (op1_is_pow2 ? optab1 4002 : (unsignedp ? 
udivmod_optab : sdivmod_optab)); 4003 4004 for (compute_mode = mode; compute_mode != VOIDmode; 4005 compute_mode = GET_MODE_WIDER_MODE (compute_mode)) 4006 if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing 4007 || optab_handler (optab2, compute_mode) != CODE_FOR_nothing) 4008 break; 4009 4010 if (compute_mode == VOIDmode) 4011 for (compute_mode = mode; compute_mode != VOIDmode; 4012 compute_mode = GET_MODE_WIDER_MODE (compute_mode)) 4013 if (optab_libfunc (optab1, compute_mode) 4014 || optab_libfunc (optab2, compute_mode)) 4015 break; 4016 4017 /* If we still couldn't find a mode, use MODE, but expand_binop will 4018 probably die. */ 4019 if (compute_mode == VOIDmode) 4020 compute_mode = mode; 4021 4022 if (target && GET_MODE (target) == compute_mode) 4023 tquotient = target; 4024 else 4025 tquotient = gen_reg_rtx (compute_mode); 4026 4027 size = GET_MODE_BITSIZE (compute_mode); 4028 #if 0 4029 /* It should be possible to restrict the precision to GET_MODE_BITSIZE 4030 (mode), and thereby get better code when OP1 is a constant. Do that 4031 later. It will require going over all usages of SIZE below. */ 4032 size = GET_MODE_BITSIZE (mode); 4033 #endif 4034 4035 /* Only deduct something for a REM if the last divide done was 4036 for a different constant. Then set the constant of the last 4037 divide. */ 4038 max_cost = (unsignedp 4039 ? udiv_cost (speed, compute_mode) 4040 : sdiv_cost (speed, compute_mode)); 4041 if (rem_flag && ! (last_div_const != 0 && op1_is_constant 4042 && INTVAL (op1) == last_div_const)) 4043 max_cost -= (mul_cost (speed, compute_mode) 4044 + add_cost (speed, compute_mode)); 4045 4046 last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0; 4047 4048 /* Now convert to the best mode to use. */ 4049 if (compute_mode != mode) 4050 { 4051 op0 = convert_modes (compute_mode, mode, op0, unsignedp); 4052 op1 = convert_modes (compute_mode, mode, op1, unsignedp); 4053 4054 /* convert_modes may have placed op1 into a register, so we 4055 must recompute the following. */ 4056 op1_is_constant = CONST_INT_P (op1); 4057 if (op1_is_constant) 4058 { 4059 wide_int ext_op1 = rtx_mode_t (op1, compute_mode); 4060 op1_is_pow2 = (wi::popcount (ext_op1) == 1 4061 || (! unsignedp 4062 && wi::popcount (wi::neg (ext_op1)) == 1)); 4063 } 4064 else 4065 op1_is_pow2 = 0; 4066 } 4067 4068 /* If one of the operands is a volatile MEM, copy it into a register. */ 4069 4070 if (MEM_P (op0) && MEM_VOLATILE_P (op0)) 4071 op0 = force_reg (compute_mode, op0); 4072 if (MEM_P (op1) && MEM_VOLATILE_P (op1)) 4073 op1 = force_reg (compute_mode, op1); 4074 4075 /* If we need the remainder or if OP1 is constant, we need to 4076 put OP0 in a register in case it has any queued subexpressions. */ 4077 if (rem_flag || op1_is_constant) 4078 op0 = force_reg (compute_mode, op0); 4079 4080 last = get_last_insn (); 4081 4082 /* Promote floor rounding to trunc rounding for unsigned operations. 
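     (An unsigned quotient is never negative, so the two roundings agree;
     e.g. 7u / 2u is 3 whether rounded toward zero or toward minus infinity.
     Likewise, an exact unsigned division by a power of two is just a
     logical shift.)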
*/ 4083 if (unsignedp) 4084 { 4085 if (code == FLOOR_DIV_EXPR) 4086 code = TRUNC_DIV_EXPR; 4087 if (code == FLOOR_MOD_EXPR) 4088 code = TRUNC_MOD_EXPR; 4089 if (code == EXACT_DIV_EXPR && op1_is_pow2) 4090 code = TRUNC_DIV_EXPR; 4091 } 4092 4093 if (op1 != const0_rtx) 4094 switch (code) 4095 { 4096 case TRUNC_MOD_EXPR: 4097 case TRUNC_DIV_EXPR: 4098 if (op1_is_constant) 4099 { 4100 if (unsignedp) 4101 { 4102 unsigned HOST_WIDE_INT mh, ml; 4103 int pre_shift, post_shift; 4104 int dummy; 4105 wide_int wd = rtx_mode_t (op1, compute_mode); 4106 unsigned HOST_WIDE_INT d = wd.to_uhwi (); 4107 4108 if (wi::popcount (wd) == 1) 4109 { 4110 pre_shift = floor_log2 (d); 4111 if (rem_flag) 4112 { 4113 unsigned HOST_WIDE_INT mask 4114 = ((unsigned HOST_WIDE_INT) 1 << pre_shift) - 1; 4115 remainder 4116 = expand_binop (compute_mode, and_optab, op0, 4117 gen_int_mode (mask, compute_mode), 4118 remainder, 1, 4119 OPTAB_LIB_WIDEN); 4120 if (remainder) 4121 return gen_lowpart (mode, remainder); 4122 } 4123 quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0, 4124 pre_shift, tquotient, 1); 4125 } 4126 else if (size <= HOST_BITS_PER_WIDE_INT) 4127 { 4128 if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1))) 4129 { 4130 /* Most significant bit of divisor is set; emit an scc 4131 insn. */ 4132 quotient = emit_store_flag_force (tquotient, GEU, op0, op1, 4133 compute_mode, 1, 1); 4134 } 4135 else 4136 { 4137 /* Find a suitable multiplier and right shift count 4138 instead of multiplying with D. */ 4139 4140 mh = choose_multiplier (d, size, size, 4141 &ml, &post_shift, &dummy); 4142 4143 /* If the suggested multiplier is more than SIZE bits, 4144 we can do better for even divisors, using an 4145 initial right shift. */ 4146 if (mh != 0 && (d & 1) == 0) 4147 { 4148 pre_shift = floor_log2 (d & -d); 4149 mh = choose_multiplier (d >> pre_shift, size, 4150 size - pre_shift, 4151 &ml, &post_shift, &dummy); 4152 gcc_assert (!mh); 4153 } 4154 else 4155 pre_shift = 0; 4156 4157 if (mh != 0) 4158 { 4159 rtx t1, t2, t3, t4; 4160 4161 if (post_shift - 1 >= BITS_PER_WORD) 4162 goto fail1; 4163 4164 extra_cost 4165 = (shift_cost (speed, compute_mode, post_shift - 1) 4166 + shift_cost (speed, compute_mode, 1) 4167 + 2 * add_cost (speed, compute_mode)); 4168 t1 = expmed_mult_highpart 4169 (compute_mode, op0, 4170 gen_int_mode (ml, compute_mode), 4171 NULL_RTX, 1, max_cost - extra_cost); 4172 if (t1 == 0) 4173 goto fail1; 4174 t2 = force_operand (gen_rtx_MINUS (compute_mode, 4175 op0, t1), 4176 NULL_RTX); 4177 t3 = expand_shift (RSHIFT_EXPR, compute_mode, 4178 t2, 1, NULL_RTX, 1); 4179 t4 = force_operand (gen_rtx_PLUS (compute_mode, 4180 t1, t3), 4181 NULL_RTX); 4182 quotient = expand_shift 4183 (RSHIFT_EXPR, compute_mode, t4, 4184 post_shift - 1, tquotient, 1); 4185 } 4186 else 4187 { 4188 rtx t1, t2; 4189 4190 if (pre_shift >= BITS_PER_WORD 4191 || post_shift >= BITS_PER_WORD) 4192 goto fail1; 4193 4194 t1 = expand_shift 4195 (RSHIFT_EXPR, compute_mode, op0, 4196 pre_shift, NULL_RTX, 1); 4197 extra_cost 4198 = (shift_cost (speed, compute_mode, pre_shift) 4199 + shift_cost (speed, compute_mode, post_shift)); 4200 t2 = expmed_mult_highpart 4201 (compute_mode, t1, 4202 gen_int_mode (ml, compute_mode), 4203 NULL_RTX, 1, max_cost - extra_cost); 4204 if (t2 == 0) 4205 goto fail1; 4206 quotient = expand_shift 4207 (RSHIFT_EXPR, compute_mode, t2, 4208 post_shift, tquotient, 1); 4209 } 4210 } 4211 } 4212 else /* Too wide mode to use tricky code */ 4213 break; 4214 4215 insn = get_last_insn (); 4216 if (insn != last) 4217 
set_dst_reg_note (insn, REG_EQUAL, 4218 gen_rtx_UDIV (compute_mode, op0, op1), 4219 quotient); 4220 } 4221 else /* TRUNC_DIV, signed */ 4222 { 4223 unsigned HOST_WIDE_INT ml; 4224 int lgup, post_shift; 4225 rtx mlr; 4226 HOST_WIDE_INT d = INTVAL (op1); 4227 unsigned HOST_WIDE_INT abs_d; 4228 4229 /* Since d might be INT_MIN, we have to cast to 4230 unsigned HOST_WIDE_INT before negating to avoid 4231 undefined signed overflow. */ 4232 abs_d = (d >= 0 4233 ? (unsigned HOST_WIDE_INT) d 4234 : - (unsigned HOST_WIDE_INT) d); 4235 4236 /* n rem d = n rem -d */ 4237 if (rem_flag && d < 0) 4238 { 4239 d = abs_d; 4240 op1 = gen_int_mode (abs_d, compute_mode); 4241 } 4242 4243 if (d == 1) 4244 quotient = op0; 4245 else if (d == -1) 4246 quotient = expand_unop (compute_mode, neg_optab, op0, 4247 tquotient, 0); 4248 else if (size <= HOST_BITS_PER_WIDE_INT 4249 && abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1)) 4250 { 4251 /* This case is not handled correctly below. */ 4252 quotient = emit_store_flag (tquotient, EQ, op0, op1, 4253 compute_mode, 1, 1); 4254 if (quotient == 0) 4255 goto fail1; 4256 } 4257 else if (EXACT_POWER_OF_2_OR_ZERO_P (d) 4258 && (size <= HOST_BITS_PER_WIDE_INT || d >= 0) 4259 && (rem_flag 4260 ? smod_pow2_cheap (speed, compute_mode) 4261 : sdiv_pow2_cheap (speed, compute_mode)) 4262 /* We assume that cheap metric is true if the 4263 optab has an expander for this mode. */ 4264 && ((optab_handler ((rem_flag ? smod_optab 4265 : sdiv_optab), 4266 compute_mode) 4267 != CODE_FOR_nothing) 4268 || (optab_handler (sdivmod_optab, 4269 compute_mode) 4270 != CODE_FOR_nothing))) 4271 ; 4272 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d) 4273 && (size <= HOST_BITS_PER_WIDE_INT 4274 || abs_d != (unsigned HOST_WIDE_INT) d)) 4275 { 4276 if (rem_flag) 4277 { 4278 remainder = expand_smod_pow2 (compute_mode, op0, d); 4279 if (remainder) 4280 return gen_lowpart (mode, remainder); 4281 } 4282 4283 if (sdiv_pow2_cheap (speed, compute_mode) 4284 && ((optab_handler (sdiv_optab, compute_mode) 4285 != CODE_FOR_nothing) 4286 || (optab_handler (sdivmod_optab, compute_mode) 4287 != CODE_FOR_nothing))) 4288 quotient = expand_divmod (0, TRUNC_DIV_EXPR, 4289 compute_mode, op0, 4290 gen_int_mode (abs_d, 4291 compute_mode), 4292 NULL_RTX, 0); 4293 else 4294 quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d); 4295 4296 /* We have computed OP0 / abs(OP1). If OP1 is negative, 4297 negate the quotient. 
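                     For example (hand-worked): with OP0 = -9 and OP1 = -4,
                     the power-of-two path computes -9 / 4 = -2 and the
                     negation below yields 2, the truncated value of
                     -9 / -4.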
*/ 4298 if (d < 0) 4299 { 4300 insn = get_last_insn (); 4301 if (insn != last 4302 && abs_d < ((unsigned HOST_WIDE_INT) 1 4303 << (HOST_BITS_PER_WIDE_INT - 1))) 4304 set_dst_reg_note (insn, REG_EQUAL, 4305 gen_rtx_DIV (compute_mode, op0, 4306 gen_int_mode 4307 (abs_d, 4308 compute_mode)), 4309 quotient); 4310 4311 quotient = expand_unop (compute_mode, neg_optab, 4312 quotient, quotient, 0); 4313 } 4314 } 4315 else if (size <= HOST_BITS_PER_WIDE_INT) 4316 { 4317 choose_multiplier (abs_d, size, size - 1, 4318 &ml, &post_shift, &lgup); 4319 if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1)) 4320 { 4321 rtx t1, t2, t3; 4322 4323 if (post_shift >= BITS_PER_WORD 4324 || size - 1 >= BITS_PER_WORD) 4325 goto fail1; 4326 4327 extra_cost = (shift_cost (speed, compute_mode, post_shift) 4328 + shift_cost (speed, compute_mode, size - 1) 4329 + add_cost (speed, compute_mode)); 4330 t1 = expmed_mult_highpart 4331 (compute_mode, op0, gen_int_mode (ml, compute_mode), 4332 NULL_RTX, 0, max_cost - extra_cost); 4333 if (t1 == 0) 4334 goto fail1; 4335 t2 = expand_shift 4336 (RSHIFT_EXPR, compute_mode, t1, 4337 post_shift, NULL_RTX, 0); 4338 t3 = expand_shift 4339 (RSHIFT_EXPR, compute_mode, op0, 4340 size - 1, NULL_RTX, 0); 4341 if (d < 0) 4342 quotient 4343 = force_operand (gen_rtx_MINUS (compute_mode, 4344 t3, t2), 4345 tquotient); 4346 else 4347 quotient 4348 = force_operand (gen_rtx_MINUS (compute_mode, 4349 t2, t3), 4350 tquotient); 4351 } 4352 else 4353 { 4354 rtx t1, t2, t3, t4; 4355 4356 if (post_shift >= BITS_PER_WORD 4357 || size - 1 >= BITS_PER_WORD) 4358 goto fail1; 4359 4360 ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1); 4361 mlr = gen_int_mode (ml, compute_mode); 4362 extra_cost = (shift_cost (speed, compute_mode, post_shift) 4363 + shift_cost (speed, compute_mode, size - 1) 4364 + 2 * add_cost (speed, compute_mode)); 4365 t1 = expmed_mult_highpart (compute_mode, op0, mlr, 4366 NULL_RTX, 0, 4367 max_cost - extra_cost); 4368 if (t1 == 0) 4369 goto fail1; 4370 t2 = force_operand (gen_rtx_PLUS (compute_mode, 4371 t1, op0), 4372 NULL_RTX); 4373 t3 = expand_shift 4374 (RSHIFT_EXPR, compute_mode, t2, 4375 post_shift, NULL_RTX, 0); 4376 t4 = expand_shift 4377 (RSHIFT_EXPR, compute_mode, op0, 4378 size - 1, NULL_RTX, 0); 4379 if (d < 0) 4380 quotient 4381 = force_operand (gen_rtx_MINUS (compute_mode, 4382 t4, t3), 4383 tquotient); 4384 else 4385 quotient 4386 = force_operand (gen_rtx_MINUS (compute_mode, 4387 t3, t4), 4388 tquotient); 4389 } 4390 } 4391 else /* Too wide mode to use tricky code */ 4392 break; 4393 4394 insn = get_last_insn (); 4395 if (insn != last) 4396 set_dst_reg_note (insn, REG_EQUAL, 4397 gen_rtx_DIV (compute_mode, op0, op1), 4398 quotient); 4399 } 4400 break; 4401 } 4402 fail1: 4403 delete_insns_since (last); 4404 break; 4405 4406 case FLOOR_DIV_EXPR: 4407 case FLOOR_MOD_EXPR: 4408 /* We will come here only for signed operations. */ 4409 if (op1_is_constant && size <= HOST_BITS_PER_WIDE_INT) 4410 { 4411 unsigned HOST_WIDE_INT mh, ml; 4412 int pre_shift, lgup, post_shift; 4413 HOST_WIDE_INT d = INTVAL (op1); 4414 4415 if (d > 0) 4416 { 4417 /* We could just as easily deal with negative constants here, 4418 but it does not seem worth the trouble for GCC 2.6. 
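                 Note that for a positive power of two the arithmetic right
                 shift below already rounds toward minus infinity, so no
                 fixup is needed; e.g. -7 >> 2 is -2, which is exactly the
                 floor of -7 / 4.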
*/ 4419 if (EXACT_POWER_OF_2_OR_ZERO_P (d)) 4420 { 4421 pre_shift = floor_log2 (d); 4422 if (rem_flag) 4423 { 4424 unsigned HOST_WIDE_INT mask 4425 = ((unsigned HOST_WIDE_INT) 1 << pre_shift) - 1; 4426 remainder = expand_binop 4427 (compute_mode, and_optab, op0, 4428 gen_int_mode (mask, compute_mode), 4429 remainder, 0, OPTAB_LIB_WIDEN); 4430 if (remainder) 4431 return gen_lowpart (mode, remainder); 4432 } 4433 quotient = expand_shift 4434 (RSHIFT_EXPR, compute_mode, op0, 4435 pre_shift, tquotient, 0); 4436 } 4437 else 4438 { 4439 rtx t1, t2, t3, t4; 4440 4441 mh = choose_multiplier (d, size, size - 1, 4442 &ml, &post_shift, &lgup); 4443 gcc_assert (!mh); 4444 4445 if (post_shift < BITS_PER_WORD 4446 && size - 1 < BITS_PER_WORD) 4447 { 4448 t1 = expand_shift 4449 (RSHIFT_EXPR, compute_mode, op0, 4450 size - 1, NULL_RTX, 0); 4451 t2 = expand_binop (compute_mode, xor_optab, op0, t1, 4452 NULL_RTX, 0, OPTAB_WIDEN); 4453 extra_cost = (shift_cost (speed, compute_mode, post_shift) 4454 + shift_cost (speed, compute_mode, size - 1) 4455 + 2 * add_cost (speed, compute_mode)); 4456 t3 = expmed_mult_highpart 4457 (compute_mode, t2, gen_int_mode (ml, compute_mode), 4458 NULL_RTX, 1, max_cost - extra_cost); 4459 if (t3 != 0) 4460 { 4461 t4 = expand_shift 4462 (RSHIFT_EXPR, compute_mode, t3, 4463 post_shift, NULL_RTX, 1); 4464 quotient = expand_binop (compute_mode, xor_optab, 4465 t4, t1, tquotient, 0, 4466 OPTAB_WIDEN); 4467 } 4468 } 4469 } 4470 } 4471 else 4472 { 4473 rtx nsign, t1, t2, t3, t4; 4474 t1 = force_operand (gen_rtx_PLUS (compute_mode, 4475 op0, constm1_rtx), NULL_RTX); 4476 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX, 4477 0, OPTAB_WIDEN); 4478 nsign = expand_shift (RSHIFT_EXPR, compute_mode, t2, 4479 size - 1, NULL_RTX, 0); 4480 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign), 4481 NULL_RTX); 4482 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1, 4483 NULL_RTX, 0); 4484 if (t4) 4485 { 4486 rtx t5; 4487 t5 = expand_unop (compute_mode, one_cmpl_optab, nsign, 4488 NULL_RTX, 0); 4489 quotient = force_operand (gen_rtx_PLUS (compute_mode, 4490 t4, t5), 4491 tquotient); 4492 } 4493 } 4494 } 4495 4496 if (quotient != 0) 4497 break; 4498 delete_insns_since (last); 4499 4500 /* Try using an instruction that produces both the quotient and 4501 remainder, using truncation. We can easily compensate the quotient 4502 or remainder to get floor rounding, once we have the remainder. 4503 Notice that we compute also the final remainder value here, 4504 and return the result right away. */ 4505 if (target == 0 || GET_MODE (target) != compute_mode) 4506 target = gen_reg_rtx (compute_mode); 4507 4508 if (rem_flag) 4509 { 4510 remainder 4511 = REG_P (target) ? target : gen_reg_rtx (compute_mode); 4512 quotient = gen_reg_rtx (compute_mode); 4513 } 4514 else 4515 { 4516 quotient 4517 = REG_P (target) ? target : gen_reg_rtx (compute_mode); 4518 remainder = gen_reg_rtx (compute_mode); 4519 } 4520 4521 if (expand_twoval_binop (sdivmod_optab, op0, op1, 4522 quotient, remainder, 0)) 4523 { 4524 /* This could be computed with a branch-less sequence. 4525 Save that for later. 
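             As a worked example of the adjustment below: for OP0 = -7 and
             OP1 = 2 the truncating divmod gives quotient -3 and remainder
             -1; the remainder is nonzero and OP0 ^ OP1 is negative, so the
             quotient is decremented to -4 and the remainder bumped to 1,
             the floor-rounded results.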
*/ 4526 rtx tem; 4527 rtx_code_label *label = gen_label_rtx (); 4528 do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label); 4529 tem = expand_binop (compute_mode, xor_optab, op0, op1, 4530 NULL_RTX, 0, OPTAB_WIDEN); 4531 do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label); 4532 expand_dec (quotient, const1_rtx); 4533 expand_inc (remainder, op1); 4534 emit_label (label); 4535 return gen_lowpart (mode, rem_flag ? remainder : quotient); 4536 } 4537 4538 /* No luck with division elimination or divmod. Have to do it 4539 by conditionally adjusting op0 *and* the result. */ 4540 { 4541 rtx_code_label *label1, *label2, *label3, *label4, *label5; 4542 rtx adjusted_op0; 4543 rtx tem; 4544 4545 quotient = gen_reg_rtx (compute_mode); 4546 adjusted_op0 = copy_to_mode_reg (compute_mode, op0); 4547 label1 = gen_label_rtx (); 4548 label2 = gen_label_rtx (); 4549 label3 = gen_label_rtx (); 4550 label4 = gen_label_rtx (); 4551 label5 = gen_label_rtx (); 4552 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2); 4553 do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1); 4554 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1, 4555 quotient, 0, OPTAB_LIB_WIDEN); 4556 if (tem != quotient) 4557 emit_move_insn (quotient, tem); 4558 emit_jump_insn (gen_jump (label5)); 4559 emit_barrier (); 4560 emit_label (label1); 4561 expand_inc (adjusted_op0, const1_rtx); 4562 emit_jump_insn (gen_jump (label4)); 4563 emit_barrier (); 4564 emit_label (label2); 4565 do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3); 4566 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1, 4567 quotient, 0, OPTAB_LIB_WIDEN); 4568 if (tem != quotient) 4569 emit_move_insn (quotient, tem); 4570 emit_jump_insn (gen_jump (label5)); 4571 emit_barrier (); 4572 emit_label (label3); 4573 expand_dec (adjusted_op0, const1_rtx); 4574 emit_label (label4); 4575 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1, 4576 quotient, 0, OPTAB_LIB_WIDEN); 4577 if (tem != quotient) 4578 emit_move_insn (quotient, tem); 4579 expand_dec (quotient, const1_rtx); 4580 emit_label (label5); 4581 } 4582 break; 4583 4584 case CEIL_DIV_EXPR: 4585 case CEIL_MOD_EXPR: 4586 if (unsignedp) 4587 { 4588 if (op1_is_constant 4589 && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)) 4590 && (size <= HOST_BITS_PER_WIDE_INT 4591 || INTVAL (op1) >= 0)) 4592 { 4593 rtx t1, t2, t3; 4594 unsigned HOST_WIDE_INT d = INTVAL (op1); 4595 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0, 4596 floor_log2 (d), tquotient, 1); 4597 t2 = expand_binop (compute_mode, and_optab, op0, 4598 gen_int_mode (d - 1, compute_mode), 4599 NULL_RTX, 1, OPTAB_LIB_WIDEN); 4600 t3 = gen_reg_rtx (compute_mode); 4601 t3 = emit_store_flag (t3, NE, t2, const0_rtx, 4602 compute_mode, 1, 1); 4603 if (t3 == 0) 4604 { 4605 rtx_code_label *lab; 4606 lab = gen_label_rtx (); 4607 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab); 4608 expand_inc (t1, const1_rtx); 4609 emit_label (lab); 4610 quotient = t1; 4611 } 4612 else 4613 quotient = force_operand (gen_rtx_PLUS (compute_mode, 4614 t1, t3), 4615 tquotient); 4616 break; 4617 } 4618 4619 /* Try using an instruction that produces both the quotient and 4620 remainder, using truncation. We can easily compensate the 4621 quotient or remainder to get ceiling rounding, once we have the 4622 remainder. Notice that we compute also the final remainder 4623 value here, and return the result right away. 
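             For instance, with OP0 = 7 and OP1 = 3 the truncating divmod
             gives quotient 2 and remainder 1; the remainder is nonzero, so
             the quotient is bumped to 3 and the remainder becomes 1 - 3 =
             -2, the ceiling-rounded pair (7 = 3 * 3 - 2).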
*/ 4624 if (target == 0 || GET_MODE (target) != compute_mode) 4625 target = gen_reg_rtx (compute_mode); 4626 4627 if (rem_flag) 4628 { 4629 remainder = (REG_P (target) 4630 ? target : gen_reg_rtx (compute_mode)); 4631 quotient = gen_reg_rtx (compute_mode); 4632 } 4633 else 4634 { 4635 quotient = (REG_P (target) 4636 ? target : gen_reg_rtx (compute_mode)); 4637 remainder = gen_reg_rtx (compute_mode); 4638 } 4639 4640 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, 4641 remainder, 1)) 4642 { 4643 /* This could be computed with a branch-less sequence. 4644 Save that for later. */ 4645 rtx_code_label *label = gen_label_rtx (); 4646 do_cmp_and_jump (remainder, const0_rtx, EQ, 4647 compute_mode, label); 4648 expand_inc (quotient, const1_rtx); 4649 expand_dec (remainder, op1); 4650 emit_label (label); 4651 return gen_lowpart (mode, rem_flag ? remainder : quotient); 4652 } 4653 4654 /* No luck with division elimination or divmod. Have to do it 4655 by conditionally adjusting op0 *and* the result. */ 4656 { 4657 rtx_code_label *label1, *label2; 4658 rtx adjusted_op0, tem; 4659 4660 quotient = gen_reg_rtx (compute_mode); 4661 adjusted_op0 = copy_to_mode_reg (compute_mode, op0); 4662 label1 = gen_label_rtx (); 4663 label2 = gen_label_rtx (); 4664 do_cmp_and_jump (adjusted_op0, const0_rtx, NE, 4665 compute_mode, label1); 4666 emit_move_insn (quotient, const0_rtx); 4667 emit_jump_insn (gen_jump (label2)); 4668 emit_barrier (); 4669 emit_label (label1); 4670 expand_dec (adjusted_op0, const1_rtx); 4671 tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1, 4672 quotient, 1, OPTAB_LIB_WIDEN); 4673 if (tem != quotient) 4674 emit_move_insn (quotient, tem); 4675 expand_inc (quotient, const1_rtx); 4676 emit_label (label2); 4677 } 4678 } 4679 else /* signed */ 4680 { 4681 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)) 4682 && INTVAL (op1) >= 0) 4683 { 4684 /* This is extremely similar to the code for the unsigned case 4685 above. For 2.7 we should merge these variants, but for 4686 2.6.1 I don't want to touch the code for unsigned since that 4687 get used in C. The signed case will only be used by other 4688 languages (Ada). */ 4689 4690 rtx t1, t2, t3; 4691 unsigned HOST_WIDE_INT d = INTVAL (op1); 4692 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0, 4693 floor_log2 (d), tquotient, 0); 4694 t2 = expand_binop (compute_mode, and_optab, op0, 4695 gen_int_mode (d - 1, compute_mode), 4696 NULL_RTX, 1, OPTAB_LIB_WIDEN); 4697 t3 = gen_reg_rtx (compute_mode); 4698 t3 = emit_store_flag (t3, NE, t2, const0_rtx, 4699 compute_mode, 1, 1); 4700 if (t3 == 0) 4701 { 4702 rtx_code_label *lab; 4703 lab = gen_label_rtx (); 4704 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab); 4705 expand_inc (t1, const1_rtx); 4706 emit_label (lab); 4707 quotient = t1; 4708 } 4709 else 4710 quotient = force_operand (gen_rtx_PLUS (compute_mode, 4711 t1, t3), 4712 tquotient); 4713 break; 4714 } 4715 4716 /* Try using an instruction that produces both the quotient and 4717 remainder, using truncation. We can easily compensate the 4718 quotient or remainder to get ceiling rounding, once we have the 4719 remainder. Notice that we compute also the final remainder 4720 value here, and return the result right away. */ 4721 if (target == 0 || GET_MODE (target) != compute_mode) 4722 target = gen_reg_rtx (compute_mode); 4723 if (rem_flag) 4724 { 4725 remainder= (REG_P (target) 4726 ? 
target : gen_reg_rtx (compute_mode)); 4727 quotient = gen_reg_rtx (compute_mode); 4728 } 4729 else 4730 { 4731 quotient = (REG_P (target) 4732 ? target : gen_reg_rtx (compute_mode)); 4733 remainder = gen_reg_rtx (compute_mode); 4734 } 4735 4736 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, 4737 remainder, 0)) 4738 { 4739 /* This could be computed with a branch-less sequence. 4740 Save that for later. */ 4741 rtx tem; 4742 rtx_code_label *label = gen_label_rtx (); 4743 do_cmp_and_jump (remainder, const0_rtx, EQ, 4744 compute_mode, label); 4745 tem = expand_binop (compute_mode, xor_optab, op0, op1, 4746 NULL_RTX, 0, OPTAB_WIDEN); 4747 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label); 4748 expand_inc (quotient, const1_rtx); 4749 expand_dec (remainder, op1); 4750 emit_label (label); 4751 return gen_lowpart (mode, rem_flag ? remainder : quotient); 4752 } 4753 4754 /* No luck with division elimination or divmod. Have to do it 4755 by conditionally adjusting op0 *and* the result. */ 4756 { 4757 rtx_code_label *label1, *label2, *label3, *label4, *label5; 4758 rtx adjusted_op0; 4759 rtx tem; 4760 4761 quotient = gen_reg_rtx (compute_mode); 4762 adjusted_op0 = copy_to_mode_reg (compute_mode, op0); 4763 label1 = gen_label_rtx (); 4764 label2 = gen_label_rtx (); 4765 label3 = gen_label_rtx (); 4766 label4 = gen_label_rtx (); 4767 label5 = gen_label_rtx (); 4768 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2); 4769 do_cmp_and_jump (adjusted_op0, const0_rtx, GT, 4770 compute_mode, label1); 4771 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1, 4772 quotient, 0, OPTAB_LIB_WIDEN); 4773 if (tem != quotient) 4774 emit_move_insn (quotient, tem); 4775 emit_jump_insn (gen_jump (label5)); 4776 emit_barrier (); 4777 emit_label (label1); 4778 expand_dec (adjusted_op0, const1_rtx); 4779 emit_jump_insn (gen_jump (label4)); 4780 emit_barrier (); 4781 emit_label (label2); 4782 do_cmp_and_jump (adjusted_op0, const0_rtx, LT, 4783 compute_mode, label3); 4784 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1, 4785 quotient, 0, OPTAB_LIB_WIDEN); 4786 if (tem != quotient) 4787 emit_move_insn (quotient, tem); 4788 emit_jump_insn (gen_jump (label5)); 4789 emit_barrier (); 4790 emit_label (label3); 4791 expand_inc (adjusted_op0, const1_rtx); 4792 emit_label (label4); 4793 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1, 4794 quotient, 0, OPTAB_LIB_WIDEN); 4795 if (tem != quotient) 4796 emit_move_insn (quotient, tem); 4797 expand_inc (quotient, const1_rtx); 4798 emit_label (label5); 4799 } 4800 } 4801 break; 4802 4803 case EXACT_DIV_EXPR: 4804 if (op1_is_constant && size <= HOST_BITS_PER_WIDE_INT) 4805 { 4806 HOST_WIDE_INT d = INTVAL (op1); 4807 unsigned HOST_WIDE_INT ml; 4808 int pre_shift; 4809 rtx t1; 4810 4811 pre_shift = floor_log2 (d & -d); 4812 ml = invert_mod2n (d >> pre_shift, size); 4813 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0, 4814 pre_shift, NULL_RTX, unsignedp); 4815 quotient = expand_mult (compute_mode, t1, 4816 gen_int_mode (ml, compute_mode), 4817 NULL_RTX, 1); 4818 4819 insn = get_last_insn (); 4820 set_dst_reg_note (insn, REG_EQUAL, 4821 gen_rtx_fmt_ee (unsignedp ? 
UDIV : DIV, 4822 compute_mode, op0, op1), 4823 quotient); 4824 } 4825 break; 4826 4827 case ROUND_DIV_EXPR: 4828 case ROUND_MOD_EXPR: 4829 if (unsignedp) 4830 { 4831 rtx tem; 4832 rtx_code_label *label; 4833 label = gen_label_rtx (); 4834 quotient = gen_reg_rtx (compute_mode); 4835 remainder = gen_reg_rtx (compute_mode); 4836 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0) 4837 { 4838 rtx tem; 4839 quotient = expand_binop (compute_mode, udiv_optab, op0, op1, 4840 quotient, 1, OPTAB_LIB_WIDEN); 4841 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1); 4842 remainder = expand_binop (compute_mode, sub_optab, op0, tem, 4843 remainder, 1, OPTAB_LIB_WIDEN); 4844 } 4845 tem = plus_constant (compute_mode, op1, -1); 4846 tem = expand_shift (RSHIFT_EXPR, compute_mode, tem, 1, NULL_RTX, 1); 4847 do_cmp_and_jump (remainder, tem, LEU, compute_mode, label); 4848 expand_inc (quotient, const1_rtx); 4849 expand_dec (remainder, op1); 4850 emit_label (label); 4851 } 4852 else 4853 { 4854 rtx abs_rem, abs_op1, tem, mask; 4855 rtx_code_label *label; 4856 label = gen_label_rtx (); 4857 quotient = gen_reg_rtx (compute_mode); 4858 remainder = gen_reg_rtx (compute_mode); 4859 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0) 4860 { 4861 rtx tem; 4862 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1, 4863 quotient, 0, OPTAB_LIB_WIDEN); 4864 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0); 4865 remainder = expand_binop (compute_mode, sub_optab, op0, tem, 4866 remainder, 0, OPTAB_LIB_WIDEN); 4867 } 4868 abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0); 4869 abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0); 4870 tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem, 4871 1, NULL_RTX, 1); 4872 do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label); 4873 tem = expand_binop (compute_mode, xor_optab, op0, op1, 4874 NULL_RTX, 0, OPTAB_WIDEN); 4875 mask = expand_shift (RSHIFT_EXPR, compute_mode, tem, 4876 size - 1, NULL_RTX, 0); 4877 tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx, 4878 NULL_RTX, 0, OPTAB_WIDEN); 4879 tem = expand_binop (compute_mode, sub_optab, tem, mask, 4880 NULL_RTX, 0, OPTAB_WIDEN); 4881 expand_inc (quotient, tem); 4882 tem = expand_binop (compute_mode, xor_optab, mask, op1, 4883 NULL_RTX, 0, OPTAB_WIDEN); 4884 tem = expand_binop (compute_mode, sub_optab, tem, mask, 4885 NULL_RTX, 0, OPTAB_WIDEN); 4886 expand_dec (remainder, tem); 4887 emit_label (label); 4888 } 4889 return gen_lowpart (mode, rem_flag ? remainder : quotient); 4890 4891 default: 4892 gcc_unreachable (); 4893 } 4894 4895 if (quotient == 0) 4896 { 4897 if (target && GET_MODE (target) != compute_mode) 4898 target = 0; 4899 4900 if (rem_flag) 4901 { 4902 /* Try to produce the remainder without producing the quotient. 4903 If we seem to have a divmod pattern that does not require widening, 4904 don't try widening here. We should really have a WIDEN argument 4905 to expand_twoval_binop, since what we'd really like to do here is 4906 1) try a mod insn in compute_mode 4907 2) try a divmod insn in compute_mode 4908 3) try a div insn in compute_mode and multiply-subtract to get 4909 remainder 4910 4) try the same things with widening allowed. */ 4911 remainder 4912 = sign_expand_binop (compute_mode, umod_optab, smod_optab, 4913 op0, op1, target, 4914 unsignedp, 4915 ((optab_handler (optab2, compute_mode) 4916 != CODE_FOR_nothing) 4917 ? 
OPTAB_DIRECT : OPTAB_WIDEN)); 4918 if (remainder == 0) 4919 { 4920 /* No luck there. Can we do remainder and divide at once 4921 without a library call? */ 4922 remainder = gen_reg_rtx (compute_mode); 4923 if (! expand_twoval_binop ((unsignedp 4924 ? udivmod_optab 4925 : sdivmod_optab), 4926 op0, op1, 4927 NULL_RTX, remainder, unsignedp)) 4928 remainder = 0; 4929 } 4930 4931 if (remainder) 4932 return gen_lowpart (mode, remainder); 4933 } 4934 4935 /* Produce the quotient. Try a quotient insn, but not a library call. 4936 If we have a divmod in this mode, use it in preference to widening 4937 the div (for this test we assume it will not fail). Note that optab2 4938 is set to the one of the two optabs that the call below will use. */ 4939 quotient 4940 = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab, 4941 op0, op1, rem_flag ? NULL_RTX : target, 4942 unsignedp, 4943 ((optab_handler (optab2, compute_mode) 4944 != CODE_FOR_nothing) 4945 ? OPTAB_DIRECT : OPTAB_WIDEN)); 4946 4947 if (quotient == 0) 4948 { 4949 /* No luck there. Try a quotient-and-remainder insn, 4950 keeping the quotient alone. */ 4951 quotient = gen_reg_rtx (compute_mode); 4952 if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab, 4953 op0, op1, 4954 quotient, NULL_RTX, unsignedp)) 4955 { 4956 quotient = 0; 4957 if (! rem_flag) 4958 /* Still no luck. If we are not computing the remainder, 4959 use a library call for the quotient. */ 4960 quotient = sign_expand_binop (compute_mode, 4961 udiv_optab, sdiv_optab, 4962 op0, op1, target, 4963 unsignedp, OPTAB_LIB_WIDEN); 4964 } 4965 } 4966 } 4967 4968 if (rem_flag) 4969 { 4970 if (target && GET_MODE (target) != compute_mode) 4971 target = 0; 4972 4973 if (quotient == 0) 4974 { 4975 /* No divide instruction either. Use library for remainder. */ 4976 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab, 4977 op0, op1, target, 4978 unsignedp, OPTAB_LIB_WIDEN); 4979 /* No remainder function. Try a quotient-and-remainder 4980 function, keeping the remainder. */ 4981 if (!remainder) 4982 { 4983 remainder = gen_reg_rtx (compute_mode); 4984 if (!expand_twoval_binop_libfunc 4985 (unsignedp ? udivmod_optab : sdivmod_optab, 4986 op0, op1, 4987 NULL_RTX, remainder, 4988 unsignedp ? UMOD : MOD)) 4989 remainder = NULL_RTX; 4990 } 4991 } 4992 else 4993 { 4994 /* We divided. Now finish doing X - Y * (X / Y). */ 4995 remainder = expand_mult (compute_mode, quotient, op1, 4996 NULL_RTX, unsignedp); 4997 remainder = expand_binop (compute_mode, sub_optab, op0, 4998 remainder, target, unsignedp, 4999 OPTAB_LIB_WIDEN); 5000 } 5001 } 5002 5003 return gen_lowpart (mode, rem_flag ? remainder : quotient); 5004 } 5005 5006 /* Return a tree node with data type TYPE, describing the value of X. 5007 Usually this is an VAR_DECL, if there is no obvious better choice. 5008 X may be an expression, however we only support those expressions 5009 generated by loop.c. 
*/ 5010 5011 tree 5012 make_tree (tree type, rtx x) 5013 { 5014 tree t; 5015 5016 switch (GET_CODE (x)) 5017 { 5018 case CONST_INT: 5019 case CONST_WIDE_INT: 5020 t = wide_int_to_tree (type, std::make_pair (x, TYPE_MODE (type))); 5021 return t; 5022 5023 case CONST_DOUBLE: 5024 STATIC_ASSERT (HOST_BITS_PER_WIDE_INT * 2 <= MAX_BITSIZE_MODE_ANY_INT); 5025 if (TARGET_SUPPORTS_WIDE_INT == 0 && GET_MODE (x) == VOIDmode) 5026 t = wide_int_to_tree (type, 5027 wide_int::from_array (&CONST_DOUBLE_LOW (x), 2, 5028 HOST_BITS_PER_WIDE_INT * 2)); 5029 else 5030 { 5031 REAL_VALUE_TYPE d; 5032 5033 REAL_VALUE_FROM_CONST_DOUBLE (d, x); 5034 t = build_real (type, d); 5035 } 5036 5037 return t; 5038 5039 case CONST_VECTOR: 5040 { 5041 int units = CONST_VECTOR_NUNITS (x); 5042 tree itype = TREE_TYPE (type); 5043 tree *elts; 5044 int i; 5045 5046 /* Build a tree with vector elements. */ 5047 elts = XALLOCAVEC (tree, units); 5048 for (i = units - 1; i >= 0; --i) 5049 { 5050 rtx elt = CONST_VECTOR_ELT (x, i); 5051 elts[i] = make_tree (itype, elt); 5052 } 5053 5054 return build_vector (type, elts); 5055 } 5056 5057 case PLUS: 5058 return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)), 5059 make_tree (type, XEXP (x, 1))); 5060 5061 case MINUS: 5062 return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)), 5063 make_tree (type, XEXP (x, 1))); 5064 5065 case NEG: 5066 return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0))); 5067 5068 case MULT: 5069 return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)), 5070 make_tree (type, XEXP (x, 1))); 5071 5072 case ASHIFT: 5073 return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)), 5074 make_tree (type, XEXP (x, 1))); 5075 5076 case LSHIFTRT: 5077 t = unsigned_type_for (type); 5078 return fold_convert (type, build2 (RSHIFT_EXPR, t, 5079 make_tree (t, XEXP (x, 0)), 5080 make_tree (type, XEXP (x, 1)))); 5081 5082 case ASHIFTRT: 5083 t = signed_type_for (type); 5084 return fold_convert (type, build2 (RSHIFT_EXPR, t, 5085 make_tree (t, XEXP (x, 0)), 5086 make_tree (type, XEXP (x, 1)))); 5087 5088 case DIV: 5089 if (TREE_CODE (type) != REAL_TYPE) 5090 t = signed_type_for (type); 5091 else 5092 t = type; 5093 5094 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t, 5095 make_tree (t, XEXP (x, 0)), 5096 make_tree (t, XEXP (x, 1)))); 5097 case UDIV: 5098 t = unsigned_type_for (type); 5099 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t, 5100 make_tree (t, XEXP (x, 0)), 5101 make_tree (t, XEXP (x, 1)))); 5102 5103 case SIGN_EXTEND: 5104 case ZERO_EXTEND: 5105 t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)), 5106 GET_CODE (x) == ZERO_EXTEND); 5107 return fold_convert (type, make_tree (t, XEXP (x, 0))); 5108 5109 case CONST: 5110 return make_tree (type, XEXP (x, 0)); 5111 5112 case SYMBOL_REF: 5113 t = SYMBOL_REF_DECL (x); 5114 if (t) 5115 return fold_convert (type, build_fold_addr_expr (t)); 5116 /* else fall through. */ 5117 5118 default: 5119 t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type); 5120 5121 /* If TYPE is a POINTER_TYPE, we might need to convert X from 5122 address mode to pointer mode. */ 5123 if (POINTER_TYPE_P (type)) 5124 x = convert_memory_address_addr_space 5125 (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type))); 5126 5127 /* Note that we do *not* use SET_DECL_RTL here, because we do not 5128 want set_decl_rtl to go adjusting REG_ATTRS for this temporary. 
      t->decl_with_rtl.rtl = x;

      return t;
    }
}

/* Compute the logical-and of OP0 and OP1, storing it in TARGET
   and returning TARGET.

   If TARGET is 0, a pseudo-register or constant is returned.  */

rtx
expand_and (machine_mode mode, rtx op0, rtx op1, rtx target)
{
  rtx tem = 0;

  if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
    tem = simplify_binary_operation (AND, mode, op0, op1);
  if (tem == 0)
    tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);

  if (target == 0)
    target = tem;
  else if (tem != target)
    emit_move_insn (target, tem);
  return target;
}

/* Helper function for emit_store_flag.  */
rtx
emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
	     machine_mode mode, machine_mode compare_mode,
	     int unsignedp, rtx x, rtx y, int normalizep,
	     machine_mode target_mode)
{
  struct expand_operand ops[4];
  rtx op0, comparison, subtarget;
  rtx_insn *last;
  machine_mode result_mode = targetm.cstore_mode (icode);

  last = get_last_insn ();
  x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
  y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
  if (!x || !y)
    {
      delete_insns_since (last);
      return NULL_RTX;
    }

  if (target_mode == VOIDmode)
    target_mode = result_mode;
  if (!target)
    target = gen_reg_rtx (target_mode);

  comparison = gen_rtx_fmt_ee (code, result_mode, x, y);

  create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
  create_fixed_operand (&ops[1], comparison);
  create_fixed_operand (&ops[2], x);
  create_fixed_operand (&ops[3], y);
  if (!maybe_expand_insn (icode, 4, ops))
    {
      delete_insns_since (last);
      return NULL_RTX;
    }
  subtarget = ops[0].value;

  /* If we are converting to a wider mode, first convert to
     TARGET_MODE, then normalize.  This produces better combining
     opportunities on machines that have a SIGN_EXTRACT when we are
     testing a single bit.  This mostly benefits the 68k.

     If STORE_FLAG_VALUE does not have the sign bit set when
     interpreted in MODE, we can do this conversion as unsigned, which
     is usually more efficient.  */
  if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
    {
      convert_move (target, subtarget,
		    val_signbit_known_clear_p (result_mode,
					       STORE_FLAG_VALUE));
      op0 = target;
      result_mode = target_mode;
    }
  else
    op0 = subtarget;

  /* If we want to keep subexpressions around, don't reuse our last
     target.  */
  if (optimize)
    subtarget = 0;

  /* Now normalize to the proper value in MODE.  Sometimes we don't
     have to do anything.  */
  if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
    ;
  /* STORE_FLAG_VALUE might be the most negative number, so write
     the comparison this way to avoid a compile-time warning.  */
  else if (- normalizep == STORE_FLAG_VALUE)
    op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);

  /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
     it hard to use a value of just the sign bit due to ANSI integer
     constant typing rules.  */
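  /* Illustrative note (not part of the original code): when STORE_FLAG_VALUE
     has only the sign bit set, the branch below turns the raw flag value into
     0/1 or 0/-1 with a single shift.  A minimal sketch in plain C for a
     32-bit value, assuming an arithmetic right shift of signed values and
     illustrative variable names only:

	 int raw = flag ? (int) 0x80000000 : 0;   what the scc pattern left us
	 int one = (unsigned int) raw >> 31;      logical shift gives 0 or 1
	 int all = raw >> 31;                     arithmetic shift gives 0 or -1

     expand_shift with RSHIFT_EXPR below chooses between the two by passing
     "normalizep == 1" as its unsignedp argument.  */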
  else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
    op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
			GET_MODE_BITSIZE (result_mode) - 1, subtarget,
			normalizep == 1);
  else
    {
      gcc_assert (STORE_FLAG_VALUE & 1);

      op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
      if (normalizep == -1)
	op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
    }

  /* If we were converting to a smaller mode, do the conversion now.  */
  if (target_mode != result_mode)
    {
      convert_move (target, op0, 0);
      return target;
    }
  else
    return op0;
}


/* A subroutine of emit_store_flag only including "tricks" that do not
   need a recursive call.  These are kept separate to avoid infinite
   loops.  */

static rtx
emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
		   machine_mode mode, int unsignedp, int normalizep,
		   machine_mode target_mode)
{
  rtx subtarget;
  enum insn_code icode;
  machine_mode compare_mode;
  enum mode_class mclass;
  enum rtx_code scode;
  rtx tem;

  if (unsignedp)
    code = unsigned_condition (code);
  scode = swap_condition (code);

  /* If one operand is constant, make it the second one.  Only do this
     if the other operand is not constant as well.  */

  if (swap_commutative_operands_p (op0, op1))
    {
      tem = op0;
      op0 = op1;
      op1 = tem;
      code = swap_condition (code);
    }

  if (mode == VOIDmode)
    mode = GET_MODE (op0);

  /* For some comparisons with 1 and -1, we can convert this to
     comparisons with zero.  This will often produce more opportunities for
     store-flag insns.  */

  switch (code)
    {
    case LT:
      if (op1 == const1_rtx)
	op1 = const0_rtx, code = LE;
      break;
    case LE:
      if (op1 == constm1_rtx)
	op1 = const0_rtx, code = LT;
      break;
    case GE:
      if (op1 == const1_rtx)
	op1 = const0_rtx, code = GT;
      break;
    case GT:
      if (op1 == constm1_rtx)
	op1 = const0_rtx, code = GE;
      break;
    case GEU:
      if (op1 == const1_rtx)
	op1 = const0_rtx, code = NE;
      break;
    case LTU:
      if (op1 == const1_rtx)
	op1 = const0_rtx, code = EQ;
      break;
    default:
      break;
    }

  /* If we are comparing a double-word integer with zero or -1, we can
     convert the comparison into one involving a single word.  */
  if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
      && GET_MODE_CLASS (mode) == MODE_INT
      && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
    {
      if ((code == EQ || code == NE)
	  && (op1 == const0_rtx || op1 == constm1_rtx))
	{
	  rtx op00, op01;

	  /* Do a logical OR or AND of the two words and compare the
	     result.  */
	  op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
	  op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
	  tem = expand_binop (word_mode,
			      op1 == const0_rtx ? ior_optab : and_optab,
			      op00, op01, NULL_RTX, unsignedp,
			      OPTAB_DIRECT);

	  if (tem != 0)
	    tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
				   unsignedp, normalizep);
	}
      else if ((code == LT || code == GE) && op1 == const0_rtx)
	{
	  rtx op0h;

	  /* If testing the sign bit, can just test on high word.  */
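	  /* Illustrative note (not part of the original code): these
	     double-word reductions work because, for a two-word value split
	     into LO and HI:

	       X == 0    iff  (LO | HI) == 0
	       X == -1   iff  (LO & HI) == -1
	       X <  0    iff  HI <  0   (only the sign bit of HI matters)
	       X >= 0    iff  HI >= 0

	     so a DImode test on a 32-bit target needs only one word-mode
	     logical operation and/or one word-mode store-flag.  A plain C
	     sketch, with illustrative variable names only:

	       unsigned int lo, hi;
	       int is_zero = (lo | hi) == 0;
	       int is_neg  = (int) hi < 0;  */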
	  op0h = simplify_gen_subreg (word_mode, op0, mode,
				      subreg_highpart_offset (word_mode,
							      mode));
	  tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
				 unsignedp, normalizep);
	}
      else
	tem = NULL_RTX;

      if (tem)
	{
	  if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
	    return tem;
	  if (!target)
	    target = gen_reg_rtx (target_mode);

	  convert_move (target, tem,
			!val_signbit_known_set_p (word_mode,
						  (normalizep ? normalizep
						   : STORE_FLAG_VALUE)));
	  return target;
	}
    }

  /* If this is A < 0 or A >= 0, we can do this by taking the ones
     complement of A (for GE) and shifting the sign bit to the low bit.  */
  if (op1 == const0_rtx && (code == LT || code == GE)
      && GET_MODE_CLASS (mode) == MODE_INT
      && (normalizep || STORE_FLAG_VALUE == 1
	  || val_signbit_p (mode, STORE_FLAG_VALUE)))
    {
      subtarget = target;

      if (!target)
	target_mode = mode;

      /* If the result is to be wider than OP0, it is best to convert it
	 first.  If it is to be narrower, it is *incorrect* to convert it
	 first.  */
      else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
	{
	  op0 = convert_modes (target_mode, mode, op0, 0);
	  mode = target_mode;
	}

      if (target_mode != mode)
	subtarget = 0;

      if (code == GE)
	op0 = expand_unop (mode, one_cmpl_optab, op0,
			   ((STORE_FLAG_VALUE == 1 || normalizep)
			    ? 0 : subtarget), 0);

      if (STORE_FLAG_VALUE == 1 || normalizep)
	/* If we are supposed to produce a 0/1 value, we want to do
	   a logical shift from the sign bit to the low-order bit; for
	   a -1/0 value, we do an arithmetic shift.  */
	op0 = expand_shift (RSHIFT_EXPR, mode, op0,
			    GET_MODE_BITSIZE (mode) - 1,
			    subtarget, normalizep != -1);

      if (mode != target_mode)
	op0 = convert_modes (target_mode, mode, op0, 0);

      return op0;
    }

  mclass = GET_MODE_CLASS (mode);
  for (compare_mode = mode; compare_mode != VOIDmode;
       compare_mode = GET_MODE_WIDER_MODE (compare_mode))
    {
      machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
      icode = optab_handler (cstore_optab, optab_mode);
      if (icode != CODE_FOR_nothing)
	{
	  do_pending_stack_adjust ();
	  tem = emit_cstore (target, icode, code, mode, compare_mode,
			     unsignedp, op0, op1, normalizep, target_mode);
	  if (tem)
	    return tem;

	  if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	    {
	      tem = emit_cstore (target, icode, scode, mode, compare_mode,
				 unsignedp, op1, op0, normalizep, target_mode);
	      if (tem)
		return tem;
	    }
	  break;
	}
    }

  return 0;
}
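/* Illustrative note (not part of the original code): the A < 0 / A >= 0 case
   above never needs a real comparison.  For a 32-bit A the expansion is
   equivalent to the following plain C, assuming an arithmetic right shift of
   signed values (variable names are illustrative only):

       int lt0_01 = (unsigned int) a >> 31;       A < 0  as 0/1
       int lt0_m1 = a >> 31;                      A < 0  as 0/-1
       int ge0_01 = (unsigned int) ~a >> 31;      A >= 0 as 0/1

   i.e. GE is handled by complementing A first and then shifting the sign bit
   down, exactly as the one_cmpl_optab / expand_shift pair above does.  */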
/* Emit a store-flags instruction for comparison CODE on OP0 and OP1
   and storing in TARGET.  Normally return TARGET.
   Return 0 if that cannot be done.

   MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
   it is VOIDmode, they cannot both be CONST_INT.

   UNSIGNEDP is for the case where we have to widen the operands
   to perform the operation.  It says to use zero-extension.

   NORMALIZEP is 1 if we should convert the result to be either zero
   or one.  NORMALIZEP is -1 if we should convert the result to be
   either zero or -1.  If NORMALIZEP is zero, the result will be left
   "raw" out of the scc insn.  */

rtx
emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
		 machine_mode mode, int unsignedp, int normalizep)
{
  machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
  enum rtx_code rcode;
  rtx subtarget;
  rtx tem, trueval;
  rtx_insn *last;

  /* If we compare constants, we shouldn't use a store-flag operation,
     but a constant load.  We can get there via the vanilla route that
     usually generates a compare-branch sequence, but will in this case
     fold the comparison to a constant, and thus elide the branch.  */
  if (CONSTANT_P (op0) && CONSTANT_P (op1))
    return NULL_RTX;

  tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
			   target_mode);
  if (tem)
    return tem;

  /* If we reached here, we can't do this with a scc insn; however, there
     are some comparisons that can be done in other ways.  Don't do any
     of these cases if branches are very cheap.  */
  if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
    return 0;

  /* See what we need to return.  We can only return a 1, -1, or the
     sign bit.  */

  if (normalizep == 0)
    {
      if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
	normalizep = STORE_FLAG_VALUE;

      else if (val_signbit_p (mode, STORE_FLAG_VALUE))
	;
      else
	return 0;
    }

  last = get_last_insn ();

  /* If optimizing, use different pseudo registers for each insn, instead
     of reusing the same pseudo.  This leads to better CSE, but slows
     down the compiler, since there are more pseudos.  */
  subtarget = (!optimize
	       && (target_mode == mode)) ? target : NULL_RTX;
  trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);

  /* For floating-point comparisons, try the reverse comparison or try
     changing the "orderedness" of the comparison.  */
  if (GET_MODE_CLASS (mode) == MODE_FLOAT)
    {
      enum rtx_code first_code;
      bool and_them;

      rcode = reverse_condition_maybe_unordered (code);
      if (can_compare_p (rcode, mode, ccp_store_flag)
	  && (code == ORDERED || code == UNORDERED
	      || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
	      || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
	{
	  int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
			  || (STORE_FLAG_VALUE == -1 && normalizep == 1));

	  /* For the reverse comparison, use either an addition or a XOR.  */
	  if (want_add
	      && rtx_cost (GEN_INT (normalizep), PLUS, 1,
			   optimize_insn_for_speed_p ()) == 0)
	    {
	      tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
				       STORE_FLAG_VALUE, target_mode);
	      if (tem)
		return expand_binop (target_mode, add_optab, tem,
				     gen_int_mode (normalizep, target_mode),
				     target, 0, OPTAB_WIDEN);
	    }
	  else if (!want_add
		   && rtx_cost (trueval, XOR, 1,
				optimize_insn_for_speed_p ()) == 0)
	    {
	      tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
				       normalizep, target_mode);
	      if (tem)
		return expand_binop (target_mode, xor_optab, tem, trueval,
				     target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
	    }
	}

      delete_insns_since (last);
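      /* Illustrative note (not part of the original code): the want_add and
	 XOR fixups above do simple arithmetic on the flag value of the
	 reversed comparison R.  With STORE_FLAG_VALUE == 1 and
	 normalizep == -1, the add_optab path computes

	     wanted = R + normalizep     R == 1 (reversed true)  ->  0
					 R == 0 (reversed false) -> -1

	 which is exactly the -1/0 answer for the original comparison.  With
	 matching signs, the XOR path flips the result instead, e.g.
	 R ^ 1 turns 0/1 into 1/0.  The values here are only illustrative.  */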
      /* Cannot split ORDERED and UNORDERED, only try the above trick.  */
      if (code == ORDERED || code == UNORDERED)
	return 0;

      and_them = split_comparison (code, mode, &first_code, &code);

      /* If there are no NaNs, the first comparison should always fall
	 through.  Effectively change the comparison to the other one.  */
      if (!HONOR_NANS (mode))
	{
	  gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
	  return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
				    target_mode);
	}

#ifdef HAVE_conditional_move
      /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
	 conditional move.  */
      tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
			       normalizep, target_mode);
      if (tem == 0)
	return 0;

      if (and_them)
	tem = emit_conditional_move (target, code, op0, op1, mode,
				     tem, const0_rtx, GET_MODE (tem), 0);
      else
	tem = emit_conditional_move (target, code, op0, op1, mode,
				     trueval, tem, GET_MODE (tem), 0);

      if (tem == 0)
	delete_insns_since (last);
      return tem;
#else
      return 0;
#endif
    }

  /* The remaining tricks only apply to integer comparisons.  */

  if (GET_MODE_CLASS (mode) != MODE_INT)
    return 0;

  /* If this is an equality comparison of integers, we can try to exclusive-or
     (or subtract) the two operands and use a recursive call to try the
     comparison with zero.  Don't do any of these cases if branches are
     very cheap.  */

  if ((code == EQ || code == NE) && op1 != const0_rtx)
    {
      tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
			  OPTAB_WIDEN);

      if (tem == 0)
	tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
			    OPTAB_WIDEN);
      if (tem != 0)
	tem = emit_store_flag (target, code, tem, const0_rtx,
			       mode, unsignedp, normalizep);
      if (tem != 0)
	return tem;

      delete_insns_since (last);
    }

  /* For integer comparisons, try the reverse comparison.  However, for
     small X, if we would have to extend it anyway, implementing "X != 0"
     as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0".  */
  rcode = reverse_condition (code);
  if (can_compare_p (rcode, mode, ccp_store_flag)
      && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
	    && code == NE
	    && GET_MODE_SIZE (mode) < UNITS_PER_WORD
	    && op1 == const0_rtx))
    {
      int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
		      || (STORE_FLAG_VALUE == -1 && normalizep == 1));

      /* Again, for the reverse comparison, use either an addition or a XOR.  */
      if (want_add
	  && rtx_cost (GEN_INT (normalizep), PLUS, 1,
		       optimize_insn_for_speed_p ()) == 0)
	{
	  tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
				   STORE_FLAG_VALUE, target_mode);
	  if (tem != 0)
	    tem = expand_binop (target_mode, add_optab, tem,
				gen_int_mode (normalizep, target_mode),
				target, 0, OPTAB_WIDEN);
	}
      else if (!want_add
	       && rtx_cost (trueval, XOR, 1,
			    optimize_insn_for_speed_p ()) == 0)
	{
	  tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
				   normalizep, target_mode);
	  if (tem != 0)
	    tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
				INTVAL (trueval) >= 0, OPTAB_WIDEN);
	}

      if (tem != 0)
	return tem;
      delete_insns_since (last);
    }
  /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
     the constant zero.  Reject all other comparisons at this point.  Only
     do LE and GT if branches are expensive since they are expensive on
     2-operand machines.  */

  if (op1 != const0_rtx
      || (code != EQ && code != NE
	  && (BRANCH_COST (optimize_insn_for_speed_p (),
			   false) <= 1 || (code != LE && code != GT))))
    return 0;

  /* Try to put the result of the comparison in the sign bit.  Assume we can't
     do the necessary operation below.  */

  tem = 0;

  /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
     the sign bit set.  */

  if (code == LE)
    {
      /* This is destructive, so SUBTARGET can't be OP0.  */
      if (rtx_equal_p (subtarget, op0))
	subtarget = 0;

      tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
			  OPTAB_WIDEN);
      if (tem)
	tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
			    OPTAB_WIDEN);
    }

  /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
     number of bits in the mode of OP0, minus one.  */

  if (code == GT)
    {
      if (rtx_equal_p (subtarget, op0))
	subtarget = 0;

      tem = expand_shift (RSHIFT_EXPR, mode, op0,
			  GET_MODE_BITSIZE (mode) - 1,
			  subtarget, 0);
      tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
			  OPTAB_WIDEN);
    }

  if (code == EQ || code == NE)
    {
      /* For EQ or NE, one way to do the comparison is to apply an operation
	 that converts the operand into a positive number if it is nonzero
	 or zero if it was originally zero.  Then, for EQ, we subtract 1 and
	 for NE we negate.  This puts the result in the sign bit.  Then we
	 normalize with a shift, if needed.

	 Two operations that can do the above actions are ABS and FFS, so try
	 them.  If that doesn't work, and MODE is smaller than a full word,
	 we can use zero-extension to the wider mode (an unsigned conversion)
	 as the operation.  */

      /* Note that ABS doesn't yield a positive number for INT_MIN, but
	 that is compensated by the subsequent overflow when subtracting
	 one / negating.  */

      if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
	tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
      else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
	tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
      else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
	{
	  tem = convert_modes (word_mode, mode, op0, 1);
	  mode = word_mode;
	}

      if (tem != 0)
	{
	  if (code == EQ)
	    tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
				0, OPTAB_WIDEN);
	  else
	    tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
	}

      /* If we couldn't do it that way, for NE we can "or" the two's complement
	 of the value with itself.  For EQ, we take the one's complement of
	 that "or", which is an extra insn, so we only handle EQ if branches
	 are expensive.  */
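      /* Illustrative note (not part of the original code): all of these
	 zero-comparison tricks park the answer in the sign bit.  For an
	 8-bit A, assuming an arithmetic right shift (values illustrative):

	   A <= 0:  A | (A - 1)       A == 0 -> 0 | 0xff = 0xff  (negative)
				      A == 3 -> 3 | 2    = 3     (positive)
	   A >  0:  (A >> 7) - A      A == 3 -> 0 - 3    = -3    (negative)
				      A == -2 -> -1 + 2  = 1     (positive)
	   A != 0:  -A | A            any nonzero A sets the sign bit

	 A final arithmetic or logical right shift by 7 then yields the
	 normalized -1/0 or 1/0 value.  */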
      if (tem == 0
	  && (code == NE
	      || BRANCH_COST (optimize_insn_for_speed_p (),
			      false) > 1))
	{
	  if (rtx_equal_p (subtarget, op0))
	    subtarget = 0;

	  tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
	  tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
			      OPTAB_WIDEN);

	  if (tem && code == EQ)
	    tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
	}
    }

  if (tem && normalizep)
    tem = expand_shift (RSHIFT_EXPR, mode, tem,
			GET_MODE_BITSIZE (mode) - 1,
			subtarget, normalizep == 1);

  if (tem)
    {
      if (!target)
	;
      else if (GET_MODE (tem) != target_mode)
	{
	  convert_move (target, tem, 0);
	  tem = target;
	}
      else if (!subtarget)
	{
	  emit_move_insn (target, tem);
	  tem = target;
	}
    }
  else
    delete_insns_since (last);

  return tem;
}

/* Like emit_store_flag, but always succeeds.  */

rtx
emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
		       machine_mode mode, int unsignedp, int normalizep)
{
  rtx tem;
  rtx_code_label *label;
  rtx trueval, falseval;

  /* First see if emit_store_flag can do the job.  */
  tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
  if (tem != 0)
    return tem;

  if (!target)
    target = gen_reg_rtx (word_mode);

  /* If this failed, we have to do this with set/compare/jump/set code.
     For foo != 0, if foo is in OP0, just replace it with 1 if nonzero.  */
  trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
  if (code == NE
      && GET_MODE_CLASS (mode) == MODE_INT
      && REG_P (target)
      && op0 == target
      && op1 == const0_rtx)
    {
      label = gen_label_rtx ();
      do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp,
			       mode, NULL_RTX, NULL_RTX, label, -1);
      emit_move_insn (target, trueval);
      emit_label (label);
      return target;
    }

  if (!REG_P (target)
      || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
    target = gen_reg_rtx (GET_MODE (target));

  /* Jump in the right direction if the target cannot implement CODE
     but can jump on its reverse condition.  */
  falseval = const0_rtx;
  if (! can_compare_p (code, mode, ccp_jump)
      && (! FLOAT_MODE_P (mode)
	  || code == ORDERED || code == UNORDERED
	  || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
	  || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
    {
      enum rtx_code rcode;
      if (FLOAT_MODE_P (mode))
	rcode = reverse_condition_maybe_unordered (code);
      else
	rcode = reverse_condition (code);

      /* Canonicalize to UNORDERED for the libcall.  */
      if (can_compare_p (rcode, mode, ccp_jump)
	  || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
	{
	  falseval = trueval;
	  trueval = const0_rtx;
	  code = rcode;
	}
    }

  emit_move_insn (target, trueval);
  label = gen_label_rtx ();
  do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
			   NULL_RTX, label, -1);

  emit_move_insn (target, falseval);
  emit_label (label);

  return target;
}
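/* Illustrative note (not part of the original code): when no scc sequence
   exists, emit_store_flag_force above falls back to a branchy pattern that
   every target can emit.  In pseudo-C (names are illustrative only):

       target = trueval;
       if (! (op0 CODE op1))
	 target = falseval;

   with the store of falseval skipped by a conditional jump around it, which
   is exactly what the two emit_move_insn calls and the label express.  */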
/* Perform possibly multi-word comparison and conditional jump to LABEL
   if ARG1 OP ARG2 is true where ARG1 and ARG2 are of mode MODE.  This is
   now a thin wrapper around do_compare_rtx_and_jump.  */

static void
do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, machine_mode mode,
		 rtx_code_label *label)
{
  int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
  do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode,
			   NULL_RTX, NULL_RTX, label, -1);
}
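/* Illustrative note (not part of the original code): callers in this file
   pass the rtx comparison code directly, and do_cmp_and_jump derives the
   signedness from it.  For example, a hypothetical caller that wants to
   branch when a pseudo is below an unsigned bound could write:

       do_cmp_and_jump (op0, bound, LTU, word_mode, label);

   LTU, LEU, GTU and GEU select an unsigned comparison; all other codes are
   expanded as signed.  */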