1 /* Medium-level subroutines: convert bit-field store and extract
2 and shifts, multiplies and divides to rtl instructions.
3 Copyright (C) 1987-2020 Free Software Foundation, Inc.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 /* Work around tree-optimization/91825. */
22 #pragma GCC diagnostic warning "-Wmaybe-uninitialized"
23
24 #include "config.h"
25 #include "system.h"
26 #include "coretypes.h"
27 #include "backend.h"
28 #include "target.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "predict.h"
32 #include "memmodel.h"
33 #include "tm_p.h"
34 #include "expmed.h"
35 #include "optabs.h"
36 #include "regs.h"
37 #include "emit-rtl.h"
38 #include "diagnostic-core.h"
39 #include "fold-const.h"
40 #include "stor-layout.h"
41 #include "dojump.h"
42 #include "explow.h"
43 #include "expr.h"
44 #include "langhooks.h"
45 #include "tree-vector-builder.h"
46
47 struct target_expmed default_target_expmed;
48 #if SWITCHABLE_TARGET
49 struct target_expmed *this_target_expmed = &default_target_expmed;
50 #endif
51
52 static bool store_integral_bit_field (rtx, opt_scalar_int_mode,
53 unsigned HOST_WIDE_INT,
54 unsigned HOST_WIDE_INT,
55 poly_uint64, poly_uint64,
56 machine_mode, rtx, bool, bool);
57 static void store_fixed_bit_field (rtx, opt_scalar_int_mode,
58 unsigned HOST_WIDE_INT,
59 unsigned HOST_WIDE_INT,
60 poly_uint64, poly_uint64,
61 rtx, scalar_int_mode, bool);
62 static void store_fixed_bit_field_1 (rtx, scalar_int_mode,
63 unsigned HOST_WIDE_INT,
64 unsigned HOST_WIDE_INT,
65 rtx, scalar_int_mode, bool);
66 static void store_split_bit_field (rtx, opt_scalar_int_mode,
67 unsigned HOST_WIDE_INT,
68 unsigned HOST_WIDE_INT,
69 poly_uint64, poly_uint64,
70 rtx, scalar_int_mode, bool);
71 static rtx extract_integral_bit_field (rtx, opt_scalar_int_mode,
72 unsigned HOST_WIDE_INT,
73 unsigned HOST_WIDE_INT, int, rtx,
74 machine_mode, machine_mode, bool, bool);
75 static rtx extract_fixed_bit_field (machine_mode, rtx, opt_scalar_int_mode,
76 unsigned HOST_WIDE_INT,
77 unsigned HOST_WIDE_INT, rtx, int, bool);
78 static rtx extract_fixed_bit_field_1 (machine_mode, rtx, scalar_int_mode,
79 unsigned HOST_WIDE_INT,
80 unsigned HOST_WIDE_INT, rtx, int, bool);
81 static rtx lshift_value (machine_mode, unsigned HOST_WIDE_INT, int);
82 static rtx extract_split_bit_field (rtx, opt_scalar_int_mode,
83 unsigned HOST_WIDE_INT,
84 unsigned HOST_WIDE_INT, int, bool);
85 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, machine_mode, rtx_code_label *);
86 static rtx expand_smod_pow2 (scalar_int_mode, rtx, HOST_WIDE_INT);
87 static rtx expand_sdiv_pow2 (scalar_int_mode, rtx, HOST_WIDE_INT);
88
89 /* Return a constant integer mask value of mode MODE with BITSIZE ones
90 followed by BITPOS zeros, or the complement of that if COMPLEMENT.
91 The mask is truncated if necessary to the width of mode MODE. The
92 mask is zero-extended if BITSIZE+BITPOS is too small for MODE. */
93
94 static inline rtx
95 mask_rtx (scalar_int_mode mode, int bitpos, int bitsize, bool complement)
96 {
97 return immed_wide_int_const
98 (wi::shifted_mask (bitpos, bitsize, complement,
99 GET_MODE_PRECISION (mode)), mode);
100 }
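/* For example, mask_rtx (SImode, 4, 3, false) yields the SImode constant
   0x70 (three ones starting at bit 4); with COMPLEMENT it yields the
   32-bit truncation of ~0x70, i.e. 0xffffff8f.  */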
101
102 /* Test whether a value is zero or a power of two. */
103 #define EXACT_POWER_OF_2_OR_ZERO_P(x) \
104 (((x) & ((x) - HOST_WIDE_INT_1U)) == 0)
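/* For example, EXACT_POWER_OF_2_OR_ZERO_P (8) and
   EXACT_POWER_OF_2_OR_ZERO_P (0) are true, since 8 & 7 == 0 and
   0 & -1 == 0, while EXACT_POWER_OF_2_OR_ZERO_P (6) is false, since
   6 & 5 == 4.  */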
105
106 struct init_expmed_rtl
107 {
108 rtx reg;
109 rtx plus;
110 rtx neg;
111 rtx mult;
112 rtx sdiv;
113 rtx udiv;
114 rtx sdiv_32;
115 rtx smod_32;
116 rtx wide_mult;
117 rtx wide_lshr;
118 rtx wide_trunc;
119 rtx shift;
120 rtx shift_mult;
121 rtx shift_add;
122 rtx shift_sub0;
123 rtx shift_sub1;
124 rtx zext;
125 rtx trunc;
126
127 rtx pow2[MAX_BITS_PER_WORD];
128 rtx cint[MAX_BITS_PER_WORD];
129 };
130
131 static void
132 init_expmed_one_conv (struct init_expmed_rtl *all, scalar_int_mode to_mode,
133 scalar_int_mode from_mode, bool speed)
134 {
135 int to_size, from_size;
136 rtx which;
137
138 to_size = GET_MODE_PRECISION (to_mode);
139 from_size = GET_MODE_PRECISION (from_mode);
140
141 /* Most partial integers have a precision less than the "full"
142 integer it requires for storage. If one doesn't, reduce its
143 bit size by one for the size comparison below, so that the
144 conversion is still costed as a truncation or extension. */
145 if (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT
146 && pow2p_hwi (to_size))
147 to_size --;
148 if (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT
149 && pow2p_hwi (from_size))
150 from_size --;
151
152 /* Assume cost of zero-extend and sign-extend is the same. */
153 which = (to_size < from_size ? all->trunc : all->zext);
154
155 PUT_MODE (all->reg, from_mode);
156 set_convert_cost (to_mode, from_mode, speed,
157 set_src_cost (which, to_mode, speed));
158 }
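/* For example, if a hypothetical partial-integer mode had a power-of-two
   precision equal to SImode's 32 bits, the adjustment above compares it
   as 31 bits, so converting it to SImode is costed as an extension and
   converting SImode to it as a truncation.  */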
159
160 static void
161 init_expmed_one_mode (struct init_expmed_rtl *all,
162 machine_mode mode, int speed)
163 {
164 int m, n, mode_bitsize;
165 machine_mode mode_from;
166
167 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
168
169 PUT_MODE (all->reg, mode);
170 PUT_MODE (all->plus, mode);
171 PUT_MODE (all->neg, mode);
172 PUT_MODE (all->mult, mode);
173 PUT_MODE (all->sdiv, mode);
174 PUT_MODE (all->udiv, mode);
175 PUT_MODE (all->sdiv_32, mode);
176 PUT_MODE (all->smod_32, mode);
177 PUT_MODE (all->wide_trunc, mode);
178 PUT_MODE (all->shift, mode);
179 PUT_MODE (all->shift_mult, mode);
180 PUT_MODE (all->shift_add, mode);
181 PUT_MODE (all->shift_sub0, mode);
182 PUT_MODE (all->shift_sub1, mode);
183 PUT_MODE (all->zext, mode);
184 PUT_MODE (all->trunc, mode);
185
186 set_add_cost (speed, mode, set_src_cost (all->plus, mode, speed));
187 set_neg_cost (speed, mode, set_src_cost (all->neg, mode, speed));
188 set_mul_cost (speed, mode, set_src_cost (all->mult, mode, speed));
189 set_sdiv_cost (speed, mode, set_src_cost (all->sdiv, mode, speed));
190 set_udiv_cost (speed, mode, set_src_cost (all->udiv, mode, speed));
191
192 set_sdiv_pow2_cheap (speed, mode, (set_src_cost (all->sdiv_32, mode, speed)
193 <= 2 * add_cost (speed, mode)));
194 set_smod_pow2_cheap (speed, mode, (set_src_cost (all->smod_32, mode, speed)
195 <= 4 * add_cost (speed, mode)));
196
197 set_shift_cost (speed, mode, 0, 0);
198 {
199 int cost = add_cost (speed, mode);
200 set_shiftadd_cost (speed, mode, 0, cost);
201 set_shiftsub0_cost (speed, mode, 0, cost);
202 set_shiftsub1_cost (speed, mode, 0, cost);
203 }
204
205 n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
206 for (m = 1; m < n; m++)
207 {
208 XEXP (all->shift, 1) = all->cint[m];
209 XEXP (all->shift_mult, 1) = all->pow2[m];
210
211 set_shift_cost (speed, mode, m, set_src_cost (all->shift, mode, speed));
212 set_shiftadd_cost (speed, mode, m, set_src_cost (all->shift_add, mode,
213 speed));
214 set_shiftsub0_cost (speed, mode, m, set_src_cost (all->shift_sub0, mode,
215 speed));
216 set_shiftsub1_cost (speed, mode, m, set_src_cost (all->shift_sub1, mode,
217 speed));
218 }
219
220 scalar_int_mode int_mode_to;
221 if (is_a <scalar_int_mode> (mode, &int_mode_to))
222 {
223 for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
224 mode_from = (machine_mode)(mode_from + 1))
225 init_expmed_one_conv (all, int_mode_to,
226 as_a <scalar_int_mode> (mode_from), speed);
227
228 scalar_int_mode wider_mode;
229 if (GET_MODE_CLASS (int_mode_to) == MODE_INT
230 && GET_MODE_WIDER_MODE (int_mode_to).exists (&wider_mode))
231 {
232 PUT_MODE (all->zext, wider_mode);
233 PUT_MODE (all->wide_mult, wider_mode);
234 PUT_MODE (all->wide_lshr, wider_mode);
235 XEXP (all->wide_lshr, 1)
236 = gen_int_shift_amount (wider_mode, mode_bitsize);
237
238 set_mul_widen_cost (speed, wider_mode,
239 set_src_cost (all->wide_mult, wider_mode, speed));
240 set_mul_highpart_cost (speed, int_mode_to,
241 set_src_cost (all->wide_trunc,
242 int_mode_to, speed));
243 }
244 }
245 }
246
247 void
248 init_expmed (void)
249 {
250 struct init_expmed_rtl all;
251 machine_mode mode = QImode;
252 int m, speed;
253
254 memset (&all, 0, sizeof all);
255 for (m = 1; m < MAX_BITS_PER_WORD; m++)
256 {
257 all.pow2[m] = GEN_INT (HOST_WIDE_INT_1 << m);
258 all.cint[m] = GEN_INT (m);
259 }
260
261 /* Avoid using hard regs in ways which may be unsupported. */
262 all.reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
263 all.plus = gen_rtx_PLUS (mode, all.reg, all.reg);
264 all.neg = gen_rtx_NEG (mode, all.reg);
265 all.mult = gen_rtx_MULT (mode, all.reg, all.reg);
266 all.sdiv = gen_rtx_DIV (mode, all.reg, all.reg);
267 all.udiv = gen_rtx_UDIV (mode, all.reg, all.reg);
268 all.sdiv_32 = gen_rtx_DIV (mode, all.reg, all.pow2[5]);
269 all.smod_32 = gen_rtx_MOD (mode, all.reg, all.pow2[5]);
270 all.zext = gen_rtx_ZERO_EXTEND (mode, all.reg);
271 all.wide_mult = gen_rtx_MULT (mode, all.zext, all.zext);
272 all.wide_lshr = gen_rtx_LSHIFTRT (mode, all.wide_mult, all.reg);
273 all.wide_trunc = gen_rtx_TRUNCATE (mode, all.wide_lshr);
274 all.shift = gen_rtx_ASHIFT (mode, all.reg, all.reg);
275 all.shift_mult = gen_rtx_MULT (mode, all.reg, all.reg);
276 all.shift_add = gen_rtx_PLUS (mode, all.shift_mult, all.reg);
277 all.shift_sub0 = gen_rtx_MINUS (mode, all.shift_mult, all.reg);
278 all.shift_sub1 = gen_rtx_MINUS (mode, all.reg, all.shift_mult);
279 all.trunc = gen_rtx_TRUNCATE (mode, all.reg);
280
281 for (speed = 0; speed < 2; speed++)
282 {
283 crtl->maybe_hot_insn_p = speed;
284 set_zero_cost (speed, set_src_cost (const0_rtx, mode, speed));
285
286 for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
287 mode = (machine_mode)(mode + 1))
288 init_expmed_one_mode (&all, mode, speed);
289
290 if (MIN_MODE_PARTIAL_INT != VOIDmode)
291 for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
292 mode = (machine_mode)(mode + 1))
293 init_expmed_one_mode (&all, mode, speed);
294
295 if (MIN_MODE_VECTOR_INT != VOIDmode)
296 for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
297 mode = (machine_mode)(mode + 1))
298 init_expmed_one_mode (&all, mode, speed);
299 }
300
301 if (alg_hash_used_p ())
302 {
303 struct alg_hash_entry *p = alg_hash_entry_ptr (0);
304 memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
305 }
306 else
307 set_alg_hash_used_p (true);
308 default_rtl_profile ();
309
310 ggc_free (all.trunc);
311 ggc_free (all.shift_sub1);
312 ggc_free (all.shift_sub0);
313 ggc_free (all.shift_add);
314 ggc_free (all.shift_mult);
315 ggc_free (all.shift);
316 ggc_free (all.wide_trunc);
317 ggc_free (all.wide_lshr);
318 ggc_free (all.wide_mult);
319 ggc_free (all.zext);
320 ggc_free (all.smod_32);
321 ggc_free (all.sdiv_32);
322 ggc_free (all.udiv);
323 ggc_free (all.sdiv);
324 ggc_free (all.mult);
325 ggc_free (all.neg);
326 ggc_free (all.plus);
327 ggc_free (all.reg);
328 }
329
330 /* Return an rtx representing minus the value of X.
331 MODE is the intended mode of the result,
332 useful if X is a CONST_INT. */
333
334 rtx
335 negate_rtx (machine_mode mode, rtx x)
336 {
337 rtx result = simplify_unary_operation (NEG, mode, x, mode);
338
339 if (result == 0)
340 result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
341
342 return result;
343 }
344
345 /* Whether reverse storage order is supported on the target. */
346 static int reverse_storage_order_supported = -1;
347
348 /* Check whether reverse storage order is supported on the target. */
349
350 static void
351 check_reverse_storage_order_support (void)
352 {
353 if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
354 {
355 reverse_storage_order_supported = 0;
356 sorry ("reverse scalar storage order");
357 }
358 else
359 reverse_storage_order_supported = 1;
360 }
361
362 /* Whether reverse FP storage order is supported on the target. */
363 static int reverse_float_storage_order_supported = -1;
364
365 /* Check whether reverse FP storage order is supported on the target. */
366
367 static void
368 check_reverse_float_storage_order_support (void)
369 {
370 if (FLOAT_WORDS_BIG_ENDIAN != WORDS_BIG_ENDIAN)
371 {
372 reverse_float_storage_order_supported = 0;
373 sorry ("reverse floating-point scalar storage order");
374 }
375 else
376 reverse_float_storage_order_supported = 1;
377 }
378
379 /* Return an rtx representing value of X with reverse storage order.
380 MODE is the intended mode of the result,
381 useful if X is a CONST_INT. */
382
383 rtx
384 flip_storage_order (machine_mode mode, rtx x)
385 {
386 scalar_int_mode int_mode;
387 rtx result;
388
389 if (mode == QImode)
390 return x;
391
392 if (COMPLEX_MODE_P (mode))
393 {
394 rtx real = read_complex_part (x, false);
395 rtx imag = read_complex_part (x, true);
396
397 real = flip_storage_order (GET_MODE_INNER (mode), real);
398 imag = flip_storage_order (GET_MODE_INNER (mode), imag);
399
400 return gen_rtx_CONCAT (mode, real, imag);
401 }
402
403 if (__builtin_expect (reverse_storage_order_supported < 0, 0))
404 check_reverse_storage_order_support ();
405
406 if (!is_a <scalar_int_mode> (mode, &int_mode))
407 {
408 if (FLOAT_MODE_P (mode)
409 && __builtin_expect (reverse_float_storage_order_supported < 0, 0))
410 check_reverse_float_storage_order_support ();
411
412 if (!int_mode_for_size (GET_MODE_PRECISION (mode), 0).exists (&int_mode))
413 {
414 sorry ("reverse storage order for %smode", GET_MODE_NAME (mode));
415 return x;
416 }
417 x = gen_lowpart (int_mode, x);
418 }
419
420 result = simplify_unary_operation (BSWAP, int_mode, x, int_mode);
421 if (result == 0)
422 result = expand_unop (int_mode, bswap_optab, x, NULL_RTX, 1);
423
424 if (int_mode != mode)
425 result = gen_lowpart (mode, result);
426
427 return result;
428 }
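/* For example, flipping the storage order of an SImode value byte-swaps
   it, turning 0x11223344 into 0x44332211; QImode values are returned
   unchanged.  */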
429
430 /* If MODE is set, adjust bitfield memory MEM so that it points to the
431 first unit of mode MODE that contains a bitfield of size BITSIZE at
432 bit position BITNUM. If MODE is not set, return a BLKmode reference
433 to every byte in the bitfield. Set *NEW_BITNUM to the bit position
434 of the field within the new memory. */
435
436 static rtx
437 narrow_bit_field_mem (rtx mem, opt_scalar_int_mode mode,
438 unsigned HOST_WIDE_INT bitsize,
439 unsigned HOST_WIDE_INT bitnum,
440 unsigned HOST_WIDE_INT *new_bitnum)
441 {
442 scalar_int_mode imode;
443 if (mode.exists (&imode))
444 {
445 unsigned int unit = GET_MODE_BITSIZE (imode);
446 *new_bitnum = bitnum % unit;
447 HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
448 return adjust_bitfield_address (mem, imode, offset);
449 }
450 else
451 {
452 *new_bitnum = bitnum % BITS_PER_UNIT;
453 HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
454 HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
455 / BITS_PER_UNIT);
456 return adjust_bitfield_address_size (mem, BLKmode, offset, size);
457 }
458 }
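/* For example (with 8-bit units and a 16-bit HImode), narrowing a
   5-bit field at bit 37 with MODE = HImode yields a HImode reference at
   byte offset 4 and *NEW_BITNUM = 5; without MODE it yields a 2-byte
   BLKmode reference at byte offset 4, again with *NEW_BITNUM = 5.  */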
459
460 /* The caller wants to perform insertion or extraction PATTERN on a
461 bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
462 BITREGION_START and BITREGION_END are as for store_bit_field
463 and FIELDMODE is the natural mode of the field.
464
465 Search for a mode that is compatible with the memory access
466 restrictions and (where applicable) with a register insertion or
467 extraction. Return the new memory on success, storing the adjusted
468 bit position in *NEW_BITNUM. Return null otherwise. */
469
470 static rtx
471 adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
472 rtx op0, HOST_WIDE_INT bitsize,
473 HOST_WIDE_INT bitnum,
474 poly_uint64 bitregion_start,
475 poly_uint64 bitregion_end,
476 machine_mode fieldmode,
477 unsigned HOST_WIDE_INT *new_bitnum)
478 {
479 bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
480 bitregion_end, MEM_ALIGN (op0),
481 MEM_VOLATILE_P (op0));
482 scalar_int_mode best_mode;
483 if (iter.next_mode (&best_mode))
484 {
485 /* We can use a memory in BEST_MODE. See whether this is true for
486 any wider modes. All other things being equal, we prefer to
487 use the widest mode possible because it tends to expose more
488 CSE opportunities. */
489 if (!iter.prefer_smaller_modes ())
490 {
491 /* Limit the search to the mode required by the corresponding
492 register insertion or extraction instruction, if any. */
493 scalar_int_mode limit_mode = word_mode;
494 extraction_insn insn;
495 if (get_best_reg_extraction_insn (&insn, pattern,
496 GET_MODE_BITSIZE (best_mode),
497 fieldmode))
498 limit_mode = insn.field_mode;
499
500 scalar_int_mode wider_mode;
501 while (iter.next_mode (&wider_mode)
502 && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
503 best_mode = wider_mode;
504 }
505 return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
506 new_bitnum);
507 }
508 return NULL_RTX;
509 }
510
511 /* Return true if a bitfield of size BITSIZE at bit number BITNUM within
512 a structure of mode STRUCT_MODE represents a lowpart subreg. The subreg
513 offset is then BITNUM / BITS_PER_UNIT. */
514
515 static bool
516 lowpart_bit_field_p (poly_uint64 bitnum, poly_uint64 bitsize,
517 machine_mode struct_mode)
518 {
519 poly_uint64 regsize = REGMODE_NATURAL_SIZE (struct_mode);
520 if (BYTES_BIG_ENDIAN)
521 return (multiple_p (bitnum, BITS_PER_UNIT)
522 && (known_eq (bitnum + bitsize, GET_MODE_BITSIZE (struct_mode))
523 || multiple_p (bitnum + bitsize,
524 regsize * BITS_PER_UNIT)));
525 else
526 return multiple_p (bitnum, regsize * BITS_PER_UNIT);
527 }
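/* For example, on a little-endian target whose registers hold 4 bytes,
   a 32-bit field at bit 32 of a DImode value is a lowpart subreg (byte
   offset 4), whereas one starting at bit 16 is not.  On big-endian
   targets the field must instead be byte-aligned and end either at the
   end of STRUCT_MODE or on a register boundary.  */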
528
529 /* Return true if -fstrict-volatile-bitfields applies to an access of OP0
530 containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE.
531 Return false if the access would touch memory outside the range
532 BITREGION_START to BITREGION_END for conformance to the C++ memory
533 model. */
534
535 static bool
536 strict_volatile_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
537 unsigned HOST_WIDE_INT bitnum,
538 scalar_int_mode fieldmode,
539 poly_uint64 bitregion_start,
540 poly_uint64 bitregion_end)
541 {
542 unsigned HOST_WIDE_INT modesize = GET_MODE_BITSIZE (fieldmode);
543
544 /* -fstrict-volatile-bitfields must be enabled and we must have a
545 volatile MEM. */
546 if (!MEM_P (op0)
547 || !MEM_VOLATILE_P (op0)
548 || flag_strict_volatile_bitfields <= 0)
549 return false;
550
551 /* The bit size must not be larger than the field mode, and
552 the field mode must not be larger than a word. */
553 if (bitsize > modesize || modesize > BITS_PER_WORD)
554 return false;
555
556 /* Check for cases of unaligned fields that must be split. */
557 if (bitnum % modesize + bitsize > modesize)
558 return false;
559
560 /* The memory must be sufficiently aligned for a MODESIZE access.
561 This condition guarantees that the memory access will not
562 touch anything after the end of the structure. */
563 if (MEM_ALIGN (op0) < modesize)
564 return false;
565
566 /* Check for cases where the C++ memory model applies. */
567 if (maybe_ne (bitregion_end, 0U)
568 && (maybe_lt (bitnum - bitnum % modesize, bitregion_start)
569 || maybe_gt (bitnum - bitnum % modesize + modesize - 1,
570 bitregion_end)))
571 return false;
572
573 return true;
574 }
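/* For example, with -fstrict-volatile-bitfields, a volatile 16-bit field
   at bit 8 of a 32-bit-aligned word with SImode FIELDMODE passes the
   checks above (8 % 32 + 16 <= 32) and is accessed with a single SImode
   access, whereas the same field at bit 24 would straddle the SImode
   unit and returns false.  */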
575
576 /* Return true if OP is a memory and if a bitfield of size BITSIZE at
577 bit number BITNUM can be treated as a simple value of mode MODE.
578 Store the byte offset in *BYTENUM if so. */
579
580 static bool
581 simple_mem_bitfield_p (rtx op0, poly_uint64 bitsize, poly_uint64 bitnum,
582 machine_mode mode, poly_uint64 *bytenum)
583 {
584 return (MEM_P (op0)
585 && multiple_p (bitnum, BITS_PER_UNIT, bytenum)
586 && known_eq (bitsize, GET_MODE_BITSIZE (mode))
587 && (!targetm.slow_unaligned_access (mode, MEM_ALIGN (op0))
588 || (multiple_p (bitnum, GET_MODE_ALIGNMENT (mode))
589 && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode))));
590 }
591
592 /* Try to use instruction INSV to store VALUE into a field of OP0.
593 If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is a
594 BLKmode MEM. VALUE_MODE is the mode of VALUE. BITSIZE and BITNUM
595 are as for store_bit_field. */
596
597 static bool
598 store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
599 opt_scalar_int_mode op0_mode,
600 unsigned HOST_WIDE_INT bitsize,
601 unsigned HOST_WIDE_INT bitnum,
602 rtx value, scalar_int_mode value_mode)
603 {
604 class expand_operand ops[4];
605 rtx value1;
606 rtx xop0 = op0;
607 rtx_insn *last = get_last_insn ();
608 bool copy_back = false;
609
610 scalar_int_mode op_mode = insv->field_mode;
611 unsigned int unit = GET_MODE_BITSIZE (op_mode);
612 if (bitsize == 0 || bitsize > unit)
613 return false;
614
615 if (MEM_P (xop0))
616 /* Get a reference to the first byte of the field. */
617 xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
618 &bitnum);
619 else
620 {
621 /* Convert from counting within OP0 to counting in OP_MODE. */
622 if (BYTES_BIG_ENDIAN)
623 bitnum += unit - GET_MODE_BITSIZE (op0_mode.require ());
624
625 /* If xop0 is a register, we need it in OP_MODE
626 to make it acceptable to the format of insv. */
627 if (GET_CODE (xop0) == SUBREG)
628 {
629 /* If such a SUBREG can't be created, give up. */
630 if (!validate_subreg (op_mode, GET_MODE (SUBREG_REG (xop0)),
631 SUBREG_REG (xop0), SUBREG_BYTE (xop0)))
632 return false;
633 /* We can't just change the mode, because this might clobber op0,
634 and we will need the original value of op0 if insv fails. */
635 xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0),
636 SUBREG_BYTE (xop0));
637 }
638 if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
639 xop0 = gen_lowpart_SUBREG (op_mode, xop0);
640 }
641
642 /* If the destination is a paradoxical subreg such that we need a
643 truncate to the inner mode, perform the insertion on a temporary and
644 truncate the result to the original destination. Note that we can't
645 just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
646 X) 0)) is (reg:N X). */
647 if (GET_CODE (xop0) == SUBREG
648 && REG_P (SUBREG_REG (xop0))
649 && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
650 op_mode))
651 {
652 rtx tem = gen_reg_rtx (op_mode);
653 emit_move_insn (tem, xop0);
654 xop0 = tem;
655 copy_back = true;
656 }
657
658 /* There is a similar overflow check at the start of store_bit_field_1,
659 but it only handles the case where the field lies completely
660 outside the register. The field can also lie partially in the
661 register, in which case we need to adjust BITSIZE for the partial
662 overflow. Without this fix, pr48335-2.c would break on big-endian
663 targets that have a bit-insert instruction, such as arm and
664 aarch64. */
665 if (bitsize + bitnum > unit && bitnum < unit)
666 {
667 warning (OPT_Wextra, "write of %wu-bit data outside the bound of "
668 "destination object, data truncated into %wu-bit",
669 bitsize, unit - bitnum);
670 bitsize = unit - bitnum;
671 }
672
673 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
674 "backwards" from the size of the unit we are inserting into.
675 Otherwise, we count bits from the most significant on a
676 BYTES/BITS_BIG_ENDIAN machine. */
677
678 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
679 bitnum = unit - bitsize - bitnum;
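/* For example, inserting an 8-bit field at bit 4 of a 32-bit unit on a
   target where BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN gives
   bitnum = 32 - 8 - 4 = 20 here.  */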
680
681 /* Convert VALUE to op_mode (which insv insn wants) in VALUE1. */
682 value1 = value;
683 if (value_mode != op_mode)
684 {
685 if (GET_MODE_BITSIZE (value_mode) >= bitsize)
686 {
687 rtx tmp;
688 /* Optimization: Don't bother really extending VALUE
689 if it has all the bits we will actually use. However,
690 if we must narrow it, be sure we do it correctly. */
691
692 if (GET_MODE_SIZE (value_mode) < GET_MODE_SIZE (op_mode))
693 {
694 tmp = simplify_subreg (op_mode, value1, value_mode, 0);
695 if (! tmp)
696 tmp = simplify_gen_subreg (op_mode,
697 force_reg (value_mode, value1),
698 value_mode, 0);
699 }
700 else
701 {
702 tmp = gen_lowpart_if_possible (op_mode, value1);
703 if (! tmp)
704 tmp = gen_lowpart (op_mode, force_reg (value_mode, value1));
705 }
706 value1 = tmp;
707 }
708 else if (CONST_INT_P (value))
709 value1 = gen_int_mode (INTVAL (value), op_mode);
710 else
711 /* Parse phase is supposed to make VALUE's data type
712 match that of the component reference, which is a type
713 at least as wide as the field; so VALUE should have
714 a mode that corresponds to that type. */
715 gcc_assert (CONSTANT_P (value));
716 }
717
718 create_fixed_operand (&ops[0], xop0);
719 create_integer_operand (&ops[1], bitsize);
720 create_integer_operand (&ops[2], bitnum);
721 create_input_operand (&ops[3], value1, op_mode);
722 if (maybe_expand_insn (insv->icode, 4, ops))
723 {
724 if (copy_back)
725 convert_move (op0, xop0, true);
726 return true;
727 }
728 delete_insns_since (last);
729 return false;
730 }
731
732 /* A subroutine of store_bit_field, with the same arguments. Return true
733 if the operation could be implemented.
734
735 If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
736 no other way of implementing the operation. If FALLBACK_P is false,
737 return false instead. */
738
739 static bool
740 store_bit_field_1 (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
741 poly_uint64 bitregion_start, poly_uint64 bitregion_end,
742 machine_mode fieldmode,
743 rtx value, bool reverse, bool fallback_p)
744 {
745 rtx op0 = str_rtx;
746
747 while (GET_CODE (op0) == SUBREG)
748 {
749 bitnum += subreg_memory_offset (op0) * BITS_PER_UNIT;
750 op0 = SUBREG_REG (op0);
751 }
752
753 /* No action is needed if the target is a register and if the field
754 lies completely outside that register. This can occur if the source
755 code contains an out-of-bounds access to a small array. */
756 if (REG_P (op0) && known_ge (bitnum, GET_MODE_BITSIZE (GET_MODE (op0))))
757 return true;
758
759 /* Use vec_set patterns for inserting parts of vectors whenever
760 available. */
761 machine_mode outermode = GET_MODE (op0);
762 scalar_mode innermode = GET_MODE_INNER (outermode);
763 poly_uint64 pos;
764 if (VECTOR_MODE_P (outermode)
765 && !MEM_P (op0)
766 && optab_handler (vec_set_optab, outermode) != CODE_FOR_nothing
767 && fieldmode == innermode
768 && known_eq (bitsize, GET_MODE_BITSIZE (innermode))
769 && multiple_p (bitnum, GET_MODE_BITSIZE (innermode), &pos))
770 {
771 class expand_operand ops[3];
772 enum insn_code icode = optab_handler (vec_set_optab, outermode);
773
774 create_fixed_operand (&ops[0], op0);
775 create_input_operand (&ops[1], value, innermode);
776 create_integer_operand (&ops[2], pos);
777 if (maybe_expand_insn (icode, 3, ops))
778 return true;
779 }
780
781 /* If the target is a register, overwriting the entire object or storing
782 a full-word or multi-word field can be done with just a SUBREG. */
783 if (!MEM_P (op0)
784 && known_eq (bitsize, GET_MODE_BITSIZE (fieldmode)))
785 {
786 /* Use the subreg machinery either to narrow OP0 to the required
787 words or to cope with mode punning between equal-sized modes.
788 In the latter case, use subreg on the rhs side, not lhs. */
789 rtx sub;
790 HOST_WIDE_INT regnum;
791 poly_uint64 regsize = REGMODE_NATURAL_SIZE (GET_MODE (op0));
792 if (known_eq (bitnum, 0U)
793 && known_eq (bitsize, GET_MODE_BITSIZE (GET_MODE (op0))))
794 {
795 sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0);
796 if (sub)
797 {
798 if (reverse)
799 sub = flip_storage_order (GET_MODE (op0), sub);
800 emit_move_insn (op0, sub);
801 return true;
802 }
803 }
804 else if (constant_multiple_p (bitnum, regsize * BITS_PER_UNIT, &regnum)
805 && multiple_p (bitsize, regsize * BITS_PER_UNIT))
806 {
807 sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
808 regnum * regsize);
809 if (sub)
810 {
811 if (reverse)
812 value = flip_storage_order (fieldmode, value);
813 emit_move_insn (sub, value);
814 return true;
815 }
816 }
817 }
818
819 /* If the target is memory, storing any naturally aligned field can be
820 done with a simple store. For targets that support fast unaligned
821 memory, any naturally sized, unit aligned field can be done directly. */
822 poly_uint64 bytenum;
823 if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode, &bytenum))
824 {
825 op0 = adjust_bitfield_address (op0, fieldmode, bytenum);
826 if (reverse)
827 value = flip_storage_order (fieldmode, value);
828 emit_move_insn (op0, value);
829 return true;
830 }
831
832 /* It's possible we'll need to handle other cases here for
833 polynomial bitnum and bitsize. */
834
835 /* From here on we need to be looking at a fixed-size insertion. */
836 unsigned HOST_WIDE_INT ibitsize = bitsize.to_constant ();
837 unsigned HOST_WIDE_INT ibitnum = bitnum.to_constant ();
838
839 /* Make sure we are playing with integral modes. Pun with subregs
840 if we aren't. This must come after the entire register case above,
841 since that case is valid for any mode. The following cases are only
842 valid for integral modes. */
843 opt_scalar_int_mode op0_mode = int_mode_for_mode (GET_MODE (op0));
844 scalar_int_mode imode;
845 if (!op0_mode.exists (&imode) || imode != GET_MODE (op0))
846 {
847 if (MEM_P (op0))
848 op0 = adjust_bitfield_address_size (op0, op0_mode.else_blk (),
849 0, MEM_SIZE (op0));
850 else if (!op0_mode.exists ())
851 {
852 if (ibitnum == 0
853 && known_eq (ibitsize, GET_MODE_BITSIZE (GET_MODE (op0)))
854 && MEM_P (value)
855 && !reverse)
856 {
857 value = adjust_address (value, GET_MODE (op0), 0);
858 emit_move_insn (op0, value);
859 return true;
860 }
861 if (!fallback_p)
862 return false;
863 rtx temp = assign_stack_temp (GET_MODE (op0),
864 GET_MODE_SIZE (GET_MODE (op0)));
865 emit_move_insn (temp, op0);
866 store_bit_field_1 (temp, bitsize, bitnum, 0, 0, fieldmode, value,
867 reverse, fallback_p);
868 emit_move_insn (op0, temp);
869 return true;
870 }
871 else
872 op0 = gen_lowpart (op0_mode.require (), op0);
873 }
874
875 return store_integral_bit_field (op0, op0_mode, ibitsize, ibitnum,
876 bitregion_start, bitregion_end,
877 fieldmode, value, reverse, fallback_p);
878 }
879
880 /* Subroutine of store_bit_field_1, with the same arguments, except
881 that BITSIZE and BITNUM are constant. Handle cases specific to
882 integral modes. If OP0_MODE is defined, it is the mode of OP0,
883 otherwise OP0 is a BLKmode MEM. */
884
885 static bool
886 store_integral_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
887 unsigned HOST_WIDE_INT bitsize,
888 unsigned HOST_WIDE_INT bitnum,
889 poly_uint64 bitregion_start,
890 poly_uint64 bitregion_end,
891 machine_mode fieldmode,
892 rtx value, bool reverse, bool fallback_p)
893 {
894 /* Storing an lsb-aligned field in a register
895 can be done with a movstrict instruction. */
896
897 if (!MEM_P (op0)
898 && !reverse
899 && lowpart_bit_field_p (bitnum, bitsize, op0_mode.require ())
900 && known_eq (bitsize, GET_MODE_BITSIZE (fieldmode))
901 && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
902 {
903 class expand_operand ops[2];
904 enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
905 rtx arg0 = op0;
906 unsigned HOST_WIDE_INT subreg_off;
907
908 if (GET_CODE (arg0) == SUBREG)
909 {
910 /* Else we've got some float mode source being extracted into
911 a different float mode destination -- this combination of
912 subregs results in Severe Tire Damage. */
913 gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
914 || GET_MODE_CLASS (fieldmode) == MODE_INT
915 || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
916 arg0 = SUBREG_REG (arg0);
917 }
918
919 subreg_off = bitnum / BITS_PER_UNIT;
920 if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off)
921 /* STRICT_LOW_PART must have a non-paradoxical subreg as
922 operand. */
923 && !paradoxical_subreg_p (fieldmode, GET_MODE (arg0)))
924 {
925 arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
926
927 create_fixed_operand (&ops[0], arg0);
928 /* Shrink the source operand to FIELDMODE. */
929 create_convert_operand_to (&ops[1], value, fieldmode, false);
930 if (maybe_expand_insn (icode, 2, ops))
931 return true;
932 }
933 }
934
935 /* Handle fields bigger than a word. */
936
937 if (bitsize > BITS_PER_WORD)
938 {
939 /* Here we transfer the words of the field
940 in the order least significant first.
941 This is because the most significant word is the one which may
942 be less than full.
943 However, only do that if the value is not BLKmode. */
944
945 const bool backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
946 const int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
947 rtx_insn *last;
948
949 /* This is the mode we must force value to, so that there will be enough
950 subwords to extract. Note that fieldmode will often (always?) be
951 VOIDmode, because that is what store_field uses to indicate that this
952 is a bit field, but passing VOIDmode to operand_subword_force
953 is not allowed.
954
955 The mode must be fixed-size, since insertions into variable-sized
956 objects are meant to be handled before calling this function. */
957 fixed_size_mode value_mode = as_a <fixed_size_mode> (GET_MODE (value));
958 if (value_mode == VOIDmode)
959 value_mode = smallest_int_mode_for_size (nwords * BITS_PER_WORD);
960
961 last = get_last_insn ();
962 for (int i = 0; i < nwords; i++)
963 {
964 /* Number of bits to be stored in this iteration, i.e. BITS_PER_WORD
965 except maybe for the last iteration. */
966 const unsigned HOST_WIDE_INT new_bitsize
967 = MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
968 /* Bit offset from the starting bit number in the target. */
969 const unsigned int bit_offset
970 = backwards ^ reverse
971 ? MAX ((int) bitsize - (i + 1) * BITS_PER_WORD, 0)
972 : i * BITS_PER_WORD;
973 /* Starting word number in the value. */
974 const unsigned int wordnum
975 = backwards
976 ? GET_MODE_SIZE (value_mode) / UNITS_PER_WORD - (i + 1)
977 : i;
978 /* The chunk of the value in word_mode. We use bit-field extraction
979 in BLKmode to handle unaligned memory references and to shift the
980 last chunk right on big-endian machines if need be. */
981 rtx value_word
982 = fieldmode == BLKmode
983 ? extract_bit_field (value, new_bitsize, wordnum * BITS_PER_WORD,
984 1, NULL_RTX, word_mode, word_mode, false,
985 NULL)
986 : operand_subword_force (value, wordnum, value_mode);
987
988 if (!store_bit_field_1 (op0, new_bitsize,
989 bitnum + bit_offset,
990 bitregion_start, bitregion_end,
991 word_mode,
992 value_word, reverse, fallback_p))
993 {
994 delete_insns_since (last);
995 return false;
996 }
997 }
998 return true;
999 }
1000
1001 /* If VALUE has a floating-point or complex mode, access it as an
1002 integer of the corresponding size. This can occur on a machine
1003 with 64 bit registers that uses SFmode for float. It can also
1004 occur for unaligned float or complex fields. */
1005 rtx orig_value = value;
1006 scalar_int_mode value_mode;
1007 if (GET_MODE (value) == VOIDmode)
1008 /* By this point we've dealt with values that are bigger than a word,
1009 so word_mode is a conservatively correct choice. */
1010 value_mode = word_mode;
1011 else if (!is_a <scalar_int_mode> (GET_MODE (value), &value_mode))
1012 {
1013 value_mode = int_mode_for_mode (GET_MODE (value)).require ();
1014 value = gen_reg_rtx (value_mode);
1015 emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
1016 }
1017
1018 /* If OP0 is a multi-word register, narrow it to the affected word.
1019 If the region spans two words, defer to store_split_bit_field.
1020 Don't do this if op0 is a single hard register wider than word
1021 such as a float or vector register. */
1022 if (!MEM_P (op0)
1023 && GET_MODE_SIZE (op0_mode.require ()) > UNITS_PER_WORD
1024 && (!REG_P (op0)
1025 || !HARD_REGISTER_P (op0)
1026 || hard_regno_nregs (REGNO (op0), op0_mode.require ()) != 1))
1027 {
1028 if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
1029 {
1030 if (!fallback_p)
1031 return false;
1032
1033 store_split_bit_field (op0, op0_mode, bitsize, bitnum,
1034 bitregion_start, bitregion_end,
1035 value, value_mode, reverse);
1036 return true;
1037 }
1038 op0 = simplify_gen_subreg (word_mode, op0, op0_mode.require (),
1039 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1040 gcc_assert (op0);
1041 op0_mode = word_mode;
1042 bitnum %= BITS_PER_WORD;
1043 }
1044
1045 /* From here on we can assume that the field to be stored in fits
1046 within a word. If the destination is a register, it too fits
1047 in a word. */
1048
1049 extraction_insn insv;
1050 if (!MEM_P (op0)
1051 && !reverse
1052 && get_best_reg_extraction_insn (&insv, EP_insv,
1053 GET_MODE_BITSIZE (op0_mode.require ()),
1054 fieldmode)
1055 && store_bit_field_using_insv (&insv, op0, op0_mode,
1056 bitsize, bitnum, value, value_mode))
1057 return true;
1058
1059 /* If OP0 is a memory, try copying it to a register and seeing if a
1060 cheap register alternative is available. */
1061 if (MEM_P (op0) && !reverse)
1062 {
1063 if (get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
1064 fieldmode)
1065 && store_bit_field_using_insv (&insv, op0, op0_mode,
1066 bitsize, bitnum, value, value_mode))
1067 return true;
1068
1069 rtx_insn *last = get_last_insn ();
1070
1071 /* Try loading part of OP0 into a register, inserting the bitfield
1072 into that, and then copying the result back to OP0. */
1073 unsigned HOST_WIDE_INT bitpos;
1074 rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
1075 bitregion_start, bitregion_end,
1076 fieldmode, &bitpos);
1077 if (xop0)
1078 {
1079 rtx tempreg = copy_to_reg (xop0);
1080 if (store_bit_field_1 (tempreg, bitsize, bitpos,
1081 bitregion_start, bitregion_end,
1082 fieldmode, orig_value, reverse, false))
1083 {
1084 emit_move_insn (xop0, tempreg);
1085 return true;
1086 }
1087 delete_insns_since (last);
1088 }
1089 }
1090
1091 if (!fallback_p)
1092 return false;
1093
1094 store_fixed_bit_field (op0, op0_mode, bitsize, bitnum, bitregion_start,
1095 bitregion_end, value, value_mode, reverse);
1096 return true;
1097 }
1098
1099 /* Generate code to store value from rtx VALUE
1100 into a bit-field within structure STR_RTX
1101 containing BITSIZE bits starting at bit BITNUM.
1102
1103 BITREGION_START is bitpos of the first bitfield in this region.
1104 BITREGION_END is the bitpos of the ending bitfield in this region.
1105 These two fields are 0, if the C++ memory model does not apply,
1106 or we are not interested in keeping track of bitfield regions.
1107
1108 FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
1109
1110 If REVERSE is true, the store is to be done in reverse order. */
1111
1112 void
1113 store_bit_field (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
1114 poly_uint64 bitregion_start, poly_uint64 bitregion_end,
1115 machine_mode fieldmode,
1116 rtx value, bool reverse)
1117 {
1118 /* Handle -fstrict-volatile-bitfields in the cases where it applies. */
1119 unsigned HOST_WIDE_INT ibitsize = 0, ibitnum = 0;
1120 scalar_int_mode int_mode;
1121 if (bitsize.is_constant (&ibitsize)
1122 && bitnum.is_constant (&ibitnum)
1123 && is_a <scalar_int_mode> (fieldmode, &int_mode)
1124 && strict_volatile_bitfield_p (str_rtx, ibitsize, ibitnum, int_mode,
1125 bitregion_start, bitregion_end))
1126 {
1127 /* Storing of a full word can be done with a simple store.
1128 We know here that the field can be accessed with one single
1129 instruction. For targets that support unaligned memory,
1130 an unaligned access may be necessary. */
1131 if (ibitsize == GET_MODE_BITSIZE (int_mode))
1132 {
1133 str_rtx = adjust_bitfield_address (str_rtx, int_mode,
1134 ibitnum / BITS_PER_UNIT);
1135 if (reverse)
1136 value = flip_storage_order (int_mode, value);
1137 gcc_assert (ibitnum % BITS_PER_UNIT == 0);
1138 emit_move_insn (str_rtx, value);
1139 }
1140 else
1141 {
1142 rtx temp;
1143
1144 str_rtx = narrow_bit_field_mem (str_rtx, int_mode, ibitsize,
1145 ibitnum, &ibitnum);
1146 gcc_assert (ibitnum + ibitsize <= GET_MODE_BITSIZE (int_mode));
1147 temp = copy_to_reg (str_rtx);
1148 if (!store_bit_field_1 (temp, ibitsize, ibitnum, 0, 0,
1149 int_mode, value, reverse, true))
1150 gcc_unreachable ();
1151
1152 emit_move_insn (str_rtx, temp);
1153 }
1154
1155 return;
1156 }
1157
1158 /* Under the C++0x memory model, we must not touch bits outside the
1159 bit region. Adjust the address to start at the beginning of the
1160 bit region. */
1161 if (MEM_P (str_rtx) && maybe_ne (bitregion_start, 0U))
1162 {
1163 scalar_int_mode best_mode;
1164 machine_mode addr_mode = VOIDmode;
1165
1166 poly_uint64 offset = exact_div (bitregion_start, BITS_PER_UNIT);
1167 bitnum -= bitregion_start;
1168 poly_int64 size = bits_to_bytes_round_up (bitnum + bitsize);
1169 bitregion_end -= bitregion_start;
1170 bitregion_start = 0;
1171 if (bitsize.is_constant (&ibitsize)
1172 && bitnum.is_constant (&ibitnum)
1173 && get_best_mode (ibitsize, ibitnum,
1174 bitregion_start, bitregion_end,
1175 MEM_ALIGN (str_rtx), INT_MAX,
1176 MEM_VOLATILE_P (str_rtx), &best_mode))
1177 addr_mode = best_mode;
1178 str_rtx = adjust_bitfield_address_size (str_rtx, addr_mode,
1179 offset, size);
1180 }
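/* For example, with a bit region of [32, 95], storing 8 bits at bit 40
   becomes, after the adjustment above, storing 8 bits at bit 8 of a
   reference advanced by 4 bytes, with the region becoming [0, 63].  */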
1181
1182 if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
1183 bitregion_start, bitregion_end,
1184 fieldmode, value, reverse, true))
1185 gcc_unreachable ();
1186 }
1187
1188 /* Use shifts and boolean operations to store VALUE into a bit field of
1189 width BITSIZE in OP0, starting at bit BITNUM. If OP0_MODE is defined,
1190 it is the mode of OP0, otherwise OP0 is a BLKmode MEM. VALUE_MODE is
1191 the mode of VALUE.
1192
1193 If REVERSE is true, the store is to be done in reverse order. */
1194
1195 static void
1196 store_fixed_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
1197 unsigned HOST_WIDE_INT bitsize,
1198 unsigned HOST_WIDE_INT bitnum,
1199 poly_uint64 bitregion_start, poly_uint64 bitregion_end,
1200 rtx value, scalar_int_mode value_mode, bool reverse)
1201 {
1202 /* There is a case not handled here:
1203 a structure with a known alignment of just a halfword
1204 and a field split across two aligned halfwords within the structure.
1205 Or likewise a structure with a known alignment of just a byte
1206 and a field split across two bytes.
1207 Such cases are not supposed to be able to occur. */
1208
1209 scalar_int_mode best_mode;
1210 if (MEM_P (op0))
1211 {
1212 unsigned int max_bitsize = BITS_PER_WORD;
1213 scalar_int_mode imode;
1214 if (op0_mode.exists (&imode) && GET_MODE_BITSIZE (imode) < max_bitsize)
1215 max_bitsize = GET_MODE_BITSIZE (imode);
1216
1217 if (!get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
1218 MEM_ALIGN (op0), max_bitsize, MEM_VOLATILE_P (op0),
1219 &best_mode))
1220 {
1221 /* The only way this should occur is if the field spans word
1222 boundaries. */
1223 store_split_bit_field (op0, op0_mode, bitsize, bitnum,
1224 bitregion_start, bitregion_end,
1225 value, value_mode, reverse);
1226 return;
1227 }
1228
1229 op0 = narrow_bit_field_mem (op0, best_mode, bitsize, bitnum, &bitnum);
1230 }
1231 else
1232 best_mode = op0_mode.require ();
1233
1234 store_fixed_bit_field_1 (op0, best_mode, bitsize, bitnum,
1235 value, value_mode, reverse);
1236 }
1237
1238 /* Helper function for store_fixed_bit_field, stores
1239 the bit field always using MODE, which is the mode of OP0. The other
1240 arguments are as for store_fixed_bit_field. */
1241
1242 static void
1243 store_fixed_bit_field_1 (rtx op0, scalar_int_mode mode,
1244 unsigned HOST_WIDE_INT bitsize,
1245 unsigned HOST_WIDE_INT bitnum,
1246 rtx value, scalar_int_mode value_mode, bool reverse)
1247 {
1248 rtx temp;
1249 int all_zero = 0;
1250 int all_one = 0;
1251
1252 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1253 for invalid input, such as f5 from gcc.dg/pr48335-2.c. */
1254
1255 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1256 /* BITNUM is the distance between our msb
1257 and that of the containing datum.
1258 Convert it to the distance from the lsb. */
1259 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1260
1261 /* Now BITNUM is always the distance between our lsb
1262 and that of OP0. */
1263
1264 /* Shift VALUE left by BITNUM bits. If VALUE is not constant,
1265 we must first convert its mode to MODE. */
1266
1267 if (CONST_INT_P (value))
1268 {
1269 unsigned HOST_WIDE_INT v = UINTVAL (value);
1270
1271 if (bitsize < HOST_BITS_PER_WIDE_INT)
1272 v &= (HOST_WIDE_INT_1U << bitsize) - 1;
1273
1274 if (v == 0)
1275 all_zero = 1;
1276 else if ((bitsize < HOST_BITS_PER_WIDE_INT
1277 && v == (HOST_WIDE_INT_1U << bitsize) - 1)
1278 || (bitsize == HOST_BITS_PER_WIDE_INT
1279 && v == HOST_WIDE_INT_M1U))
1280 all_one = 1;
1281
1282 value = lshift_value (mode, v, bitnum);
1283 }
1284 else
1285 {
1286 int must_and = (GET_MODE_BITSIZE (value_mode) != bitsize
1287 && bitnum + bitsize != GET_MODE_BITSIZE (mode));
1288
1289 if (value_mode != mode)
1290 value = convert_to_mode (mode, value, 1);
1291
1292 if (must_and)
1293 value = expand_binop (mode, and_optab, value,
1294 mask_rtx (mode, 0, bitsize, 0),
1295 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1296 if (bitnum > 0)
1297 value = expand_shift (LSHIFT_EXPR, mode, value,
1298 bitnum, NULL_RTX, 1);
1299 }
1300
1301 if (reverse)
1302 value = flip_storage_order (mode, value);
1303
1304 /* Now clear the chosen bits in OP0,
1305 except that if VALUE is -1 we need not bother. */
1306 /* We keep the intermediates in registers to allow CSE to combine
1307 consecutive bitfield assignments. */
1308
1309 temp = force_reg (mode, op0);
1310
1311 if (! all_one)
1312 {
1313 rtx mask = mask_rtx (mode, bitnum, bitsize, 1);
1314 if (reverse)
1315 mask = flip_storage_order (mode, mask);
1316 temp = expand_binop (mode, and_optab, temp, mask,
1317 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1318 temp = force_reg (mode, temp);
1319 }
1320
1321 /* Now logical-or VALUE into OP0, unless it is zero. */
1322
1323 if (! all_zero)
1324 {
1325 temp = expand_binop (mode, ior_optab, temp, value,
1326 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1327 temp = force_reg (mode, temp);
1328 }
1329
1330 if (op0 != temp)
1331 {
1332 op0 = copy_rtx (op0);
1333 emit_move_insn (op0, temp);
1334 }
1335 }
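/* For illustration (!BYTES_BIG_ENDIAN, !REVERSE): storing the constant 5
   into a 3-bit field at bit 4 of a 32-bit MODE shifts the value to 0x50,
   builds the clearing mask ~0x70 and emits the equivalent of
   (op0 & ~0x70) | 0x50.  The AND is skipped when the stored value is all
   ones and the IOR when it is zero.  */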
1336
1337 /* Store a bit field that is split across multiple accessible memory objects.
1338
1339 OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1340 BITSIZE is the field width; BITPOS the position of its first bit
1341 (within the word).
1342 VALUE is the value to store, which has mode VALUE_MODE.
1343 If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is
1344 a BLKmode MEM.
1345
1346 If REVERSE is true, the store is to be done in reverse order.
1347
1348 This does not yet handle fields wider than BITS_PER_WORD. */
1349
1350 static void
1351 store_split_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
1352 unsigned HOST_WIDE_INT bitsize,
1353 unsigned HOST_WIDE_INT bitpos,
1354 poly_uint64 bitregion_start, poly_uint64 bitregion_end,
1355 rtx value, scalar_int_mode value_mode, bool reverse)
1356 {
1357 unsigned int unit, total_bits, bitsdone = 0;
1358
1359 /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle that
1360 much at a time. */
1361 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1362 unit = BITS_PER_WORD;
1363 else
1364 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1365
1366 /* If OP0 is a memory with a mode, then UNIT must not be larger than
1367 OP0's mode as well. Otherwise, store_fixed_bit_field will call us
1368 again, and we will mutually recurse forever. */
1369 if (MEM_P (op0) && op0_mode.exists ())
1370 unit = MIN (unit, GET_MODE_BITSIZE (op0_mode.require ()));
1371
1372 /* If VALUE is a constant other than a CONST_INT, get it into a register in
1373 WORD_MODE. If we can do this using gen_lowpart_common, do so. Note
1374 that VALUE might be a floating-point constant. */
1375 if (CONSTANT_P (value) && !CONST_INT_P (value))
1376 {
1377 rtx word = gen_lowpart_common (word_mode, value);
1378
1379 if (word && (value != word))
1380 value = word;
1381 else
1382 value = gen_lowpart_common (word_mode, force_reg (value_mode, value));
1383 value_mode = word_mode;
1384 }
1385
1386 total_bits = GET_MODE_BITSIZE (value_mode);
1387
1388 while (bitsdone < bitsize)
1389 {
1390 unsigned HOST_WIDE_INT thissize;
1391 unsigned HOST_WIDE_INT thispos;
1392 unsigned HOST_WIDE_INT offset;
1393 rtx part;
1394
1395 offset = (bitpos + bitsdone) / unit;
1396 thispos = (bitpos + bitsdone) % unit;
1397
1398 /* When region of bytes we can touch is restricted, decrease
1399 UNIT close to the end of the region as needed. If op0 is a REG
1400 or SUBREG of REG, don't do this, as there can't be data races
1401 on a register and we can expand shorter code in some cases. */
1402 if (maybe_ne (bitregion_end, 0U)
1403 && unit > BITS_PER_UNIT
1404 && maybe_gt (bitpos + bitsdone - thispos + unit, bitregion_end + 1)
1405 && !REG_P (op0)
1406 && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0))))
1407 {
1408 unit = unit / 2;
1409 continue;
1410 }
1411
1412 /* THISSIZE must not overrun a word boundary. Otherwise,
1413 store_fixed_bit_field will call us again, and we will mutually
1414 recurse forever. */
1415 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1416 thissize = MIN (thissize, unit - thispos);
1417
1418 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1419 {
1420 /* Fetch successively less significant portions. */
1421 if (CONST_INT_P (value))
1422 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1423 >> (bitsize - bitsdone - thissize))
1424 & ((HOST_WIDE_INT_1 << thissize) - 1));
1425 /* Likewise, but the source is little-endian. */
1426 else if (reverse)
1427 part = extract_fixed_bit_field (word_mode, value, value_mode,
1428 thissize,
1429 bitsize - bitsdone - thissize,
1430 NULL_RTX, 1, false);
1431 else
1432 /* The args are chosen so that the last part includes the
1433 lsb. Give extract_bit_field the value it needs (with
1434 endianness compensation) to fetch the piece we want. */
1435 part = extract_fixed_bit_field (word_mode, value, value_mode,
1436 thissize,
1437 total_bits - bitsize + bitsdone,
1438 NULL_RTX, 1, false);
1439 }
1440 else
1441 {
1442 /* Fetch successively more significant portions. */
1443 if (CONST_INT_P (value))
1444 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1445 >> bitsdone)
1446 & ((HOST_WIDE_INT_1 << thissize) - 1));
1447 /* Likewise, but the source is big-endian. */
1448 else if (reverse)
1449 part = extract_fixed_bit_field (word_mode, value, value_mode,
1450 thissize,
1451 total_bits - bitsdone - thissize,
1452 NULL_RTX, 1, false);
1453 else
1454 part = extract_fixed_bit_field (word_mode, value, value_mode,
1455 thissize, bitsdone, NULL_RTX,
1456 1, false);
1457 }
1458
1459 /* If OP0 is a register, then handle OFFSET here. */
1460 rtx op0_piece = op0;
1461 opt_scalar_int_mode op0_piece_mode = op0_mode;
1462 if (SUBREG_P (op0) || REG_P (op0))
1463 {
1464 scalar_int_mode imode;
1465 if (op0_mode.exists (&imode)
1466 && GET_MODE_SIZE (imode) < UNITS_PER_WORD)
1467 {
1468 if (offset)
1469 op0_piece = const0_rtx;
1470 }
1471 else
1472 {
1473 op0_piece = operand_subword_force (op0,
1474 offset * unit / BITS_PER_WORD,
1475 GET_MODE (op0));
1476 op0_piece_mode = word_mode;
1477 }
1478 offset &= BITS_PER_WORD / unit - 1;
1479 }
1480
1481 /* OFFSET is in UNITs, and UNIT is in bits. If OP0_PIECE is const0_rtx,
1482 it is just an out-of-bounds access. Ignore it. */
1483 if (op0_piece != const0_rtx)
1484 store_fixed_bit_field (op0_piece, op0_piece_mode, thissize,
1485 offset * unit + thispos, bitregion_start,
1486 bitregion_end, part, word_mode, reverse);
1487 bitsdone += thissize;
1488 }
1489 }
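/* For illustration: with 32-bit words, storing a 12-bit field at bit 26
   of a multi-word REG takes two iterations of the loop above: 6 bits at
   bit 26 of word 0, then 6 bits at bit 0 of word 1.  On a
   !BYTES_BIG_ENDIAN target without REVERSE, the first piece is the low
   6 bits of VALUE and the second the next 6 bits.  */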
1490
1491 /* A subroutine of extract_bit_field_1 that converts return value X
1492 to either MODE or TMODE. MODE, TMODE and UNSIGNEDP are arguments
1493 to extract_bit_field. */
1494
1495 static rtx
1496 convert_extracted_bit_field (rtx x, machine_mode mode,
1497 machine_mode tmode, bool unsignedp)
1498 {
1499 if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1500 return x;
1501
1502 /* If the x mode is not a scalar integral, first convert to the
1503 integer mode of that size and then access it as a floating-point
1504 value via a SUBREG. */
1505 if (!SCALAR_INT_MODE_P (tmode))
1506 {
1507 scalar_int_mode int_mode = int_mode_for_mode (tmode).require ();
1508 x = convert_to_mode (int_mode, x, unsignedp);
1509 x = force_reg (int_mode, x);
1510 return gen_lowpart (tmode, x);
1511 }
1512
1513 return convert_to_mode (tmode, x, unsignedp);
1514 }
1515
1516 /* Try to use an ext(z)v pattern to extract a field from OP0.
1517 Return the extracted value on success, otherwise return null.
1518 EXTV describes the extraction instruction to use. If OP0_MODE
1519 is defined, it is the mode of OP0, otherwise OP0 is a BLKmode MEM.
1520 The other arguments are as for extract_bit_field. */
1521
1522 static rtx
1523 extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
1524 opt_scalar_int_mode op0_mode,
1525 unsigned HOST_WIDE_INT bitsize,
1526 unsigned HOST_WIDE_INT bitnum,
1527 int unsignedp, rtx target,
1528 machine_mode mode, machine_mode tmode)
1529 {
1530 class expand_operand ops[4];
1531 rtx spec_target = target;
1532 rtx spec_target_subreg = 0;
1533 scalar_int_mode ext_mode = extv->field_mode;
1534 unsigned unit = GET_MODE_BITSIZE (ext_mode);
1535
1536 if (bitsize == 0 || unit < bitsize)
1537 return NULL_RTX;
1538
1539 if (MEM_P (op0))
1540 /* Get a reference to the first byte of the field. */
1541 op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
1542 &bitnum);
1543 else
1544 {
1545 /* Convert from counting within OP0 to counting in EXT_MODE. */
1546 if (BYTES_BIG_ENDIAN)
1547 bitnum += unit - GET_MODE_BITSIZE (op0_mode.require ());
1548
1549 /* If op0 is a register, we need it in EXT_MODE to make it
1550 acceptable to the format of ext(z)v. */
1551 if (GET_CODE (op0) == SUBREG && op0_mode.require () != ext_mode)
1552 return NULL_RTX;
1553 if (REG_P (op0) && op0_mode.require () != ext_mode)
1554 op0 = gen_lowpart_SUBREG (ext_mode, op0);
1555 }
1556
1557 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1558 "backwards" from the size of the unit we are extracting from.
1559 Otherwise, we count bits from the most significant on a
1560 BYTES/BITS_BIG_ENDIAN machine. */
1561
1562 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1563 bitnum = unit - bitsize - bitnum;
1564
1565 if (target == 0)
1566 target = spec_target = gen_reg_rtx (tmode);
1567
1568 if (GET_MODE (target) != ext_mode)
1569 {
1570 /* Don't use a LHS paradoxical subreg if explicit truncation is needed
1571 between the mode of the extraction (EXT_MODE) and the target
1572 mode. Instead, create a temporary and use convert_move to set
1573 the target. */
1574 if (REG_P (target)
1575 && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode))
1576 {
1577 target = gen_lowpart (ext_mode, target);
1578 if (partial_subreg_p (GET_MODE (spec_target), ext_mode))
1579 spec_target_subreg = target;
1580 }
1581 else
1582 target = gen_reg_rtx (ext_mode);
1583 }
1584
1585 create_output_operand (&ops[0], target, ext_mode);
1586 create_fixed_operand (&ops[1], op0);
1587 create_integer_operand (&ops[2], bitsize);
1588 create_integer_operand (&ops[3], bitnum);
1589 if (maybe_expand_insn (extv->icode, 4, ops))
1590 {
1591 target = ops[0].value;
1592 if (target == spec_target)
1593 return target;
1594 if (target == spec_target_subreg)
1595 return spec_target;
1596 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1597 }
1598 return NULL_RTX;
1599 }
1600
1601 /* See whether it would be valid to extract the part of OP0 described
1602 by BITNUM and BITSIZE into a value of mode MODE using a subreg
1603 operation. Return the subreg if so, otherwise return null. */
1604
1605 static rtx
1606 extract_bit_field_as_subreg (machine_mode mode, rtx op0,
1607 poly_uint64 bitsize, poly_uint64 bitnum)
1608 {
1609 poly_uint64 bytenum;
1610 if (multiple_p (bitnum, BITS_PER_UNIT, &bytenum)
1611 && known_eq (bitsize, GET_MODE_BITSIZE (mode))
1612 && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
1613 && TRULY_NOOP_TRUNCATION_MODES_P (mode, GET_MODE (op0)))
1614 return simplify_gen_subreg (mode, op0, GET_MODE (op0), bytenum);
1615 return NULL_RTX;
1616 }
1617
1618 /* A subroutine of extract_bit_field, with the same arguments.
1619 If FALLBACK_P is true, fall back to extract_fixed_bit_field
1620 if we can find no other means of implementing the operation.
1621 If FALLBACK_P is false, return NULL instead. */
1622
1623 static rtx
1624 extract_bit_field_1 (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
1625 int unsignedp, rtx target, machine_mode mode,
1626 machine_mode tmode, bool reverse, bool fallback_p,
1627 rtx *alt_rtl)
1628 {
1629 rtx op0 = str_rtx;
1630 machine_mode mode1;
1631
1632 if (tmode == VOIDmode)
1633 tmode = mode;
1634
1635 while (GET_CODE (op0) == SUBREG)
1636 {
1637 bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1638 op0 = SUBREG_REG (op0);
1639 }
1640
1641 /* If we have an out-of-bounds access to a register, just return an
1642 uninitialized register of the required mode. This can occur if the
1643 source code contains an out-of-bounds access to a small array. */
1644 if (REG_P (op0) && known_ge (bitnum, GET_MODE_BITSIZE (GET_MODE (op0))))
1645 return gen_reg_rtx (tmode);
1646
1647 if (REG_P (op0)
1648 && mode == GET_MODE (op0)
1649 && known_eq (bitnum, 0U)
1650 && known_eq (bitsize, GET_MODE_BITSIZE (GET_MODE (op0))))
1651 {
1652 if (reverse)
1653 op0 = flip_storage_order (mode, op0);
1654 /* We're trying to extract a full register from itself. */
1655 return op0;
1656 }
1657
1658 /* First try to check for vector from vector extractions. */
1659 if (VECTOR_MODE_P (GET_MODE (op0))
1660 && !MEM_P (op0)
1661 && VECTOR_MODE_P (tmode)
1662 && known_eq (bitsize, GET_MODE_BITSIZE (tmode))
1663 && maybe_gt (GET_MODE_SIZE (GET_MODE (op0)), GET_MODE_SIZE (tmode)))
1664 {
1665 machine_mode new_mode = GET_MODE (op0);
1666 if (GET_MODE_INNER (new_mode) != GET_MODE_INNER (tmode))
1667 {
1668 scalar_mode inner_mode = GET_MODE_INNER (tmode);
1669 poly_uint64 nunits;
1670 if (!multiple_p (GET_MODE_BITSIZE (GET_MODE (op0)),
1671 GET_MODE_UNIT_BITSIZE (tmode), &nunits)
1672 || !related_vector_mode (tmode, inner_mode,
1673 nunits).exists (&new_mode)
1674 || maybe_ne (GET_MODE_SIZE (new_mode),
1675 GET_MODE_SIZE (GET_MODE (op0))))
1676 new_mode = VOIDmode;
1677 }
1678 poly_uint64 pos;
1679 if (new_mode != VOIDmode
1680 && (convert_optab_handler (vec_extract_optab, new_mode, tmode)
1681 != CODE_FOR_nothing)
1682 && multiple_p (bitnum, GET_MODE_BITSIZE (tmode), &pos))
1683 {
1684 class expand_operand ops[3];
1685 machine_mode outermode = new_mode;
1686 machine_mode innermode = tmode;
1687 enum insn_code icode
1688 = convert_optab_handler (vec_extract_optab, outermode, innermode);
1689
1690 if (new_mode != GET_MODE (op0))
1691 op0 = gen_lowpart (new_mode, op0);
1692 create_output_operand (&ops[0], target, innermode);
1693 ops[0].target = 1;
1694 create_input_operand (&ops[1], op0, outermode);
1695 create_integer_operand (&ops[2], pos);
1696 if (maybe_expand_insn (icode, 3, ops))
1697 {
1698 if (alt_rtl && ops[0].target)
1699 *alt_rtl = target;
1700 target = ops[0].value;
1701 if (GET_MODE (target) != mode)
1702 return gen_lowpart (tmode, target);
1703 return target;
1704 }
1705 }
1706 }
1707
1708 /* See if we can get a better vector mode before extracting. */
1709 if (VECTOR_MODE_P (GET_MODE (op0))
1710 && !MEM_P (op0)
1711 && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1712 {
1713 machine_mode new_mode;
1714
1715 if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1716 new_mode = MIN_MODE_VECTOR_FLOAT;
1717 else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1718 new_mode = MIN_MODE_VECTOR_FRACT;
1719 else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1720 new_mode = MIN_MODE_VECTOR_UFRACT;
1721 else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1722 new_mode = MIN_MODE_VECTOR_ACCUM;
1723 else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1724 new_mode = MIN_MODE_VECTOR_UACCUM;
1725 else
1726 new_mode = MIN_MODE_VECTOR_INT;
1727
1728 FOR_EACH_MODE_FROM (new_mode, new_mode)
1729 if (known_eq (GET_MODE_SIZE (new_mode), GET_MODE_SIZE (GET_MODE (op0)))
1730 && known_eq (GET_MODE_UNIT_SIZE (new_mode), GET_MODE_SIZE (tmode))
1731 && targetm.vector_mode_supported_p (new_mode))
1732 break;
1733 if (new_mode != VOIDmode)
1734 op0 = gen_lowpart (new_mode, op0);
1735 }
1736
1737 /* Use vec_extract patterns for extracting parts of vectors whenever
1738 available. If that fails, see whether the current modes and bitregion
1739 give a natural subreg. */
1740 machine_mode outermode = GET_MODE (op0);
1741 if (VECTOR_MODE_P (outermode) && !MEM_P (op0))
1742 {
1743 scalar_mode innermode = GET_MODE_INNER (outermode);
1744 enum insn_code icode
1745 = convert_optab_handler (vec_extract_optab, outermode, innermode);
1746 poly_uint64 pos;
1747 if (icode != CODE_FOR_nothing
1748 && known_eq (bitsize, GET_MODE_BITSIZE (innermode))
1749 && multiple_p (bitnum, GET_MODE_BITSIZE (innermode), &pos))
1750 {
1751 class expand_operand ops[3];
1752
1753 create_output_operand (&ops[0], target, innermode);
1754 ops[0].target = 1;
1755 create_input_operand (&ops[1], op0, outermode);
1756 create_integer_operand (&ops[2], pos);
1757 if (maybe_expand_insn (icode, 3, ops))
1758 {
1759 if (alt_rtl && ops[0].target)
1760 *alt_rtl = target;
1761 target = ops[0].value;
1762 if (GET_MODE (target) != mode)
1763 return gen_lowpart (tmode, target);
1764 return target;
1765 }
1766 }
1767 /* Using subregs is useful if we're extracting one register vector
1768 from a multi-register vector. extract_bit_field_as_subreg checks
1769 for valid bitsize and bitnum, so we don't need to do that here. */
1770 if (VECTOR_MODE_P (mode))
1771 {
1772 rtx sub = extract_bit_field_as_subreg (mode, op0, bitsize, bitnum);
1773 if (sub)
1774 return sub;
1775 }
1776 }
1777
1778 /* Make sure we are playing with integral modes. Pun with subregs
1779 if we aren't. */
1780 opt_scalar_int_mode op0_mode = int_mode_for_mode (GET_MODE (op0));
1781 scalar_int_mode imode;
1782 if (!op0_mode.exists (&imode) || imode != GET_MODE (op0))
1783 {
1784 if (MEM_P (op0))
1785 op0 = adjust_bitfield_address_size (op0, op0_mode.else_blk (),
1786 0, MEM_SIZE (op0));
1787 else if (op0_mode.exists (&imode))
1788 {
1789 op0 = gen_lowpart (imode, op0);
1790
1791 /* If we got a SUBREG, force it into a register since we
1792 aren't going to be able to do another SUBREG on it. */
1793 if (GET_CODE (op0) == SUBREG)
1794 op0 = force_reg (imode, op0);
1795 }
1796 else
1797 {
1798 poly_int64 size = GET_MODE_SIZE (GET_MODE (op0));
1799 rtx mem = assign_stack_temp (GET_MODE (op0), size);
1800 emit_move_insn (mem, op0);
1801 op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size);
1802 }
1803 }
1804
1805 /* ??? We currently assume TARGET is at least as big as BITSIZE.
1806 If that's wrong, the solution is to test for it and set TARGET to 0
1807 if needed. */
1808
1809 /* Get the mode of the field to use for atomic access or subreg
1810 conversion. */
1811 if (!SCALAR_INT_MODE_P (tmode)
1812 || !mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0).exists (&mode1))
1813 mode1 = mode;
1814 gcc_assert (mode1 != BLKmode);
1815
1816 /* Extraction of a full MODE1 value can be done with a subreg as long
1817 as the least significant bit of the value is the least significant
1818 bit of either OP0 or a word of OP0. */
1819 if (!MEM_P (op0) && !reverse)
1820 {
1821 rtx sub = extract_bit_field_as_subreg (mode1, op0, bitsize, bitnum);
1822 if (sub)
1823 return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1824 }
1825
1826 /* Extraction of a full MODE1 value can be done with a load as long as
1827 the field is on a byte boundary and is sufficiently aligned. */
1828 poly_uint64 bytenum;
1829 if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1, &bytenum))
1830 {
1831 op0 = adjust_bitfield_address (op0, mode1, bytenum);
1832 if (reverse)
1833 op0 = flip_storage_order (mode1, op0);
1834 return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1835 }
1836
1837 /* If we have a memory source and a non-constant bit offset, restrict
1838 the memory to the referenced bytes. This is a worst-case fallback
1839 but is useful for things like vector booleans. */
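   /* As an illustration: a runtime bit position of the form 8 * n + 3
      is split into a byte offset of n and a residual bit offset of 3
      within the narrowed memory reference.  */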
1840 if (MEM_P (op0) && !bitnum.is_constant ())
1841 {
1842 bytenum = bits_to_bytes_round_down (bitnum);
1843 bitnum = num_trailing_bits (bitnum);
1844 poly_uint64 bytesize = bits_to_bytes_round_up (bitnum + bitsize);
1845 op0 = adjust_bitfield_address_size (op0, BLKmode, bytenum, bytesize);
1846 op0_mode = opt_scalar_int_mode ();
1847 }
1848
1849 /* It's possible we'll need to handle other cases here for
1850 polynomial bitnum and bitsize. */
1851
1852 /* From here on we need to be looking at a fixed-size insertion. */
1853 return extract_integral_bit_field (op0, op0_mode, bitsize.to_constant (),
1854 bitnum.to_constant (), unsignedp,
1855 target, mode, tmode, reverse, fallback_p);
1856 }
1857
1858 /* Subroutine of extract_bit_field_1, with the same arguments, except
1859 that BITSIZE and BITNUM are constant. Handle cases specific to
1860 integral modes. If OP0_MODE is defined, it is the mode of OP0,
1861 otherwise OP0 is a BLKmode MEM. */
1862
1863 static rtx
1864 extract_integral_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
1865 unsigned HOST_WIDE_INT bitsize,
1866 unsigned HOST_WIDE_INT bitnum, int unsignedp,
1867 rtx target, machine_mode mode, machine_mode tmode,
1868 bool reverse, bool fallback_p)
1869 {
1870 /* Handle fields bigger than a word. */
1871
1872 if (bitsize > BITS_PER_WORD)
1873 {
1874 /* Here we transfer the words of the field
1875 in the order least significant first.
1876 This is because the most significant word is the one which may
1877 be less than full. */
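      /* For example, a 40-bit field on a 32-bit target is moved as a
	 full low-order word followed by an 8-bit most significant part,
	 so the possibly partial word is handled last.  */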
1878
1879 const bool backwards = WORDS_BIG_ENDIAN;
1880 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1881 unsigned int i;
1882 rtx_insn *last;
1883
1884 if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1885 target = gen_reg_rtx (mode);
1886
1887 /* In case we're about to clobber a base register or something
1888 (see gcc.c-torture/execute/20040625-1.c). */
1889 if (reg_mentioned_p (target, op0))
1890 target = gen_reg_rtx (mode);
1891
1892 /* Indicate for flow that the entire target reg is being set. */
1893 emit_clobber (target);
1894
1895 /* The mode must be fixed-size, since extract_bit_field_1 handles
1896 extractions from variable-sized objects before calling this
1897 function. */
1898 unsigned int target_size
1899 = GET_MODE_SIZE (GET_MODE (target)).to_constant ();
1900 last = get_last_insn ();
1901 for (i = 0; i < nwords; i++)
1902 {
1903 /* If I is 0, use the low-order word in both field and target;
1904 if I is 1, use the next to lowest word; and so on. */
1905 /* Word number in TARGET to use. */
1906 unsigned int wordnum
1907 = (backwards ? target_size / UNITS_PER_WORD - i - 1 : i);
1908 /* Offset from start of field in OP0. */
1909 unsigned int bit_offset = (backwards ^ reverse
1910 ? MAX ((int) bitsize - ((int) i + 1)
1911 * BITS_PER_WORD,
1912 0)
1913 : (int) i * BITS_PER_WORD);
1914 rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1915 rtx result_part
1916 = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1917 bitsize - i * BITS_PER_WORD),
1918 bitnum + bit_offset, 1, target_part,
1919 mode, word_mode, reverse, fallback_p, NULL);
1920
1921 gcc_assert (target_part);
1922 if (!result_part)
1923 {
1924 delete_insns_since (last);
1925 return NULL;
1926 }
1927
1928 if (result_part != target_part)
1929 emit_move_insn (target_part, result_part);
1930 }
1931
1932 if (unsignedp)
1933 {
1934 /* Unless we've filled TARGET, the upper regs in a multi-reg value
1935 need to be zero'd out. */
1936 if (target_size > nwords * UNITS_PER_WORD)
1937 {
1938 unsigned int i, total_words;
1939
1940 total_words = target_size / UNITS_PER_WORD;
1941 for (i = nwords; i < total_words; i++)
1942 emit_move_insn
1943 (operand_subword (target,
1944 backwards ? total_words - i - 1 : i,
1945 1, VOIDmode),
1946 const0_rtx);
1947 }
1948 return target;
1949 }
1950
1951 /* Signed bit field: sign-extend with two arithmetic shifts. */
1952 target = expand_shift (LSHIFT_EXPR, mode, target,
1953 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1954 return expand_shift (RSHIFT_EXPR, mode, target,
1955 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1956 }
1957
1958 /* If OP0 is a multi-word register, narrow it to the affected word.
1959 If the region spans two words, defer to extract_split_bit_field. */
1960 if (!MEM_P (op0) && GET_MODE_SIZE (op0_mode.require ()) > UNITS_PER_WORD)
1961 {
1962 if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
1963 {
1964 if (!fallback_p)
1965 return NULL_RTX;
1966 target = extract_split_bit_field (op0, op0_mode, bitsize, bitnum,
1967 unsignedp, reverse);
1968 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1969 }
1970 op0 = simplify_gen_subreg (word_mode, op0, op0_mode.require (),
1971 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1972 op0_mode = word_mode;
1973 bitnum %= BITS_PER_WORD;
1974 }
1975
1976 /* From here on we know the desired field is smaller than a word.
1977 If OP0 is a register, it too fits within a word. */
1978 enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv;
1979 extraction_insn extv;
1980 if (!MEM_P (op0)
1981 && !reverse
1982 /* ??? We could limit the structure size to the part of OP0 that
1983 contains the field, with appropriate checks for endianness
1984 and TARGET_TRULY_NOOP_TRUNCATION. */
1985 && get_best_reg_extraction_insn (&extv, pattern,
1986 GET_MODE_BITSIZE (op0_mode.require ()),
1987 tmode))
1988 {
1989 rtx result = extract_bit_field_using_extv (&extv, op0, op0_mode,
1990 bitsize, bitnum,
1991 unsignedp, target, mode,
1992 tmode);
1993 if (result)
1994 return result;
1995 }
1996
1997 /* If OP0 is a memory, try copying it to a register and seeing if a
1998 cheap register alternative is available. */
1999 if (MEM_P (op0) && !reverse)
2000 {
2001 if (get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum,
2002 tmode))
2003 {
2004 rtx result = extract_bit_field_using_extv (&extv, op0, op0_mode,
2005 bitsize, bitnum,
2006 unsignedp, target, mode,
2007 tmode);
2008 if (result)
2009 return result;
2010 }
2011
2012 rtx_insn *last = get_last_insn ();
2013
2014 /* Try loading part of OP0 into a register and extracting the
2015 bitfield from that. */
2016 unsigned HOST_WIDE_INT bitpos;
2017 rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum,
2018 0, 0, tmode, &bitpos);
2019 if (xop0)
2020 {
2021 xop0 = copy_to_reg (xop0);
2022 rtx result = extract_bit_field_1 (xop0, bitsize, bitpos,
2023 unsignedp, target,
2024 mode, tmode, reverse, false, NULL);
2025 if (result)
2026 return result;
2027 delete_insns_since (last);
2028 }
2029 }
2030
2031 if (!fallback_p)
2032 return NULL;
2033
2034 /* Find a correspondingly-sized integer field, so we can apply
2035 shifts and masks to it. */
2036 scalar_int_mode int_mode;
2037 if (!int_mode_for_mode (tmode).exists (&int_mode))
2038 /* If this fails, we should probably push op0 out to memory and then
2039 do a load. */
2040 int_mode = int_mode_for_mode (mode).require ();
2041
2042 target = extract_fixed_bit_field (int_mode, op0, op0_mode, bitsize,
2043 bitnum, target, unsignedp, reverse);
2044
2045 /* Complex values must be reversed piecewise, so we need to undo the global
2046 reversal, convert to the complex mode and reverse again. */
2047 if (reverse && COMPLEX_MODE_P (tmode))
2048 {
2049 target = flip_storage_order (int_mode, target);
2050 target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
2051 target = flip_storage_order (tmode, target);
2052 }
2053 else
2054 target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
2055
2056 return target;
2057 }
2058
2059 /* Generate code to extract a bit-field from STR_RTX
2060 containing BITSIZE bits, starting at BITNUM,
2061 and put it in TARGET if possible (if TARGET is nonzero).
2062 Regardless of TARGET, we return the rtx for where the value is placed.
2063
2064 STR_RTX is the structure containing the field (a REG or MEM).
2065 UNSIGNEDP is nonzero if this is an unsigned bit field.
2066 MODE is the natural mode of the field value once extracted.
2067 TMODE is the mode the caller would like the value to have;
2068 but the value may be returned with type MODE instead.
2069
2070 If REVERSE is true, the extraction is to be done in reverse order.
2071
2072 If a TARGET is specified and we can store in it at no extra cost,
2073 we do so, and return TARGET.
2074 Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
2075 if they are equally easy.
2076
2077 If the result can be stored at TARGET, and ALT_RTL is non-NULL,
2078 then *ALT_RTL is set to TARGET (before legitimization). */
2079
2080 rtx
2081 extract_bit_field (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
2082 int unsignedp, rtx target, machine_mode mode,
2083 machine_mode tmode, bool reverse, rtx *alt_rtl)
2084 {
2085 machine_mode mode1;
2086
2087 /* Handle -fstrict-volatile-bitfields in the cases where it applies. */
2088 if (maybe_ne (GET_MODE_BITSIZE (GET_MODE (str_rtx)), 0))
2089 mode1 = GET_MODE (str_rtx);
2090 else if (target && maybe_ne (GET_MODE_BITSIZE (GET_MODE (target)), 0))
2091 mode1 = GET_MODE (target);
2092 else
2093 mode1 = tmode;
2094
2095 unsigned HOST_WIDE_INT ibitsize, ibitnum;
2096 scalar_int_mode int_mode;
2097 if (bitsize.is_constant (&ibitsize)
2098 && bitnum.is_constant (&ibitnum)
2099 && is_a <scalar_int_mode> (mode1, &int_mode)
2100 && strict_volatile_bitfield_p (str_rtx, ibitsize, ibitnum,
2101 int_mode, 0, 0))
2102 {
2103 /* Extraction of a full INT_MODE value can be done with a simple load.
2104 We know here that the field can be accessed with one single
2105 instruction. For targets that support unaligned memory,
2106 an unaligned access may be necessary. */
2107 if (ibitsize == GET_MODE_BITSIZE (int_mode))
2108 {
2109 rtx result = adjust_bitfield_address (str_rtx, int_mode,
2110 ibitnum / BITS_PER_UNIT);
2111 if (reverse)
2112 result = flip_storage_order (int_mode, result);
2113 gcc_assert (ibitnum % BITS_PER_UNIT == 0);
2114 return convert_extracted_bit_field (result, mode, tmode, unsignedp);
2115 }
2116
2117 str_rtx = narrow_bit_field_mem (str_rtx, int_mode, ibitsize, ibitnum,
2118 &ibitnum);
2119 gcc_assert (ibitnum + ibitsize <= GET_MODE_BITSIZE (int_mode));
2120 str_rtx = copy_to_reg (str_rtx);
2121 return extract_bit_field_1 (str_rtx, ibitsize, ibitnum, unsignedp,
2122 target, mode, tmode, reverse, true, alt_rtl);
2123 }
2124
2125 return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
2126 target, mode, tmode, reverse, true, alt_rtl);
2127 }
2128
2129 /* Use shifts and boolean operations to extract a field of BITSIZE bits
2130 from bit BITNUM of OP0. If OP0_MODE is defined, it is the mode of OP0,
2131 otherwise OP0 is a BLKmode MEM.
2132
2133 UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
2134 If REVERSE is true, the extraction is to be done in reverse order.
2135
2136 If TARGET is nonzero, attempts to store the value there
2137 and return TARGET, but this is not guaranteed.
2138 If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */
2139
2140 static rtx
2141 extract_fixed_bit_field (machine_mode tmode, rtx op0,
2142 opt_scalar_int_mode op0_mode,
2143 unsigned HOST_WIDE_INT bitsize,
2144 unsigned HOST_WIDE_INT bitnum, rtx target,
2145 int unsignedp, bool reverse)
2146 {
2147 scalar_int_mode mode;
2148 if (MEM_P (op0))
2149 {
2150 if (!get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0),
2151 BITS_PER_WORD, MEM_VOLATILE_P (op0), &mode))
2152 /* The only way this should occur is if the field spans word
2153 boundaries. */
2154 return extract_split_bit_field (op0, op0_mode, bitsize, bitnum,
2155 unsignedp, reverse);
2156
2157 op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
2158 }
2159 else
2160 mode = op0_mode.require ();
2161
2162 return extract_fixed_bit_field_1 (tmode, op0, mode, bitsize, bitnum,
2163 target, unsignedp, reverse);
2164 }
2165
2166 /* Helper function for extract_fixed_bit_field, extracts
2167 the bit field always using MODE, which is the mode of OP0.
2168 The other arguments are as for extract_fixed_bit_field. */
2169
2170 static rtx
2171 extract_fixed_bit_field_1 (machine_mode tmode, rtx op0, scalar_int_mode mode,
2172 unsigned HOST_WIDE_INT bitsize,
2173 unsigned HOST_WIDE_INT bitnum, rtx target,
2174 int unsignedp, bool reverse)
2175 {
2176 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
2177 for invalid input, such as extract equivalent of f5 from
2178 gcc.dg/pr48335-2.c. */
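  /* As a concrete illustration (little-endian, no REVERSE): an unsigned
     8-bit field at BITNUM 4 of an SImode value becomes
	(op0 >> 4) & 0xff
     while the signed case shifts the field to the top of the narrowest
     containing integer mode and arithmetic-shifts it back down.  */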
2179
2180 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2181 /* BITNUM is the distance between our msb and that of OP0.
2182 Convert it to the distance from the lsb. */
2183 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
2184
2185 /* Now BITNUM is always the distance between the field's lsb and that of OP0.
2186 We have reduced the big-endian case to the little-endian case. */
2187 if (reverse)
2188 op0 = flip_storage_order (mode, op0);
2189
2190 if (unsignedp)
2191 {
2192 if (bitnum)
2193 {
2194 /* If the field does not already start at the lsb,
2195 shift it so it does. */
2196 /* Maybe propagate the target for the shift. */
2197 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2198 if (tmode != mode)
2199 subtarget = 0;
2200 op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
2201 }
2202 /* Convert the value to the desired mode. TMODE must also be a
2203 scalar integer for this conversion to make sense, since we
2204 shouldn't reinterpret the bits. */
2205 scalar_int_mode new_mode = as_a <scalar_int_mode> (tmode);
2206 if (mode != new_mode)
2207 op0 = convert_to_mode (new_mode, op0, 1);
2208
2209 /* Unless the msb of the field used to be the msb when we shifted,
2210 mask out the upper bits. */
2211
2212 if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
2213 return expand_binop (new_mode, and_optab, op0,
2214 mask_rtx (new_mode, 0, bitsize, 0),
2215 target, 1, OPTAB_LIB_WIDEN);
2216 return op0;
2217 }
2218
2219 /* To extract a signed bit-field, first shift its msb to the msb of the word,
2220 then arithmetic-shift its lsb to the lsb of the word. */
2221 op0 = force_reg (mode, op0);
2222
2223 /* Find the narrowest integer mode that contains the field. */
2224
2225 opt_scalar_int_mode mode_iter;
2226 FOR_EACH_MODE_IN_CLASS (mode_iter, MODE_INT)
2227 if (GET_MODE_BITSIZE (mode_iter.require ()) >= bitsize + bitnum)
2228 break;
2229
2230 mode = mode_iter.require ();
2231 op0 = convert_to_mode (mode, op0, 0);
2232
2233 if (mode != tmode)
2234 target = 0;
2235
2236 if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
2237 {
2238 int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
2239 /* Maybe propagate the target for the shift. */
2240 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2241 op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
2242 }
2243
2244 return expand_shift (RSHIFT_EXPR, mode, op0,
2245 GET_MODE_BITSIZE (mode) - bitsize, target, 0);
2246 }
2247
2248 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
2249 VALUE << BITPOS. */
2250
2251 static rtx
2252 lshift_value (machine_mode mode, unsigned HOST_WIDE_INT value,
2253 int bitpos)
2254 {
2255 return immed_wide_int_const (wi::lshift (value, bitpos), mode);
2256 }
2257
2258 /* Extract a bit field that is split across two words
2259 and return an RTX for the result.
2260
2261 OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
2262 BITSIZE is the field width; BITPOS, position of its first bit, in the word.
2263 UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.
2264 If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is
2265 a BLKmode MEM.
2266
2267 If REVERSE is true, the extraction is to be done in reverse order. */
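/* For example, with 32-bit words, a 16-bit field starting at bit 24 is
   read as 8 bits from the first word and 8 bits from the second; the
   two parts are shifted into position and combined with IOR below.  */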
2268
2269 static rtx
2270 extract_split_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
2271 unsigned HOST_WIDE_INT bitsize,
2272 unsigned HOST_WIDE_INT bitpos, int unsignedp,
2273 bool reverse)
2274 {
2275 unsigned int unit;
2276 unsigned int bitsdone = 0;
2277 rtx result = NULL_RTX;
2278 int first = 1;
2279
2280 /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
2281 much at a time. */
2282 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
2283 unit = BITS_PER_WORD;
2284 else
2285 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
2286
2287 while (bitsdone < bitsize)
2288 {
2289 unsigned HOST_WIDE_INT thissize;
2290 rtx part;
2291 unsigned HOST_WIDE_INT thispos;
2292 unsigned HOST_WIDE_INT offset;
2293
2294 offset = (bitpos + bitsdone) / unit;
2295 thispos = (bitpos + bitsdone) % unit;
2296
2297 /* THISSIZE must not overrun a word boundary. Otherwise,
2298 extract_fixed_bit_field will call us again, and we will mutually
2299 recurse forever. */
2300 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
2301 thissize = MIN (thissize, unit - thispos);
2302
2303 /* If OP0 is a register, then handle OFFSET here. */
2304 rtx op0_piece = op0;
2305 opt_scalar_int_mode op0_piece_mode = op0_mode;
2306 if (SUBREG_P (op0) || REG_P (op0))
2307 {
2308 op0_piece = operand_subword_force (op0, offset, op0_mode.require ());
2309 op0_piece_mode = word_mode;
2310 offset = 0;
2311 }
2312
2313 /* Extract the parts in bit-counting order, whose meaning is
2314 determined by the target's byte endianness (BYTES_BIG_ENDIAN)
2315 and REVERSE. OFFSET is in UNITs, and UNIT is in bits. */
2316 part = extract_fixed_bit_field (word_mode, op0_piece, op0_piece_mode,
2317 thissize, offset * unit + thispos,
2318 0, 1, reverse);
2319 bitsdone += thissize;
2320
2321 /* Shift this part into place for the result. */
2322 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2323 {
2324 if (bitsize != bitsdone)
2325 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2326 bitsize - bitsdone, 0, 1);
2327 }
2328 else
2329 {
2330 if (bitsdone != thissize)
2331 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2332 bitsdone - thissize, 0, 1);
2333 }
2334
2335 if (first)
2336 result = part;
2337 else
2338 /* Combine the parts with bitwise or. This works
2339 because we extracted each part as an unsigned bit field. */
2340 result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2341 OPTAB_LIB_WIDEN);
2342
2343 first = 0;
2344 }
2345
2346 /* Unsigned bit field: we are done. */
2347 if (unsignedp)
2348 return result;
2349 /* Signed bit field: sign-extend with two arithmetic shifts. */
2350 result = expand_shift (LSHIFT_EXPR, word_mode, result,
2351 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2352 return expand_shift (RSHIFT_EXPR, word_mode, result,
2353 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2354 }
2355
2356 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2357 the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than
2358 MODE, fill the upper bits with zeros. Fail if the layout of either
2359 mode is unknown (as for CC modes) or if the extraction would involve
2360 unprofitable mode punning. Return the value on success, otherwise
2361 return null.
2362
2363 This is different from gen_lowpart* in these respects:
2364
2365 - the returned value must always be considered an rvalue
2366
2367 - when MODE is wider than SRC_MODE, the extraction involves
2368 a zero extension
2369
2370 - when MODE is smaller than SRC_MODE, the extraction involves
2371 a truncation (and is thus subject to TARGET_TRULY_NOOP_TRUNCATION).
2372
2373 In other words, this routine performs a computation, whereas the
2374 gen_lowpart* routines are conceptually lvalue or rvalue subreg
2375 operations. */
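/* For example, reading an SImode value from a DImode source is a
   truncation to the low 32 bits, while reading a DImode value from an
   SImode source zero-extends the upper half.  */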
2376
2377 rtx
2378 extract_low_bits (machine_mode mode, machine_mode src_mode, rtx src)
2379 {
2380 scalar_int_mode int_mode, src_int_mode;
2381
2382 if (mode == src_mode)
2383 return src;
2384
2385 if (CONSTANT_P (src))
2386 {
2387 /* simplify_gen_subreg can't be used here, as if simplify_subreg
2388 fails, it will happily create (subreg (symbol_ref)) or similar
2389 invalid SUBREGs. */
2390 poly_uint64 byte = subreg_lowpart_offset (mode, src_mode);
2391 rtx ret = simplify_subreg (mode, src, src_mode, byte);
2392 if (ret)
2393 return ret;
2394
2395 if (GET_MODE (src) == VOIDmode
2396 || !validate_subreg (mode, src_mode, src, byte))
2397 return NULL_RTX;
2398
2399 src = force_reg (GET_MODE (src), src);
2400 return gen_rtx_SUBREG (mode, src, byte);
2401 }
2402
2403 if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2404 return NULL_RTX;
2405
2406 if (known_eq (GET_MODE_BITSIZE (mode), GET_MODE_BITSIZE (src_mode))
2407 && targetm.modes_tieable_p (mode, src_mode))
2408 {
2409 rtx x = gen_lowpart_common (mode, src);
2410 if (x)
2411 return x;
2412 }
2413
2414 if (!int_mode_for_mode (src_mode).exists (&src_int_mode)
2415 || !int_mode_for_mode (mode).exists (&int_mode))
2416 return NULL_RTX;
2417
2418 if (!targetm.modes_tieable_p (src_int_mode, src_mode))
2419 return NULL_RTX;
2420 if (!targetm.modes_tieable_p (int_mode, mode))
2421 return NULL_RTX;
2422
2423 src = gen_lowpart (src_int_mode, src);
2424 if (!validate_subreg (int_mode, src_int_mode, src,
2425 subreg_lowpart_offset (int_mode, src_int_mode)))
2426 return NULL_RTX;
2427
2428 src = convert_modes (int_mode, src_int_mode, src, true);
2429 src = gen_lowpart (mode, src);
2430 return src;
2431 }
2432
2433 /* Add INC into TARGET. */
2434
2435 void
2436 expand_inc (rtx target, rtx inc)
2437 {
2438 rtx value = expand_binop (GET_MODE (target), add_optab,
2439 target, inc,
2440 target, 0, OPTAB_LIB_WIDEN);
2441 if (value != target)
2442 emit_move_insn (target, value);
2443 }
2444
2445 /* Subtract DEC from TARGET. */
2446
2447 void
2448 expand_dec (rtx target, rtx dec)
2449 {
2450 rtx value = expand_binop (GET_MODE (target), sub_optab,
2451 target, dec,
2452 target, 0, OPTAB_LIB_WIDEN);
2453 if (value != target)
2454 emit_move_insn (target, value);
2455 }
2456
2457 /* Output a shift instruction for expression code CODE,
2458 with SHIFTED being the rtx for the value to shift,
2459 and AMOUNT the rtx for the amount to shift by.
2460 Store the result in the rtx TARGET, if that is convenient.
2461 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2462 Return the rtx for where the value is.
2463 If that cannot be done, abort the compilation unless MAY_FAIL is true,
2464 in which case 0 is returned. */
2465
2466 static rtx
2467 expand_shift_1 (enum tree_code code, machine_mode mode, rtx shifted,
2468 rtx amount, rtx target, int unsignedp, bool may_fail = false)
2469 {
2470 rtx op1, temp = 0;
2471 int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2472 int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2473 optab lshift_optab = ashl_optab;
2474 optab rshift_arith_optab = ashr_optab;
2475 optab rshift_uns_optab = lshr_optab;
2476 optab lrotate_optab = rotl_optab;
2477 optab rrotate_optab = rotr_optab;
2478 machine_mode op1_mode;
2479 scalar_mode scalar_mode = GET_MODE_INNER (mode);
2480 int attempt;
2481 bool speed = optimize_insn_for_speed_p ();
2482
2483 op1 = amount;
2484 op1_mode = GET_MODE (op1);
2485
2486 /* Determine whether the shift/rotate amount is a vector, or scalar. If the
2487 shift amount is a vector, use the vector/vector shift patterns. */
2488 if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2489 {
2490 lshift_optab = vashl_optab;
2491 rshift_arith_optab = vashr_optab;
2492 rshift_uns_optab = vlshr_optab;
2493 lrotate_optab = vrotl_optab;
2494 rrotate_optab = vrotr_optab;
2495 }
2496
2497 /* Previously detected shift-counts computed by NEGATE_EXPR
2498 and shifted in the other direction; but that does not work
2499 on all machines. */
2500
2501 if (SHIFT_COUNT_TRUNCATED)
2502 {
2503 if (CONST_INT_P (op1)
2504 && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2505 (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (scalar_mode)))
2506 op1 = gen_int_shift_amount (mode,
2507 (unsigned HOST_WIDE_INT) INTVAL (op1)
2508 % GET_MODE_BITSIZE (scalar_mode));
2509 else if (GET_CODE (op1) == SUBREG
2510 && subreg_lowpart_p (op1)
2511 && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1)))
2512 && SCALAR_INT_MODE_P (GET_MODE (op1)))
2513 op1 = SUBREG_REG (op1);
2514 }
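  /* For example, on a target where SHIFT_COUNT_TRUNCATED holds, a
     constant SImode shift by 33 is reduced here to a shift by 1
     (33 % 32).  */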
2515
2516 /* Canonicalize rotates by constant amount. If op1 is bitsize / 2,
2517 prefer left rotation, if op1 is from bitsize / 2 + 1 to
2518 bitsize - 1, use other direction of rotate with 1 .. bitsize / 2 - 1
2519 amount instead. */
2520 if (rotate
2521 && CONST_INT_P (op1)
2522 && IN_RANGE (INTVAL (op1), GET_MODE_BITSIZE (scalar_mode) / 2 + left,
2523 GET_MODE_BITSIZE (scalar_mode) - 1))
2524 {
2525 op1 = gen_int_shift_amount (mode, (GET_MODE_BITSIZE (scalar_mode)
2526 - INTVAL (op1)));
2527 left = !left;
2528 code = left ? LROTATE_EXPR : RROTATE_EXPR;
2529 }
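  /* For example, a 32-bit rotate left by 25 is rewritten here as a
     rotate right by 7.  */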
2530
2531 /* Rotation of 16bit values by 8 bits is effectively equivalent to a bswaphi.
2532 Note that this is not the case for bigger values. For instance a rotation
2533 of 0x01020304 by 16 bits gives 0x03040102 which is different from
2534 0x04030201 (bswapsi). */
2535 if (rotate
2536 && CONST_INT_P (op1)
2537 && INTVAL (op1) == BITS_PER_UNIT
2538 && GET_MODE_SIZE (scalar_mode) == 2
2539 && optab_handler (bswap_optab, mode) != CODE_FOR_nothing)
2540 return expand_unop (mode, bswap_optab, shifted, NULL_RTX, unsignedp);
2541
2542 if (op1 == const0_rtx)
2543 return shifted;
2544
2545 /* Check whether it's cheaper to implement a left shift by a constant
2546 bit count by a sequence of additions. */
2547 if (code == LSHIFT_EXPR
2548 && CONST_INT_P (op1)
2549 && INTVAL (op1) > 0
2550 && INTVAL (op1) < GET_MODE_PRECISION (scalar_mode)
2551 && INTVAL (op1) < MAX_BITS_PER_WORD
2552 && (shift_cost (speed, mode, INTVAL (op1))
2553 > INTVAL (op1) * add_cost (speed, mode))
2554 && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2555 {
2556 int i;
2557 for (i = 0; i < INTVAL (op1); i++)
2558 {
2559 temp = force_reg (mode, shifted);
2560 shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2561 unsignedp, OPTAB_LIB_WIDEN);
2562 }
2563 return shifted;
2564 }
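  /* For example, x << 3 is expanded above as three doublings
	t = x + x;  t = t + t;  t = t + t;
     when the three additions are cheaper than the shift.  */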
2565
2566 for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2567 {
2568 enum optab_methods methods;
2569
2570 if (attempt == 0)
2571 methods = OPTAB_DIRECT;
2572 else if (attempt == 1)
2573 methods = OPTAB_WIDEN;
2574 else
2575 methods = OPTAB_LIB_WIDEN;
2576
2577 if (rotate)
2578 {
2579 /* Widening does not work for rotation. */
2580 if (methods == OPTAB_WIDEN)
2581 continue;
2582 else if (methods == OPTAB_LIB_WIDEN)
2583 {
2584 /* If we have been unable to open-code this by a rotation,
2585 do it as the IOR of two shifts. I.e., to rotate A
2586 by N bits, compute
2587 (A << N) | ((unsigned) A >> ((-N) & (C - 1)))
2588 where C is the bitsize of A.
2589
2590 It is theoretically possible that the target machine might
2591 not be able to perform either shift and hence we would
2592 be making two libcalls rather than just the one for the
2593 shift (similarly if IOR could not be done). We will allow
2594 this extremely unlikely lossage to avoid complicating the
2595 code below. */
2596
2597 rtx subtarget = target == shifted ? 0 : target;
2598 rtx new_amount, other_amount;
2599 rtx temp1;
2600
2601 new_amount = op1;
2602 if (op1 == const0_rtx)
2603 return shifted;
2604 else if (CONST_INT_P (op1))
2605 other_amount = gen_int_shift_amount
2606 (mode, GET_MODE_BITSIZE (scalar_mode) - INTVAL (op1));
2607 else
2608 {
2609 other_amount
2610 = simplify_gen_unary (NEG, GET_MODE (op1),
2611 op1, GET_MODE (op1));
2612 HOST_WIDE_INT mask = GET_MODE_PRECISION (scalar_mode) - 1;
2613 other_amount
2614 = simplify_gen_binary (AND, GET_MODE (op1), other_amount,
2615 gen_int_mode (mask, GET_MODE (op1)));
2616 }
2617
2618 shifted = force_reg (mode, shifted);
2619
2620 temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2621 mode, shifted, new_amount, 0, 1);
2622 temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2623 mode, shifted, other_amount,
2624 subtarget, 1);
2625 return expand_binop (mode, ior_optab, temp, temp1, target,
2626 unsignedp, methods);
2627 }
2628
2629 temp = expand_binop (mode,
2630 left ? lrotate_optab : rrotate_optab,
2631 shifted, op1, target, unsignedp, methods);
2632 }
2633 else if (unsignedp)
2634 temp = expand_binop (mode,
2635 left ? lshift_optab : rshift_uns_optab,
2636 shifted, op1, target, unsignedp, methods);
2637
2638 /* Do arithmetic shifts.
2639 Also, if we are going to widen the operand, we can just as well
2640 use an arithmetic right-shift instead of a logical one. */
2641 if (temp == 0 && ! rotate
2642 && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2643 {
2644 enum optab_methods methods1 = methods;
2645
2646 /* If trying to widen a log shift to an arithmetic shift,
2647 don't accept an arithmetic shift of the same size. */
2648 if (unsignedp)
2649 methods1 = OPTAB_MUST_WIDEN;
2650
2651 /* Arithmetic shift */
2652
2653 temp = expand_binop (mode,
2654 left ? lshift_optab : rshift_arith_optab,
2655 shifted, op1, target, unsignedp, methods1);
2656 }
2657
2658 /* We used to try extzv here for logical right shifts, but that was
2659 only useful for one machine, the VAX, and caused poor code
2660 generation there for lshrdi3, so the code was deleted and a
2661 define_expand for lshrsi3 was added to vax.md. */
2662 }
2663
2664 gcc_assert (temp != NULL_RTX || may_fail);
2665 return temp;
2666 }
2667
2668 /* Output a shift instruction for expression code CODE,
2669 with SHIFTED being the rtx for the value to shift,
2670 and AMOUNT the amount to shift by.
2671 Store the result in the rtx TARGET, if that is convenient.
2672 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2673 Return the rtx for where the value is. */
2674
2675 rtx
2676 expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2677 poly_int64 amount, rtx target, int unsignedp)
2678 {
2679 return expand_shift_1 (code, mode, shifted,
2680 gen_int_shift_amount (mode, amount),
2681 target, unsignedp);
2682 }
2683
2684 /* Likewise, but return 0 if that cannot be done. */
2685
2686 static rtx
2687 maybe_expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2688 int amount, rtx target, int unsignedp)
2689 {
2690 return expand_shift_1 (code, mode,
2691 shifted, GEN_INT (amount), target, unsignedp, true);
2692 }
2693
2694 /* Output a shift instruction for expression code CODE,
2695 with SHIFTED being the rtx for the value to shift,
2696 and AMOUNT the tree for the amount to shift by.
2697 Store the result in the rtx TARGET, if that is convenient.
2698 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2699 Return the rtx for where the value is. */
2700
2701 rtx
2702 expand_variable_shift (enum tree_code code, machine_mode mode, rtx shifted,
2703 tree amount, rtx target, int unsignedp)
2704 {
2705 return expand_shift_1 (code, mode,
2706 shifted, expand_normal (amount), target, unsignedp);
2707 }
2708
2709
2710 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2711 const struct mult_cost *, machine_mode mode);
2712 static rtx expand_mult_const (machine_mode, rtx, HOST_WIDE_INT, rtx,
2713 const struct algorithm *, enum mult_variant);
2714 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2715 static rtx extract_high_half (scalar_int_mode, rtx);
2716 static rtx expmed_mult_highpart (scalar_int_mode, rtx, rtx, rtx, int, int);
2717 static rtx expmed_mult_highpart_optab (scalar_int_mode, rtx, rtx, rtx,
2718 int, int);
2719 /* Compute and return the best algorithm for multiplying by T.
2720 The algorithm must cost less than COST_LIMIT.
2721 If retval.cost >= COST_LIMIT, no algorithm was found and all
2722 other fields of the returned struct are undefined.
2723 MODE is the machine mode of the multiplication. */
2724
2725 static void
2726 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2727 const struct mult_cost *cost_limit, machine_mode mode)
2728 {
2729 int m;
2730 struct algorithm *alg_in, *best_alg;
2731 struct mult_cost best_cost;
2732 struct mult_cost new_limit;
2733 int op_cost, op_latency;
2734 unsigned HOST_WIDE_INT orig_t = t;
2735 unsigned HOST_WIDE_INT q;
2736 int maxm, hash_index;
2737 bool cache_hit = false;
2738 enum alg_code cache_alg = alg_zero;
2739 bool speed = optimize_insn_for_speed_p ();
2740 scalar_int_mode imode;
2741 struct alg_hash_entry *entry_ptr;
2742
2743 /* Indicate that no algorithm is yet found. If no algorithm
2744 is found, this value will be returned and indicate failure. */
2745 alg_out->cost.cost = cost_limit->cost + 1;
2746 alg_out->cost.latency = cost_limit->latency + 1;
2747
2748 if (cost_limit->cost < 0
2749 || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2750 return;
2751
2752 /* Be prepared for vector modes. */
2753 imode = as_a <scalar_int_mode> (GET_MODE_INNER (mode));
2754
2755 maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2756
2757 /* Restrict the bits of "t" to the multiplication's mode. */
2758 t &= GET_MODE_MASK (imode);
2759
2760 /* t == 1 can be done in zero cost. */
2761 if (t == 1)
2762 {
2763 alg_out->ops = 1;
2764 alg_out->cost.cost = 0;
2765 alg_out->cost.latency = 0;
2766 alg_out->op[0] = alg_m;
2767 return;
2768 }
2769
2770 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2771 fail now. */
2772 if (t == 0)
2773 {
2774 if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2775 return;
2776 else
2777 {
2778 alg_out->ops = 1;
2779 alg_out->cost.cost = zero_cost (speed);
2780 alg_out->cost.latency = zero_cost (speed);
2781 alg_out->op[0] = alg_zero;
2782 return;
2783 }
2784 }
2785
2786 /* We'll be needing a couple extra algorithm structures now. */
2787
2788 alg_in = XALLOCA (struct algorithm);
2789 best_alg = XALLOCA (struct algorithm);
2790 best_cost = *cost_limit;
2791
2792 /* Compute the hash index. */
2793 hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2794
2795 /* See if we already know what to do for T. */
2796 entry_ptr = alg_hash_entry_ptr (hash_index);
2797 if (entry_ptr->t == t
2798 && entry_ptr->mode == mode
2799 && entry_ptr->speed == speed
2800 && entry_ptr->alg != alg_unknown)
2801 {
2802 cache_alg = entry_ptr->alg;
2803
2804 if (cache_alg == alg_impossible)
2805 {
2806 /* The cache tells us that it's impossible to synthesize
2807 multiplication by T within entry_ptr->cost. */
2808 if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2809 /* COST_LIMIT is at least as restrictive as the one
2810 recorded in the hash table, in which case we have no
2811 hope of synthesizing a multiplication. Just
2812 return. */
2813 return;
2814
2815 /* If we get here, COST_LIMIT is less restrictive than the
2816 one recorded in the hash table, so we may be able to
2817 synthesize a multiplication. Proceed as if we didn't
2818 have the cache entry. */
2819 }
2820 else
2821 {
2822 if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2823 /* The cached algorithm shows that this multiplication
2824 requires more cost than COST_LIMIT. Just return. This
2825 way, we don't clobber this cache entry with
2826 alg_impossible but retain useful information. */
2827 return;
2828
2829 cache_hit = true;
2830
2831 switch (cache_alg)
2832 {
2833 case alg_shift:
2834 goto do_alg_shift;
2835
2836 case alg_add_t_m2:
2837 case alg_sub_t_m2:
2838 goto do_alg_addsub_t_m2;
2839
2840 case alg_add_factor:
2841 case alg_sub_factor:
2842 goto do_alg_addsub_factor;
2843
2844 case alg_add_t2_m:
2845 goto do_alg_add_t2_m;
2846
2847 case alg_sub_t2_m:
2848 goto do_alg_sub_t2_m;
2849
2850 default:
2851 gcc_unreachable ();
2852 }
2853 }
2854 }
2855
2856 /* If we have a group of zero bits at the low-order part of T, try
2857 multiplying by the remaining bits and then doing a shift. */
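  /* For example, T == 20 (binary 10100) is handled as a multiplication
     by 5 followed by a left shift of 2.  */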
2858
2859 if ((t & 1) == 0)
2860 {
2861 do_alg_shift:
2862 m = ctz_or_zero (t); /* m = number of low zero bits */
2863 if (m < maxm)
2864 {
2865 q = t >> m;
2866 /* The function expand_shift will choose between a shift and
2867 a sequence of additions, so the observed cost is given as
2868 MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)). */
2869 op_cost = m * add_cost (speed, mode);
2870 if (shift_cost (speed, mode, m) < op_cost)
2871 op_cost = shift_cost (speed, mode, m);
2872 new_limit.cost = best_cost.cost - op_cost;
2873 new_limit.latency = best_cost.latency - op_cost;
2874 synth_mult (alg_in, q, &new_limit, mode);
2875
2876 alg_in->cost.cost += op_cost;
2877 alg_in->cost.latency += op_cost;
2878 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2879 {
2880 best_cost = alg_in->cost;
2881 std::swap (alg_in, best_alg);
2882 best_alg->log[best_alg->ops] = m;
2883 best_alg->op[best_alg->ops] = alg_shift;
2884 }
2885
2886 /* See if treating ORIG_T as a signed number yields a better
2887 sequence. Try this sequence only for a negative ORIG_T
2888 as it would be useless for a non-negative ORIG_T. */
2889 if ((HOST_WIDE_INT) orig_t < 0)
2890 {
2891 /* Shift ORIG_T as follows because a right shift of a
2892 negative-valued signed type is implementation
2893 defined. */
2894 q = ~(~orig_t >> m);
2895 /* The function expand_shift will choose between a shift
2896 and a sequence of additions, so the observed cost is
2897 given as MIN (m * add_cost(speed, mode),
2898 shift_cost(speed, mode, m)). */
2899 op_cost = m * add_cost (speed, mode);
2900 if (shift_cost (speed, mode, m) < op_cost)
2901 op_cost = shift_cost (speed, mode, m);
2902 new_limit.cost = best_cost.cost - op_cost;
2903 new_limit.latency = best_cost.latency - op_cost;
2904 synth_mult (alg_in, q, &new_limit, mode);
2905
2906 alg_in->cost.cost += op_cost;
2907 alg_in->cost.latency += op_cost;
2908 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2909 {
2910 best_cost = alg_in->cost;
2911 std::swap (alg_in, best_alg);
2912 best_alg->log[best_alg->ops] = m;
2913 best_alg->op[best_alg->ops] = alg_shift;
2914 }
2915 }
2916 }
2917 if (cache_hit)
2918 goto done;
2919 }
2920
2921 /* If we have an odd number, add or subtract one. */
2922 if ((t & 1) != 0)
2923 {
2924 unsigned HOST_WIDE_INT w;
2925
2926 do_alg_addsub_t_m2:
2927 for (w = 1; (w & t) != 0; w <<= 1)
2928 ;
2929 /* If T was -1, then W will be zero after the loop. This is another
2930 case where T ends with ...111. Handling it as (T + 1) followed by
2931 a subtraction produces slightly better code and makes algorithm
2932 selection much faster than treating it like the ...0111 case
2933 below. */
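      /* For example, T == 7 is synthesized as x * 8 - x,
	 i.e. (x << 3) - x.  */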
2934 if (w == 0
2935 || (w > 2
2936 /* Reject the case where t is 3.
2937 Thus we prefer addition in that case. */
2938 && t != 3))
2939 {
2940 /* T ends with ...111. Multiply by (T + 1) and subtract T. */
2941
2942 op_cost = add_cost (speed, mode);
2943 new_limit.cost = best_cost.cost - op_cost;
2944 new_limit.latency = best_cost.latency - op_cost;
2945 synth_mult (alg_in, t + 1, &new_limit, mode);
2946
2947 alg_in->cost.cost += op_cost;
2948 alg_in->cost.latency += op_cost;
2949 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2950 {
2951 best_cost = alg_in->cost;
2952 std::swap (alg_in, best_alg);
2953 best_alg->log[best_alg->ops] = 0;
2954 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2955 }
2956 }
2957 else
2958 {
2959 /* T ends with ...01 or ...011. Multiply by (T - 1) and add T. */
2960
2961 op_cost = add_cost (speed, mode);
2962 new_limit.cost = best_cost.cost - op_cost;
2963 new_limit.latency = best_cost.latency - op_cost;
2964 synth_mult (alg_in, t - 1, &new_limit, mode);
2965
2966 alg_in->cost.cost += op_cost;
2967 alg_in->cost.latency += op_cost;
2968 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2969 {
2970 best_cost = alg_in->cost;
2971 std::swap (alg_in, best_alg);
2972 best_alg->log[best_alg->ops] = 0;
2973 best_alg->op[best_alg->ops] = alg_add_t_m2;
2974 }
2975 }
2976
2977 /* We may be able to calculate a * -7, a * -15, a * -31, etc
2978 quickly with a - a * n for some appropriate constant n. */
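      /* For instance, a * -7 can be computed as a - (a << 3).  */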
2979 m = exact_log2 (-orig_t + 1);
2980 if (m >= 0 && m < maxm)
2981 {
2982 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2983 /* If the target has a cheap shift-and-subtract insn, use
2984 that in preference to a shift insn followed by a sub insn.
2985 Assume that the shift-and-sub is "atomic" with a latency
2986 equal to its cost; otherwise assume that on superscalar
2987 hardware the shift may be executed concurrently with the
2988 earlier steps in the algorithm. */
2989 if (shiftsub1_cost (speed, mode, m) <= op_cost)
2990 {
2991 op_cost = shiftsub1_cost (speed, mode, m);
2992 op_latency = op_cost;
2993 }
2994 else
2995 op_latency = add_cost (speed, mode);
2996
2997 new_limit.cost = best_cost.cost - op_cost;
2998 new_limit.latency = best_cost.latency - op_latency;
2999 synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
3000 &new_limit, mode);
3001
3002 alg_in->cost.cost += op_cost;
3003 alg_in->cost.latency += op_latency;
3004 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3005 {
3006 best_cost = alg_in->cost;
3007 std::swap (alg_in, best_alg);
3008 best_alg->log[best_alg->ops] = m;
3009 best_alg->op[best_alg->ops] = alg_sub_t_m2;
3010 }
3011 }
3012
3013 if (cache_hit)
3014 goto done;
3015 }
3016
3017 /* Look for factors of t of the form
3018 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
3019 If we find such a factor, we can multiply by t using an algorithm that
3020 multiplies by q, shift the result by m and add/subtract it to itself.
3021
3022 We search for large factors first and loop down, even if large factors
3023 are less probable than small; if we find a large factor we will find a
3024 good sequence quickly, and therefore be able to prune (by decreasing
3025 COST_LIMIT) the search. */
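  /* For example, T == 45 factors as 5 * (2**3 + 1), so x * 45 can be
     synthesized as t1 = (x << 2) + x (which is x * 5) followed by
     t2 = (t1 << 3) + t1 (which is x * 45).  */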
3026
3027 do_alg_addsub_factor:
3028 for (m = floor_log2 (t - 1); m >= 2; m--)
3029 {
3030 unsigned HOST_WIDE_INT d;
3031
3032 d = (HOST_WIDE_INT_1U << m) + 1;
3033 if (t % d == 0 && t > d && m < maxm
3034 && (!cache_hit || cache_alg == alg_add_factor))
3035 {
3036 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
3037 if (shiftadd_cost (speed, mode, m) <= op_cost)
3038 op_cost = shiftadd_cost (speed, mode, m);
3039
3040 op_latency = op_cost;
3041
3042
3043 new_limit.cost = best_cost.cost - op_cost;
3044 new_limit.latency = best_cost.latency - op_latency;
3045 synth_mult (alg_in, t / d, &new_limit, mode);
3046
3047 alg_in->cost.cost += op_cost;
3048 alg_in->cost.latency += op_latency;
3049 if (alg_in->cost.latency < op_cost)
3050 alg_in->cost.latency = op_cost;
3051 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3052 {
3053 best_cost = alg_in->cost;
3054 std::swap (alg_in, best_alg);
3055 best_alg->log[best_alg->ops] = m;
3056 best_alg->op[best_alg->ops] = alg_add_factor;
3057 }
3058 /* Other factors will have been taken care of in the recursion. */
3059 break;
3060 }
3061
3062 d = (HOST_WIDE_INT_1U << m) - 1;
3063 if (t % d == 0 && t > d && m < maxm
3064 && (!cache_hit || cache_alg == alg_sub_factor))
3065 {
3066 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
3067 if (shiftsub0_cost (speed, mode, m) <= op_cost)
3068 op_cost = shiftsub0_cost (speed, mode, m);
3069
3070 op_latency = op_cost;
3071
3072 new_limit.cost = best_cost.cost - op_cost;
3073 new_limit.latency = best_cost.latency - op_latency;
3074 synth_mult (alg_in, t / d, &new_limit, mode);
3075
3076 alg_in->cost.cost += op_cost;
3077 alg_in->cost.latency += op_latency;
3078 if (alg_in->cost.latency < op_cost)
3079 alg_in->cost.latency = op_cost;
3080 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3081 {
3082 best_cost = alg_in->cost;
3083 std::swap (alg_in, best_alg);
3084 best_alg->log[best_alg->ops] = m;
3085 best_alg->op[best_alg->ops] = alg_sub_factor;
3086 }
3087 break;
3088 }
3089 }
3090 if (cache_hit)
3091 goto done;
3092
3093 /* Try shift-and-add (load effective address) instructions,
3094 i.e. do a*3, a*5, a*9. */
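  /* For example, a*5 is (a << 2) + a and a*9 is (a << 3) + a.  */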
3095 if ((t & 1) != 0)
3096 {
3097 do_alg_add_t2_m:
3098 q = t - 1;
3099 m = ctz_hwi (q);
3100 if (q && m < maxm)
3101 {
3102 op_cost = shiftadd_cost (speed, mode, m);
3103 new_limit.cost = best_cost.cost - op_cost;
3104 new_limit.latency = best_cost.latency - op_cost;
3105 synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
3106
3107 alg_in->cost.cost += op_cost;
3108 alg_in->cost.latency += op_cost;
3109 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3110 {
3111 best_cost = alg_in->cost;
3112 std::swap (alg_in, best_alg);
3113 best_alg->log[best_alg->ops] = m;
3114 best_alg->op[best_alg->ops] = alg_add_t2_m;
3115 }
3116 }
3117 if (cache_hit)
3118 goto done;
3119
3120 do_alg_sub_t2_m:
3121 q = t + 1;
3122 m = ctz_hwi (q);
3123 if (q && m < maxm)
3124 {
3125 op_cost = shiftsub0_cost (speed, mode, m);
3126 new_limit.cost = best_cost.cost - op_cost;
3127 new_limit.latency = best_cost.latency - op_cost;
3128 synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
3129
3130 alg_in->cost.cost += op_cost;
3131 alg_in->cost.latency += op_cost;
3132 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3133 {
3134 best_cost = alg_in->cost;
3135 std::swap (alg_in, best_alg);
3136 best_alg->log[best_alg->ops] = m;
3137 best_alg->op[best_alg->ops] = alg_sub_t2_m;
3138 }
3139 }
3140 if (cache_hit)
3141 goto done;
3142 }
3143
3144 done:
3145 /* If best_cost has not decreased, we have not found any algorithm. */
3146 if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
3147 {
3148 /* We failed to find an algorithm. Record alg_impossible for
3149 this case (that is, <T, MODE, COST_LIMIT>) so that next time
3150 we are asked to find an algorithm for T within the same or
3151 lower COST_LIMIT, we can immediately return to the
3152 caller. */
3153 entry_ptr->t = t;
3154 entry_ptr->mode = mode;
3155 entry_ptr->speed = speed;
3156 entry_ptr->alg = alg_impossible;
3157 entry_ptr->cost = *cost_limit;
3158 return;
3159 }
3160
3161 /* Cache the result. */
3162 if (!cache_hit)
3163 {
3164 entry_ptr->t = t;
3165 entry_ptr->mode = mode;
3166 entry_ptr->speed = speed;
3167 entry_ptr->alg = best_alg->op[best_alg->ops];
3168 entry_ptr->cost.cost = best_cost.cost;
3169 entry_ptr->cost.latency = best_cost.latency;
3170 }
3171
3172 /* If we are getting too long a sequence for `struct algorithm'
3173 to record, make this search fail. */
3174 if (best_alg->ops == MAX_BITS_PER_WORD)
3175 return;
3176
3177 /* Copy the algorithm from temporary space to the space at alg_out.
3178 We avoid using structure assignment because the majority of
3179 best_alg is normally undefined, and this is a critical function. */
3180 alg_out->ops = best_alg->ops + 1;
3181 alg_out->cost = best_cost;
3182 memcpy (alg_out->op, best_alg->op,
3183 alg_out->ops * sizeof *alg_out->op);
3184 memcpy (alg_out->log, best_alg->log,
3185 alg_out->ops * sizeof *alg_out->log);
3186 }
3187
3188 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
3189 Try three variations:
3190
3191 - a shift/add sequence based on VAL itself
3192 - a shift/add sequence based on -VAL, followed by a negation
3193 - a shift/add sequence based on VAL - 1, followed by an addition.
3194
3195 Return true if the cheapest of these costs less than MULT_COST,
3196 describing the algorithm in *ALG and final fixup in *VARIANT. */
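/* As a rough illustration (the winner depends entirely on the target's
   add, shift and negate costs): for VAL = -5 the negated variant is
   often best, computing x*5 as (x << 2) + x and then negating, so
   *VARIANT becomes negate_variant; for VAL = 7 the sequence based on
   VAL itself, (x << 3) - x, is usually cheapest and *VARIANT stays
   basic_variant.  */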
3197
3198 bool
3199 choose_mult_variant (machine_mode mode, HOST_WIDE_INT val,
3200 struct algorithm *alg, enum mult_variant *variant,
3201 int mult_cost)
3202 {
3203 struct algorithm alg2;
3204 struct mult_cost limit;
3205 int op_cost;
3206 bool speed = optimize_insn_for_speed_p ();
3207
3208 /* Fail quickly for impossible bounds. */
3209 if (mult_cost < 0)
3210 return false;
3211
3212 /* Ensure that mult_cost provides a reasonable upper bound.
3213 Any constant multiplication can be performed with less
3214 than 2 * bits additions. */
3215 op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
3216 if (mult_cost > op_cost)
3217 mult_cost = op_cost;
3218
3219 *variant = basic_variant;
3220 limit.cost = mult_cost;
3221 limit.latency = mult_cost;
3222 synth_mult (alg, val, &limit, mode);
3223
3224 /* This works only if the inverted value actually fits in an
3225 `unsigned int'.  */
3226 if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
3227 {
3228 op_cost = neg_cost (speed, mode);
3229 if (MULT_COST_LESS (&alg->cost, mult_cost))
3230 {
3231 limit.cost = alg->cost.cost - op_cost;
3232 limit.latency = alg->cost.latency - op_cost;
3233 }
3234 else
3235 {
3236 limit.cost = mult_cost - op_cost;
3237 limit.latency = mult_cost - op_cost;
3238 }
3239
3240 synth_mult (&alg2, -val, &limit, mode);
3241 alg2.cost.cost += op_cost;
3242 alg2.cost.latency += op_cost;
3243 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3244 *alg = alg2, *variant = negate_variant;
3245 }
3246
3247 /* This proves very useful for division-by-constant. */
3248 op_cost = add_cost (speed, mode);
3249 if (MULT_COST_LESS (&alg->cost, mult_cost))
3250 {
3251 limit.cost = alg->cost.cost - op_cost;
3252 limit.latency = alg->cost.latency - op_cost;
3253 }
3254 else
3255 {
3256 limit.cost = mult_cost - op_cost;
3257 limit.latency = mult_cost - op_cost;
3258 }
3259
3260 synth_mult (&alg2, val - 1, &limit, mode);
3261 alg2.cost.cost += op_cost;
3262 alg2.cost.latency += op_cost;
3263 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3264 *alg = alg2, *variant = add_variant;
3265
3266 return MULT_COST_LESS (&alg->cost, mult_cost);
3267 }
3268
3269 /* A subroutine of expand_mult, used for constant multiplications.
3270 Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
3271 convenient. Use the shift/add sequence described by ALG and apply
3272 the final fixup specified by VARIANT. */
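/* For example, for VAL = 10 one possible algorithm is
   { alg_m, alg_shift (log 1), alg_add_factor (log 2) }: start with
   ACCUM = OP0, shift left by 1 to get 2*OP0, then add ACCUM << 2 to
   ACCUM to reach 10*OP0.  The running value VAL_SO_FAR below tracks
   1, 2 and finally 10, which the assertion at the end compares with
   VAL (masked to the mode).  */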
3273
3274 static rtx
3275 expand_mult_const (machine_mode mode, rtx op0, HOST_WIDE_INT val,
3276 rtx target, const struct algorithm *alg,
3277 enum mult_variant variant)
3278 {
3279 unsigned HOST_WIDE_INT val_so_far;
3280 rtx_insn *insn;
3281 rtx accum, tem;
3282 int opno;
3283 machine_mode nmode;
3284
3285 /* Avoid referencing memory over and over and invalid sharing
3286 on SUBREGs. */
3287 op0 = force_reg (mode, op0);
3288
3289 /* ACCUM starts out either as OP0 or as a zero, depending on
3290 the first operation. */
3291
3292 if (alg->op[0] == alg_zero)
3293 {
3294 accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
3295 val_so_far = 0;
3296 }
3297 else if (alg->op[0] == alg_m)
3298 {
3299 accum = copy_to_mode_reg (mode, op0);
3300 val_so_far = 1;
3301 }
3302 else
3303 gcc_unreachable ();
3304
3305 for (opno = 1; opno < alg->ops; opno++)
3306 {
3307 int log = alg->log[opno];
3308 rtx shift_subtarget = optimize ? 0 : accum;
3309 rtx add_target
3310 = (opno == alg->ops - 1 && target != 0 && variant != add_variant
3311 && !optimize)
3312 ? target : 0;
3313 rtx accum_target = optimize ? 0 : accum;
3314 rtx accum_inner;
3315
3316 switch (alg->op[opno])
3317 {
3318 case alg_shift:
3319 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3320 /* REG_EQUAL note will be attached to the following insn. */
3321 emit_move_insn (accum, tem);
3322 val_so_far <<= log;
3323 break;
3324
3325 case alg_add_t_m2:
3326 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3327 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3328 add_target ? add_target : accum_target);
3329 val_so_far += HOST_WIDE_INT_1U << log;
3330 break;
3331
3332 case alg_sub_t_m2:
3333 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3334 accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
3335 add_target ? add_target : accum_target);
3336 val_so_far -= HOST_WIDE_INT_1U << log;
3337 break;
3338
3339 case alg_add_t2_m:
3340 accum = expand_shift (LSHIFT_EXPR, mode, accum,
3341 log, shift_subtarget, 0);
3342 accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
3343 add_target ? add_target : accum_target);
3344 val_so_far = (val_so_far << log) + 1;
3345 break;
3346
3347 case alg_sub_t2_m:
3348 accum = expand_shift (LSHIFT_EXPR, mode, accum,
3349 log, shift_subtarget, 0);
3350 accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3351 add_target ? add_target : accum_target);
3352 val_so_far = (val_so_far << log) - 1;
3353 break;
3354
3355 case alg_add_factor:
3356 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3357 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3358 add_target ? add_target : accum_target);
3359 val_so_far += val_so_far << log;
3360 break;
3361
3362 case alg_sub_factor:
3363 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3364 accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3365 (add_target
3366 ? add_target : (optimize ? 0 : tem)));
3367 val_so_far = (val_so_far << log) - val_so_far;
3368 break;
3369
3370 default:
3371 gcc_unreachable ();
3372 }
3373
3374 if (SCALAR_INT_MODE_P (mode))
3375 {
3376 /* Write a REG_EQUAL note on the last insn so that we can cse
3377 multiplication sequences. Note that if ACCUM is a SUBREG,
3378 we've set the inner register and must properly indicate that. */
3379 tem = op0, nmode = mode;
3380 accum_inner = accum;
3381 if (GET_CODE (accum) == SUBREG)
3382 {
3383 accum_inner = SUBREG_REG (accum);
3384 nmode = GET_MODE (accum_inner);
3385 tem = gen_lowpart (nmode, op0);
3386 }
3387
3388 /* Don't add a REG_EQUAL note if tem is a paradoxical SUBREG.
3389 In that case, only the low bits of accum would be guaranteed to
3390 be equal to the content of the REG_EQUAL note, the upper bits
3391 can be anything. */
3392 if (!paradoxical_subreg_p (tem))
3393 {
3394 insn = get_last_insn ();
3395 wide_int wval_so_far
3396 = wi::uhwi (val_so_far,
3397 GET_MODE_PRECISION (as_a <scalar_mode> (nmode)));
3398 rtx c = immed_wide_int_const (wval_so_far, nmode);
3399 set_dst_reg_note (insn, REG_EQUAL, gen_rtx_MULT (nmode, tem, c),
3400 accum_inner);
3401 }
3402 }
3403 }
3404
3405 if (variant == negate_variant)
3406 {
3407 val_so_far = -val_so_far;
3408 accum = expand_unop (mode, neg_optab, accum, target, 0);
3409 }
3410 else if (variant == add_variant)
3411 {
3412 val_so_far = val_so_far + 1;
3413 accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3414 }
3415
3416 /* Compare only the bits of val and val_so_far that are significant
3417 in the result mode, to avoid sign-/zero-extension confusion. */
3418 nmode = GET_MODE_INNER (mode);
3419 val &= GET_MODE_MASK (nmode);
3420 val_so_far &= GET_MODE_MASK (nmode);
3421 gcc_assert (val == (HOST_WIDE_INT) val_so_far);
3422
3423 return accum;
3424 }
3425
3426 /* Perform a multiplication and return an rtx for the result.
3427 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3428 TARGET is a suggestion for where to store the result (an rtx).
3429
3430 We check specially for a constant integer as OP1.
3431 If you want this check for OP0 as well, then before calling
3432 you should swap the two operands if OP0 would be constant. */
3433
3434 rtx
3435 expand_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3436 int unsignedp, bool no_libcall)
3437 {
3438 enum mult_variant variant;
3439 struct algorithm algorithm;
3440 rtx scalar_op1;
3441 int max_cost;
3442 bool speed = optimize_insn_for_speed_p ();
3443 bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
3444
3445 if (CONSTANT_P (op0))
3446 std::swap (op0, op1);
3447
3448 /* For vectors, there are several simplifications that can be made if
3449 all elements of the vector constant are identical. */
3450 scalar_op1 = unwrap_const_vec_duplicate (op1);
3451
3452 if (INTEGRAL_MODE_P (mode))
3453 {
3454 rtx fake_reg;
3455 HOST_WIDE_INT coeff;
3456 bool is_neg;
3457 int mode_bitsize;
3458
3459 if (op1 == CONST0_RTX (mode))
3460 return op1;
3461 if (op1 == CONST1_RTX (mode))
3462 return op0;
3463 if (op1 == CONSTM1_RTX (mode))
3464 return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3465 op0, target, 0);
3466
3467 if (do_trapv)
3468 goto skip_synth;
3469
3470 /* If mode is integer vector mode, check if the backend supports
3471 vector lshift (by scalar or vector) at all. If not, we can't use
3472 synthesized multiply. */
3473 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
3474 && optab_handler (vashl_optab, mode) == CODE_FOR_nothing
3475 && optab_handler (ashl_optab, mode) == CODE_FOR_nothing)
3476 goto skip_synth;
3477
3478 /* These are the operations that are potentially turned into
3479 a sequence of shifts and additions. */
3480 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
3481
3482 /* synth_mult does an `unsigned int' multiply. As long as the mode is
3483 less than or equal in size to `unsigned int' this doesn't matter.
3484 If the mode is larger than `unsigned int', then synth_mult works
3485 only if the constant value exactly fits in an `unsigned int' without
3486 any truncation. This means that multiplying by negative values does
3487 not work; results are off by 2^32 on a 32-bit machine. */
3488 if (CONST_INT_P (scalar_op1))
3489 {
3490 coeff = INTVAL (scalar_op1);
3491 is_neg = coeff < 0;
3492 }
3493 #if TARGET_SUPPORTS_WIDE_INT
3494 else if (CONST_WIDE_INT_P (scalar_op1))
3495 #else
3496 else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
3497 #endif
3498 {
3499 int shift = wi::exact_log2 (rtx_mode_t (scalar_op1, mode));
3500 /* Perfect power of 2 (other than 1, which is handled above). */
3501 if (shift > 0)
3502 return expand_shift (LSHIFT_EXPR, mode, op0,
3503 shift, target, unsignedp);
3504 else
3505 goto skip_synth;
3506 }
3507 else
3508 goto skip_synth;
3509
3510 /* We used to test optimize here, on the grounds that it's better to
3511 produce a smaller program when -O is not used. But this causes
3512 such a terrible slowdown sometimes that it seems better to always
3513 use synth_mult. */
3514
3515 /* Special case powers of two. */
3516 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)
3517 && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT))
3518 return expand_shift (LSHIFT_EXPR, mode, op0,
3519 floor_log2 (coeff), target, unsignedp);
3520
3521 fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3522
3523 /* Attempt to handle multiplication of DImode values by negative
3524 coefficients, by performing the multiplication by a positive
3525 multiplier and then inverting the result. */
3526 if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3527 {
3528 /* It's safe to use -coeff even for INT_MIN, as the
3529 result is interpreted as an unsigned coefficient.
3530 Exclude cost of op0 from max_cost to match the cost
3531 calculation of the synth_mult. */
3532 coeff = -(unsigned HOST_WIDE_INT) coeff;
3533 max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1),
3534 mode, speed)
3535 - neg_cost (speed, mode));
3536 if (max_cost <= 0)
3537 goto skip_synth;
3538
3539 /* Special case powers of two. */
3540 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3541 {
3542 rtx temp = expand_shift (LSHIFT_EXPR, mode, op0,
3543 floor_log2 (coeff), target, unsignedp);
3544 return expand_unop (mode, neg_optab, temp, target, 0);
3545 }
3546
3547 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3548 max_cost))
3549 {
3550 rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX,
3551 &algorithm, variant);
3552 return expand_unop (mode, neg_optab, temp, target, 0);
3553 }
3554 goto skip_synth;
3555 }
3556
3557 /* Exclude cost of op0 from max_cost to match the cost
3558 calculation of the synth_mult. */
3559 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), mode, speed);
3560 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3561 return expand_mult_const (mode, op0, coeff, target,
3562 &algorithm, variant);
3563 }
3564 skip_synth:
3565
3566 /* Expand x*2.0 as x+x. */
3567 if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1)
3568 && real_equal (CONST_DOUBLE_REAL_VALUE (scalar_op1), &dconst2))
3569 {
3570 op0 = force_reg (GET_MODE (op0), op0);
3571 return expand_binop (mode, add_optab, op0, op0,
3572 target, unsignedp,
3573 no_libcall ? OPTAB_WIDEN : OPTAB_LIB_WIDEN);
3574 }
3575
3576 /* This used to use umul_optab if unsigned, but for non-widening multiply
3577 there is no difference between signed and unsigned. */
3578 op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
3579 op0, op1, target, unsignedp,
3580 no_libcall ? OPTAB_WIDEN : OPTAB_LIB_WIDEN);
3581 gcc_assert (op0 || no_libcall);
3582 return op0;
3583 }
3584
3585 /* Return a cost estimate for multiplying a register by the given
3586 COEFFicient in the given MODE and SPEED. */
3587
3588 int
3589 mult_by_coeff_cost (HOST_WIDE_INT coeff, machine_mode mode, bool speed)
3590 {
3591 int max_cost;
3592 struct algorithm algorithm;
3593 enum mult_variant variant;
3594
3595 rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3596 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg),
3597 mode, speed);
3598 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3599 return algorithm.cost.cost;
3600 else
3601 return max_cost;
3602 }
3603
3604 /* Perform a widening multiplication and return an rtx for the result.
3605 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3606 TARGET is a suggestion for where to store the result (an rtx).
3607 THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3608 or smul_widen_optab.
3609
3610 We check specially for a constant integer as OP1, comparing the
3611 cost of a widening multiply against the cost of a sequence of shifts
3612 and adds. */
3613
3614 rtx
3615 expand_widening_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3616 int unsignedp, optab this_optab)
3617 {
3618 bool speed = optimize_insn_for_speed_p ();
3619 rtx cop1;
3620
3621 if (CONST_INT_P (op1)
3622 && GET_MODE (op0) != VOIDmode
3623 && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3624 this_optab == umul_widen_optab))
3625 && CONST_INT_P (cop1)
3626 && (INTVAL (cop1) >= 0
3627 || HWI_COMPUTABLE_MODE_P (mode)))
3628 {
3629 HOST_WIDE_INT coeff = INTVAL (cop1);
3630 int max_cost;
3631 enum mult_variant variant;
3632 struct algorithm algorithm;
3633
3634 if (coeff == 0)
3635 return CONST0_RTX (mode);
3636
3637 /* Special case powers of two. */
3638 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3639 {
3640 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3641 return expand_shift (LSHIFT_EXPR, mode, op0,
3642 floor_log2 (coeff), target, unsignedp);
3643 }
3644
3645 /* Exclude cost of op0 from max_cost to match the cost
3646 calculation of the synth_mult. */
3647 max_cost = mul_widen_cost (speed, mode);
3648 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3649 max_cost))
3650 {
3651 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3652 return expand_mult_const (mode, op0, coeff, target,
3653 &algorithm, variant);
3654 }
3655 }
3656 return expand_binop (mode, this_optab, op0, op1, target,
3657 unsignedp, OPTAB_LIB_WIDEN);
3658 }
3659
3660 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3661 replace division by D, and put the least significant N bits of the result
3662 in *MULTIPLIER_PTR and return the most significant bit.
3663
3664 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3665 needed precision is in PRECISION (should be <= N).
3666
3667 PRECISION should be as small as possible so this function can choose the
3668 multiplier more freely.
3669
3670 The rounded-up logarithm of D is placed in *LGUP_PTR. A shift count that
3671 is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3672
3673 Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3674 where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */
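/* A worked example, assuming a 64-bit HOST_WIDE_INT: for D = 7, N = 32
   and PRECISION = 32 we get lgup = 3, mlow = 2^35/7 and
   mhigh = (2^35 + 2^3)/7; no reduction of the pair is possible, so
   *POST_SHIFT_PTR = 3, *LGUP_PTR = 3, *MULTIPLIER_PTR = 0x24924925 and
   the return value is 1 (the 33rd bit of the multiplier).  The full
   multiplier is thus 0x124924925, and x/7 == (x * 0x124924925) >> 35
   for every 32-bit unsigned x.  */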
3675
3676 unsigned HOST_WIDE_INT
3677 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3678 unsigned HOST_WIDE_INT *multiplier_ptr,
3679 int *post_shift_ptr, int *lgup_ptr)
3680 {
3681 int lgup, post_shift;
3682 int pow, pow2;
3683
3684 /* lgup = ceil(log2(divisor)); */
3685 lgup = ceil_log2 (d);
3686
3687 gcc_assert (lgup <= n);
3688
3689 pow = n + lgup;
3690 pow2 = n + lgup - precision;
3691
3692 /* mlow = 2^(N + lgup)/d */
3693 wide_int val = wi::set_bit_in_zero (pow, HOST_BITS_PER_DOUBLE_INT);
3694 wide_int mlow = wi::udiv_trunc (val, d);
3695
3696 /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3697 val |= wi::set_bit_in_zero (pow2, HOST_BITS_PER_DOUBLE_INT);
3698 wide_int mhigh = wi::udiv_trunc (val, d);
3699
3700 /* If precision == N, then mlow, mhigh exceed 2^N
3701 (but they do not exceed 2^(N+1)). */
3702
3703 /* Reduce to lowest terms. */
3704 for (post_shift = lgup; post_shift > 0; post_shift--)
3705 {
3706 unsigned HOST_WIDE_INT ml_lo = wi::extract_uhwi (mlow, 1,
3707 HOST_BITS_PER_WIDE_INT);
3708 unsigned HOST_WIDE_INT mh_lo = wi::extract_uhwi (mhigh, 1,
3709 HOST_BITS_PER_WIDE_INT);
3710 if (ml_lo >= mh_lo)
3711 break;
3712
3713 mlow = wi::uhwi (ml_lo, HOST_BITS_PER_DOUBLE_INT);
3714 mhigh = wi::uhwi (mh_lo, HOST_BITS_PER_DOUBLE_INT);
3715 }
3716
3717 *post_shift_ptr = post_shift;
3718 *lgup_ptr = lgup;
3719 if (n < HOST_BITS_PER_WIDE_INT)
3720 {
3721 unsigned HOST_WIDE_INT mask = (HOST_WIDE_INT_1U << n) - 1;
3722 *multiplier_ptr = mhigh.to_uhwi () & mask;
3723 return mhigh.to_uhwi () > mask;
3724 }
3725 else
3726 {
3727 *multiplier_ptr = mhigh.to_uhwi ();
3728 return wi::extract_uhwi (mhigh, HOST_BITS_PER_WIDE_INT, 1);
3729 }
3730 }
3731
3732 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3733 congruent to 1 (mod 2**N). */
3734
3735 static unsigned HOST_WIDE_INT
3736 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3737 {
3738 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
3739
3740 /* The algorithm notes that the choice y = x satisfies
3741 x*y == 1 mod 2^3, since x is assumed odd.
3742 Each iteration doubles the number of bits of significance in y. */
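/* For instance, inverting x = 7 mod 2^8: y starts as 7 (7*7 = 49 == 1
   mod 8); the first step gives y = 7*(2 - 7*7) mod 256 = 183, and
   indeed 7*183 = 1281 == 1 (mod 256), so the remaining step leaves y
   unchanged.  */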
3743
3744 unsigned HOST_WIDE_INT mask;
3745 unsigned HOST_WIDE_INT y = x;
3746 int nbit = 3;
3747
3748 mask = (n == HOST_BITS_PER_WIDE_INT
3749 ? HOST_WIDE_INT_M1U
3750 : (HOST_WIDE_INT_1U << n) - 1);
3751
3752 while (nbit < n)
3753 {
3754 y = y * (2 - x*y) & mask; /* Modulo 2^N */
3755 nbit *= 2;
3756 }
3757 return y;
3758 }
3759
3760 /* Emit code to adjust ADJ_OPERAND after a multiplication of OP0 and OP1
3761 performed with the wrong signedness. ADJ_OPERAND is already the high half
3762 of the product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed
3763 product to become unsigned; if UNSIGNEDP is zero, adjust the unsigned
3764 product to become signed.
3765
3766 The result is put in TARGET if that is convenient.
3767
3768 MODE is the mode of operation. */
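/* The underlying identity, for N-bit operands with the result taken
   modulo 2^N, is roughly
     high_unsigned (x, y) == high_signed (x, y)
                             + (x < 0 ? y : 0) + (y < 0 ? x : 0)
   and symmetrically with subtraction for the other direction; the two
   shift/and/add-or-subtract pairs below are those conditional
   corrections.  */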
3769
3770 rtx
3771 expand_mult_highpart_adjust (scalar_int_mode mode, rtx adj_operand, rtx op0,
3772 rtx op1, rtx target, int unsignedp)
3773 {
3774 rtx tem;
3775 enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3776
3777 tem = expand_shift (RSHIFT_EXPR, mode, op0,
3778 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3779 tem = expand_and (mode, tem, op1, NULL_RTX);
3780 adj_operand
3781 = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3782 adj_operand);
3783
3784 tem = expand_shift (RSHIFT_EXPR, mode, op1,
3785 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3786 tem = expand_and (mode, tem, op0, NULL_RTX);
3787 target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3788 target);
3789
3790 return target;
3791 }
3792
3793 /* Subroutine of expmed_mult_highpart. Return the MODE high part of OP. */
3794
3795 static rtx
3796 extract_high_half (scalar_int_mode mode, rtx op)
3797 {
3798 if (mode == word_mode)
3799 return gen_highpart (mode, op);
3800
3801 scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3802
3803 op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3804 GET_MODE_BITSIZE (mode), 0, 1);
3805 return convert_modes (mode, wider_mode, op, 0);
3806 }
3807
3808 /* Like expmed_mult_highpart, but only consider using a multiplication
3809 optab. OP1 is an rtx for the constant operand. */
3810
3811 static rtx
3812 expmed_mult_highpart_optab (scalar_int_mode mode, rtx op0, rtx op1,
3813 rtx target, int unsignedp, int max_cost)
3814 {
3815 rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3816 optab moptab;
3817 rtx tem;
3818 int size;
3819 bool speed = optimize_insn_for_speed_p ();
3820
3821 scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3822
3823 size = GET_MODE_BITSIZE (mode);
3824
3825 /* First, try using a multiplication insn that only generates the needed
3826 high part of the product, with the signedness given by UNSIGNEDP. */
3827 if (mul_highpart_cost (speed, mode) < max_cost)
3828 {
3829 moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3830 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3831 unsignedp, OPTAB_DIRECT);
3832 if (tem)
3833 return tem;
3834 }
3835
3836 /* Second, same as above, but use the signedness opposite to UNSIGNEDP.
3837 We need to adjust the result after the multiplication. */
3838 if (size - 1 < BITS_PER_WORD
3839 && (mul_highpart_cost (speed, mode)
3840 + 2 * shift_cost (speed, mode, size-1)
3841 + 4 * add_cost (speed, mode) < max_cost))
3842 {
3843 moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3844 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3845 unsignedp, OPTAB_DIRECT);
3846 if (tem)
3847 /* We used the wrong signedness. Adjust the result. */
3848 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3849 tem, unsignedp);
3850 }
3851
3852 /* Try widening multiplication. */
3853 moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3854 if (convert_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3855 && mul_widen_cost (speed, wider_mode) < max_cost)
3856 {
3857 tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3858 unsignedp, OPTAB_WIDEN);
3859 if (tem)
3860 return extract_high_half (mode, tem);
3861 }
3862
3863 /* Try widening the mode and performing a non-widening multiplication. */
3864 if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3865 && size - 1 < BITS_PER_WORD
3866 && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3867 < max_cost))
3868 {
3869 rtx_insn *insns;
3870 rtx wop0, wop1;
3871
3872 /* We need to widen the operands, for example to ensure the
3873 constant multiplier is correctly sign- or zero-extended.
3874 Use a sequence to clean up any instructions emitted by
3875 the conversions if things don't work out. */
3876 start_sequence ();
3877 wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3878 wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3879 tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3880 unsignedp, OPTAB_WIDEN);
3881 insns = get_insns ();
3882 end_sequence ();
3883
3884 if (tem)
3885 {
3886 emit_insn (insns);
3887 return extract_high_half (mode, tem);
3888 }
3889 }
3890
3891 /* Try widening multiplication of opposite signedness, and adjust. */
3892 moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3893 if (convert_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3894 && size - 1 < BITS_PER_WORD
3895 && (mul_widen_cost (speed, wider_mode)
3896 + 2 * shift_cost (speed, mode, size-1)
3897 + 4 * add_cost (speed, mode) < max_cost))
3898 {
3899 tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3900 NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3901 if (tem != 0)
3902 {
3903 tem = extract_high_half (mode, tem);
3904 /* We used the wrong signedness. Adjust the result. */
3905 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3906 target, unsignedp);
3907 }
3908 }
3909
3910 return 0;
3911 }
3912
3913 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3914 putting the high half of the result in TARGET if that is convenient,
3915 and return where the result is. If the operation cannot be performed,
3916 0 is returned.
3917
3918 MODE is the mode of operation and result.
3919
3920 UNSIGNEDP nonzero means unsigned multiply.
3921
3922 MAX_COST is the total allowed cost for the expanded RTL. */
3923
3924 static rtx
3925 expmed_mult_highpart (scalar_int_mode mode, rtx op0, rtx op1,
3926 rtx target, int unsignedp, int max_cost)
3927 {
3928 unsigned HOST_WIDE_INT cnst1;
3929 int extra_cost;
3930 bool sign_adjust = false;
3931 enum mult_variant variant;
3932 struct algorithm alg;
3933 rtx tem;
3934 bool speed = optimize_insn_for_speed_p ();
3935
3936 /* We can't support modes wider than HOST_BITS_PER_WIDE_INT. */
3937 gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3938
3939 cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3940
3941 /* We can't optimize modes wider than BITS_PER_WORD.
3942 ??? We might be able to perform double-word arithmetic if
3943 mode == word_mode, however all the cost calculations in
3944 synth_mult etc. assume single-word operations. */
3945 scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3946 if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3947 return expmed_mult_highpart_optab (mode, op0, op1, target,
3948 unsignedp, max_cost);
3949
3950 extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3951
3952 /* Check whether we try to multiply by a negative constant. */
3953 if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3954 {
3955 sign_adjust = true;
3956 extra_cost += add_cost (speed, mode);
3957 }
3958
3959 /* See whether shift/add multiplication is cheap enough. */
3960 if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3961 max_cost - extra_cost))
3962 {
3963 /* See whether the specialized multiplication optabs are
3964 cheaper than the shift/add version. */
3965 tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3966 alg.cost.cost + extra_cost);
3967 if (tem)
3968 return tem;
3969
3970 tem = convert_to_mode (wider_mode, op0, unsignedp);
3971 tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3972 tem = extract_high_half (mode, tem);
3973
3974 /* Adjust result for signedness. */
3975 if (sign_adjust)
3976 tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3977
3978 return tem;
3979 }
3980 return expmed_mult_highpart_optab (mode, op0, op1, target,
3981 unsignedp, max_cost);
3982 }
3983
3984
3985 /* Expand signed modulus of OP0 by a power of two D in mode MODE. */
3986
3987 static rtx
3988 expand_smod_pow2 (scalar_int_mode mode, rtx op0, HOST_WIDE_INT d)
3989 {
3990 rtx result, temp, shift;
3991 rtx_code_label *label;
3992 int logd;
3993 int prec = GET_MODE_PRECISION (mode);
3994
3995 logd = floor_log2 (d);
3996 result = gen_reg_rtx (mode);
3997
3998 /* Avoid conditional branches when they're expensive. */
3999 if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
4000 && optimize_insn_for_speed_p ())
4001 {
4002 rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
4003 mode, 0, -1);
4004 if (signmask)
4005 {
4006 HOST_WIDE_INT masklow = (HOST_WIDE_INT_1 << logd) - 1;
4007 signmask = force_reg (mode, signmask);
4008 shift = gen_int_shift_amount (mode, GET_MODE_BITSIZE (mode) - logd);
4009
4010 /* Use the rtx_cost of a LSHIFTRT instruction to determine
4011 which instruction sequence to use. If logical right shifts
4012 are expensive then use 2 XORs, 2 SUBs and an AND, otherwise
4013 use a LSHIFTRT, 1 ADD, 1 SUB and an AND. */
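/* For example, with D = 4 (logd = 2) and the XOR/SUB form, the value
   computed is ((((x ^ s) - s) & 3) ^ s) - s with s = -1 for negative
   x and 0 otherwise; for x = -7 that is x^s = 6, 6-s = 7, 7&3 = 3,
   3^s = -4, -4-s = -3, matching C's -7 % 4 == -3.  */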
4014
4015 temp = gen_rtx_LSHIFTRT (mode, result, shift);
4016 if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
4017 || (set_src_cost (temp, mode, optimize_insn_for_speed_p ())
4018 > COSTS_N_INSNS (2)))
4019 {
4020 temp = expand_binop (mode, xor_optab, op0, signmask,
4021 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4022 temp = expand_binop (mode, sub_optab, temp, signmask,
4023 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4024 temp = expand_binop (mode, and_optab, temp,
4025 gen_int_mode (masklow, mode),
4026 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4027 temp = expand_binop (mode, xor_optab, temp, signmask,
4028 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4029 temp = expand_binop (mode, sub_optab, temp, signmask,
4030 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4031 }
4032 else
4033 {
4034 signmask = expand_binop (mode, lshr_optab, signmask, shift,
4035 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4036 signmask = force_reg (mode, signmask);
4037
4038 temp = expand_binop (mode, add_optab, op0, signmask,
4039 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4040 temp = expand_binop (mode, and_optab, temp,
4041 gen_int_mode (masklow, mode),
4042 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4043 temp = expand_binop (mode, sub_optab, temp, signmask,
4044 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4045 }
4046 return temp;
4047 }
4048 }
4049
4050 /* Mask contains the mode's signbit and the significant bits of the
4051 modulus. By including the signbit in the operation, many targets
4052 can avoid an explicit compare operation in the following comparison
4053 against zero. */
4054 wide_int mask = wi::mask (logd, false, prec);
4055 mask = wi::set_bit (mask, prec - 1);
4056
4057 temp = expand_binop (mode, and_optab, op0,
4058 immed_wide_int_const (mask, mode),
4059 result, 1, OPTAB_LIB_WIDEN);
4060 if (temp != result)
4061 emit_move_insn (result, temp);
4062
4063 label = gen_label_rtx ();
4064 do_cmp_and_jump (result, const0_rtx, GE, mode, label);
4065
4066 temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
4067 0, OPTAB_LIB_WIDEN);
4068
4069 mask = wi::mask (logd, true, prec);
4070 temp = expand_binop (mode, ior_optab, temp,
4071 immed_wide_int_const (mask, mode),
4072 result, 1, OPTAB_LIB_WIDEN);
4073 temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
4074 0, OPTAB_LIB_WIDEN);
4075 if (temp != result)
4076 emit_move_insn (result, temp);
4077 emit_label (label);
4078 return result;
4079 }
4080
4081 /* Expand signed division of OP0 by a power of two D in mode MODE.
4082 This routine is only called for positive values of D. */
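/* Each sequence below computes, in one form or another (the last one
   with an explicit branch), (OP0 + (OP0 < 0 ? D - 1 : 0)) >> logd,
   which rounds the signed quotient toward zero; e.g. for D = 4 and
   OP0 = -5 the bias gives (-5 + 3) >> 2 = -1, matching C's -5 / 4.  */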
4083
4084 static rtx
4085 expand_sdiv_pow2 (scalar_int_mode mode, rtx op0, HOST_WIDE_INT d)
4086 {
4087 rtx temp;
4088 rtx_code_label *label;
4089 int logd;
4090
4091 logd = floor_log2 (d);
4092
4093 if (d == 2
4094 && BRANCH_COST (optimize_insn_for_speed_p (),
4095 false) >= 1)
4096 {
4097 temp = gen_reg_rtx (mode);
4098 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
4099 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
4100 0, OPTAB_LIB_WIDEN);
4101 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
4102 }
4103
4104 if (HAVE_conditional_move
4105 && BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2)
4106 {
4107 rtx temp2;
4108
4109 start_sequence ();
4110 temp2 = copy_to_mode_reg (mode, op0);
4111 temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode),
4112 NULL_RTX, 0, OPTAB_LIB_WIDEN);
4113 temp = force_reg (mode, temp);
4114
4115 /* Construct "temp2 = (temp2 < 0) ? temp : temp2". */
4116 temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
4117 mode, temp, temp2, mode, 0);
4118 if (temp2)
4119 {
4120 rtx_insn *seq = get_insns ();
4121 end_sequence ();
4122 emit_insn (seq);
4123 return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
4124 }
4125 end_sequence ();
4126 }
4127
4128 if (BRANCH_COST (optimize_insn_for_speed_p (),
4129 false) >= 2)
4130 {
4131 int ushift = GET_MODE_BITSIZE (mode) - logd;
4132
4133 temp = gen_reg_rtx (mode);
4134 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
4135 if (GET_MODE_BITSIZE (mode) >= BITS_PER_WORD
4136 || shift_cost (optimize_insn_for_speed_p (), mode, ushift)
4137 > COSTS_N_INSNS (1))
4138 temp = expand_binop (mode, and_optab, temp, gen_int_mode (d - 1, mode),
4139 NULL_RTX, 0, OPTAB_LIB_WIDEN);
4140 else
4141 temp = expand_shift (RSHIFT_EXPR, mode, temp,
4142 ushift, NULL_RTX, 1);
4143 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
4144 0, OPTAB_LIB_WIDEN);
4145 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
4146 }
4147
4148 label = gen_label_rtx ();
4149 temp = copy_to_mode_reg (mode, op0);
4150 do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
4151 expand_inc (temp, gen_int_mode (d - 1, mode));
4152 emit_label (label);
4153 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
4154 }
4155
4156 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
4157 if that is convenient, and returning where the result is.
4158 You may request either the quotient or the remainder as the result;
4159 specify REM_FLAG nonzero to get the remainder.
4160
4161 CODE is the expression code for which kind of division this is;
4162 it controls how rounding is done. MODE is the machine mode to use.
4163 UNSIGNEDP nonzero means do unsigned division. */
4164
4165 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
4166 and then correct it by or'ing in missing high bits
4167 if result of ANDI is nonzero.
4168 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
4169 This could optimize to a bfexts instruction.
4170 But C doesn't use these operations, so their optimizations are
4171 left for later. */
4172 /* ??? For modulo, we don't actually need the highpart of the first product,
4173 the low part will do nicely. And for small divisors, the second multiply
4174 can also be a low-part only multiply or even be completely left out.
4175 E.g. to calculate the remainder of a division by 3 with a 32 bit
4176 multiply, multiply with 0x55555556 and extract the upper two bits;
4177 the result is exact for inputs up to 0x1fffffff.
4178 The input range can be reduced by using cross-sum rules.
4179 For odd divisors >= 3, the following table gives right shift counts
4180 so that if a number is shifted by an integer multiple of the given
4181 amount, the remainder stays the same:
4182 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
4183 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
4184 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
4185 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
4186 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
4187
4188 Cross-sum rules for even numbers can be derived by leaving as many bits
4189 to the right alone as the divisor has zeros to the right.
4190 E.g. if x is an unsigned 32 bit number:
4191 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
4192 */
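/* As a quick check of the mod-3 example above: for x = 100,
   100 * 0x55555556 truncated to 32 bits is 0x55555598, whose top two
   bits are 01, i.e. 1 == 100 % 3.  */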
4193
4194 rtx
4195 expand_divmod (int rem_flag, enum tree_code code, machine_mode mode,
4196 rtx op0, rtx op1, rtx target, int unsignedp)
4197 {
4198 machine_mode compute_mode;
4199 rtx tquotient;
4200 rtx quotient = 0, remainder = 0;
4201 rtx_insn *last;
4202 rtx_insn *insn;
4203 optab optab1, optab2;
4204 int op1_is_constant, op1_is_pow2 = 0;
4205 int max_cost, extra_cost;
4206 static HOST_WIDE_INT last_div_const = 0;
4207 bool speed = optimize_insn_for_speed_p ();
4208
4209 op1_is_constant = CONST_INT_P (op1);
4210 if (op1_is_constant)
4211 {
4212 wide_int ext_op1 = rtx_mode_t (op1, mode);
4213 op1_is_pow2 = (wi::popcount (ext_op1) == 1
4214 || (! unsignedp
4215 && wi::popcount (wi::neg (ext_op1)) == 1));
4216 }
4217
4218 /*
4219 This is the structure of expand_divmod:
4220
4221 First comes code to fix up the operands so we can perform the operations
4222 correctly and efficiently.
4223
4224 Second comes a switch statement with code specific for each rounding mode.
4225 For some special operands this code emits all RTL for the desired
4226 operation, for other cases, it generates only a quotient and stores it in
4227 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
4228 to indicate that it has not done anything.
4229
4230 Last comes code that finishes the operation. If QUOTIENT is set and
4231 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
4232 QUOTIENT is not set, it is computed using trunc rounding.
4233
4234 We try to generate special code for division and remainder when OP1 is a
4235 constant. If |OP1| = 2**n we can use shifts and some other fast
4236 operations. For other values of OP1, we compute a carefully selected
4237 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
4238 by m.
4239
4240 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
4241 half of the product. Different strategies for generating the product are
4242 implemented in expmed_mult_highpart.
4243
4244 If what we actually want is the remainder, we generate that by another
4245 by-constant multiplication and a subtraction. */
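/* For instance (a sketch, not tied to any particular target), an
   unsigned 32-bit division by 10 is typically expanded as
   q = (x * 0xCCCCCCCD) >> 35, computed as the high part of a 32x32
   multiplication followed by a right shift of 3; when the remainder
   is wanted as well, it is finished as r = x - q * 10 with one more
   by-constant multiplication and a subtraction.  */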
4246
4247 /* We shouldn't be called with OP1 == const1_rtx, but some of the
4248 code below will malfunction if we are, so check here and handle
4249 the special case if so. */
4250 if (op1 == const1_rtx)
4251 return rem_flag ? const0_rtx : op0;
4252
4253 /* When dividing by -1, we could get an overflow.
4254 negv_optab can handle overflows. */
4255 if (! unsignedp && op1 == constm1_rtx)
4256 {
4257 if (rem_flag)
4258 return const0_rtx;
4259 return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT
4260 ? negv_optab : neg_optab, op0, target, 0);
4261 }
4262
4263 if (target
4264 /* Don't use the function value register as a target
4265 since we have to read it as well as write it,
4266 and function-inlining gets confused by this. */
4267 && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
4268 /* Don't clobber an operand while doing a multi-step calculation. */
4269 || ((rem_flag || op1_is_constant)
4270 && (reg_mentioned_p (target, op0)
4271 || (MEM_P (op0) && MEM_P (target))))
4272 || reg_mentioned_p (target, op1)
4273 || (MEM_P (op1) && MEM_P (target))))
4274 target = 0;
4275
4276 /* Get the mode in which to perform this computation. Normally it will
4277 be MODE, but sometimes we can't do the desired operation in MODE.
4278 If so, pick a wider mode in which we can do the operation. Convert
4279 to that mode at the start to avoid repeated conversions.
4280
4281 First see what operations we need. These depend on the expression
4282 we are evaluating. (We assume that divxx3 insns exist under the
4283 same conditions that modxx3 insns and that these insns don't normally
4284 fail. If these assumptions are not correct, we may generate less
4285 efficient code in some cases.)
4286
4287 Then see if we find a mode in which we can open-code that operation
4288 (either a division, modulus, or shift). Finally, check for the smallest
4289 mode for which we can do the operation with a library call. */
4290
4291 /* We might want to refine this now that we have division-by-constant
4292 optimization. Since expmed_mult_highpart tries so many variants, it is
4293 not straightforward to generalize this. Maybe we should make an array
4294 of possible modes in init_expmed? Save this for GCC 2.7. */
4295
4296 optab1 = (op1_is_pow2
4297 ? (unsignedp ? lshr_optab : ashr_optab)
4298 : (unsignedp ? udiv_optab : sdiv_optab));
4299 optab2 = (op1_is_pow2 ? optab1
4300 : (unsignedp ? udivmod_optab : sdivmod_optab));
4301
4302 FOR_EACH_MODE_FROM (compute_mode, mode)
4303 if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
4304 || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
4305 break;
4306
4307 if (compute_mode == VOIDmode)
4308 FOR_EACH_MODE_FROM (compute_mode, mode)
4309 if (optab_libfunc (optab1, compute_mode)
4310 || optab_libfunc (optab2, compute_mode))
4311 break;
4312
4313 /* If we still couldn't find a mode, use MODE, but expand_binop will
4314 probably die. */
4315 if (compute_mode == VOIDmode)
4316 compute_mode = mode;
4317
4318 if (target && GET_MODE (target) == compute_mode)
4319 tquotient = target;
4320 else
4321 tquotient = gen_reg_rtx (compute_mode);
4322
4323 #if 0
4324 /* It should be possible to restrict the precision to GET_MODE_BITSIZE
4325 (mode), and thereby get better code when OP1 is a constant. Do that
4326 later. It will require going over all usages of SIZE below. */
4327 size = GET_MODE_BITSIZE (mode);
4328 #endif
4329
4330 /* Only deduct something for a REM if the last divide done was
4331 for a different constant. Then set the constant of the last
4332 divide. */
4333 max_cost = (unsignedp
4334 ? udiv_cost (speed, compute_mode)
4335 : sdiv_cost (speed, compute_mode));
4336 if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4337 && INTVAL (op1) == last_div_const))
4338 max_cost -= (mul_cost (speed, compute_mode)
4339 + add_cost (speed, compute_mode));
4340
4341 last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
4342
4343 /* Now convert to the best mode to use. */
4344 if (compute_mode != mode)
4345 {
4346 op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4347 op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4348
4349 /* convert_modes may have placed op1 into a register, so we
4350 must recompute the following. */
4351 op1_is_constant = CONST_INT_P (op1);
4352 if (op1_is_constant)
4353 {
4354 wide_int ext_op1 = rtx_mode_t (op1, compute_mode);
4355 op1_is_pow2 = (wi::popcount (ext_op1) == 1
4356 || (! unsignedp
4357 && wi::popcount (wi::neg (ext_op1)) == 1));
4358 }
4359 else
4360 op1_is_pow2 = 0;
4361 }
4362
4363 /* If one of the operands is a volatile MEM, copy it into a register. */
4364
4365 if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4366 op0 = force_reg (compute_mode, op0);
4367 if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4368 op1 = force_reg (compute_mode, op1);
4369
4370 /* If we need the remainder or if OP1 is constant, we need to
4371 put OP0 in a register in case it has any queued subexpressions. */
4372 if (rem_flag || op1_is_constant)
4373 op0 = force_reg (compute_mode, op0);
4374
4375 last = get_last_insn ();
4376
4377 /* Promote floor rounding to trunc rounding for unsigned operations. */
4378 if (unsignedp)
4379 {
4380 if (code == FLOOR_DIV_EXPR)
4381 code = TRUNC_DIV_EXPR;
4382 if (code == FLOOR_MOD_EXPR)
4383 code = TRUNC_MOD_EXPR;
4384 if (code == EXACT_DIV_EXPR && op1_is_pow2)
4385 code = TRUNC_DIV_EXPR;
4386 }
4387
4388 if (op1 != const0_rtx)
4389 switch (code)
4390 {
4391 case TRUNC_MOD_EXPR:
4392 case TRUNC_DIV_EXPR:
4393 if (op1_is_constant)
4394 {
4395 scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
4396 int size = GET_MODE_BITSIZE (int_mode);
4397 if (unsignedp)
4398 {
4399 unsigned HOST_WIDE_INT mh, ml;
4400 int pre_shift, post_shift;
4401 int dummy;
4402 wide_int wd = rtx_mode_t (op1, int_mode);
4403 unsigned HOST_WIDE_INT d = wd.to_uhwi ();
4404
4405 if (wi::popcount (wd) == 1)
4406 {
4407 pre_shift = floor_log2 (d);
4408 if (rem_flag)
4409 {
4410 unsigned HOST_WIDE_INT mask
4411 = (HOST_WIDE_INT_1U << pre_shift) - 1;
4412 remainder
4413 = expand_binop (int_mode, and_optab, op0,
4414 gen_int_mode (mask, int_mode),
4415 remainder, 1,
4416 OPTAB_LIB_WIDEN);
4417 if (remainder)
4418 return gen_lowpart (mode, remainder);
4419 }
4420 quotient = expand_shift (RSHIFT_EXPR, int_mode, op0,
4421 pre_shift, tquotient, 1);
4422 }
4423 else if (size <= HOST_BITS_PER_WIDE_INT)
4424 {
4425 if (d >= (HOST_WIDE_INT_1U << (size - 1)))
4426 {
4427 /* Most significant bit of divisor is set; emit an scc
4428 insn. */
4429 quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4430 int_mode, 1, 1);
4431 }
4432 else
4433 {
4434 /* Find a suitable multiplier and right shift count
4435 instead of multiplying with D. */
4436
4437 mh = choose_multiplier (d, size, size,
4438 &ml, &post_shift, &dummy);
4439
4440 /* If the suggested multiplier is more than SIZE bits,
4441 we can do better for even divisors, using an
4442 initial right shift. */
4443 if (mh != 0 && (d & 1) == 0)
4444 {
4445 pre_shift = ctz_or_zero (d);
4446 mh = choose_multiplier (d >> pre_shift, size,
4447 size - pre_shift,
4448 &ml, &post_shift, &dummy);
4449 gcc_assert (!mh);
4450 }
4451 else
4452 pre_shift = 0;
4453
4454 if (mh != 0)
4455 {
4456 rtx t1, t2, t3, t4;
4457
4458 if (post_shift - 1 >= BITS_PER_WORD)
4459 goto fail1;
4460
4461 extra_cost
4462 = (shift_cost (speed, int_mode, post_shift - 1)
4463 + shift_cost (speed, int_mode, 1)
4464 + 2 * add_cost (speed, int_mode));
4465 t1 = expmed_mult_highpart
4466 (int_mode, op0, gen_int_mode (ml, int_mode),
4467 NULL_RTX, 1, max_cost - extra_cost);
4468 if (t1 == 0)
4469 goto fail1;
4470 t2 = force_operand (gen_rtx_MINUS (int_mode,
4471 op0, t1),
4472 NULL_RTX);
4473 t3 = expand_shift (RSHIFT_EXPR, int_mode,
4474 t2, 1, NULL_RTX, 1);
4475 t4 = force_operand (gen_rtx_PLUS (int_mode,
4476 t1, t3),
4477 NULL_RTX);
4478 quotient = expand_shift
4479 (RSHIFT_EXPR, int_mode, t4,
4480 post_shift - 1, tquotient, 1);
4481 }
4482 else
4483 {
4484 rtx t1, t2;
4485
4486 if (pre_shift >= BITS_PER_WORD
4487 || post_shift >= BITS_PER_WORD)
4488 goto fail1;
4489
4490 t1 = expand_shift
4491 (RSHIFT_EXPR, int_mode, op0,
4492 pre_shift, NULL_RTX, 1);
4493 extra_cost
4494 = (shift_cost (speed, int_mode, pre_shift)
4495 + shift_cost (speed, int_mode, post_shift));
4496 t2 = expmed_mult_highpart
4497 (int_mode, t1,
4498 gen_int_mode (ml, int_mode),
4499 NULL_RTX, 1, max_cost - extra_cost);
4500 if (t2 == 0)
4501 goto fail1;
4502 quotient = expand_shift
4503 (RSHIFT_EXPR, int_mode, t2,
4504 post_shift, tquotient, 1);
4505 }
4506 }
4507 }
4508 else /* Too wide mode to use tricky code */
4509 break;
4510
4511 insn = get_last_insn ();
4512 if (insn != last)
4513 set_dst_reg_note (insn, REG_EQUAL,
4514 gen_rtx_UDIV (int_mode, op0, op1),
4515 quotient);
4516 }
4517 else /* TRUNC_DIV, signed */
4518 {
4519 unsigned HOST_WIDE_INT ml;
4520 int lgup, post_shift;
4521 rtx mlr;
4522 HOST_WIDE_INT d = INTVAL (op1);
4523 unsigned HOST_WIDE_INT abs_d;
4524
4525 /* Not prepared to handle division/remainder by
4526 0xffffffffffffffff8000000000000000 etc. */
4527 if (d == HOST_WIDE_INT_MIN && size > HOST_BITS_PER_WIDE_INT)
4528 break;
4529
4530 /* Since d might be INT_MIN, we have to cast to
4531 unsigned HOST_WIDE_INT before negating to avoid
4532 undefined signed overflow. */
4533 abs_d = (d >= 0
4534 ? (unsigned HOST_WIDE_INT) d
4535 : - (unsigned HOST_WIDE_INT) d);
4536
4537 /* n rem d = n rem -d */
4538 if (rem_flag && d < 0)
4539 {
4540 d = abs_d;
4541 op1 = gen_int_mode (abs_d, int_mode);
4542 }
4543
4544 if (d == 1)
4545 quotient = op0;
4546 else if (d == -1)
4547 quotient = expand_unop (int_mode, neg_optab, op0,
4548 tquotient, 0);
4549 else if (size <= HOST_BITS_PER_WIDE_INT
4550 && abs_d == HOST_WIDE_INT_1U << (size - 1))
4551 {
4552 /* This case is not handled correctly below. */
4553 quotient = emit_store_flag (tquotient, EQ, op0, op1,
4554 int_mode, 1, 1);
4555 if (quotient == 0)
4556 goto fail1;
4557 }
4558 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4559 && (size <= HOST_BITS_PER_WIDE_INT || d >= 0)
4560 && (rem_flag
4561 ? smod_pow2_cheap (speed, int_mode)
4562 : sdiv_pow2_cheap (speed, int_mode))
4563 /* We assume that the cheap metric is true if the
4564 optab has an expander for this mode. */
4565 && ((optab_handler ((rem_flag ? smod_optab
4566 : sdiv_optab),
4567 int_mode)
4568 != CODE_FOR_nothing)
4569 || (optab_handler (sdivmod_optab, int_mode)
4570 != CODE_FOR_nothing)))
4571 ;
4572 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4573 {
4574 if (rem_flag)
4575 {
4576 remainder = expand_smod_pow2 (int_mode, op0, d);
4577 if (remainder)
4578 return gen_lowpart (mode, remainder);
4579 }
4580
4581 if (sdiv_pow2_cheap (speed, int_mode)
4582 && ((optab_handler (sdiv_optab, int_mode)
4583 != CODE_FOR_nothing)
4584 || (optab_handler (sdivmod_optab, int_mode)
4585 != CODE_FOR_nothing)))
4586 quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4587 int_mode, op0,
4588 gen_int_mode (abs_d,
4589 int_mode),
4590 NULL_RTX, 0);
4591 else
4592 quotient = expand_sdiv_pow2 (int_mode, op0, abs_d);
4593
4594 /* We have computed OP0 / abs(OP1). If OP1 is negative,
4595 negate the quotient. */
4596 if (d < 0)
4597 {
4598 insn = get_last_insn ();
4599 if (insn != last
4600 && abs_d < (HOST_WIDE_INT_1U
4601 << (HOST_BITS_PER_WIDE_INT - 1)))
4602 set_dst_reg_note (insn, REG_EQUAL,
4603 gen_rtx_DIV (int_mode, op0,
4604 gen_int_mode
4605 (abs_d,
4606 int_mode)),
4607 quotient);
4608
4609 quotient = expand_unop (int_mode, neg_optab,
4610 quotient, quotient, 0);
4611 }
4612 }
4613 else if (size <= HOST_BITS_PER_WIDE_INT)
4614 {
4615 choose_multiplier (abs_d, size, size - 1,
4616 &ml, &post_shift, &lgup);
4617 if (ml < HOST_WIDE_INT_1U << (size - 1))
4618 {
4619 rtx t1, t2, t3;
4620
4621 if (post_shift >= BITS_PER_WORD
4622 || size - 1 >= BITS_PER_WORD)
4623 goto fail1;
4624
4625 extra_cost = (shift_cost (speed, int_mode, post_shift)
4626 + shift_cost (speed, int_mode, size - 1)
4627 + add_cost (speed, int_mode));
4628 t1 = expmed_mult_highpart
4629 (int_mode, op0, gen_int_mode (ml, int_mode),
4630 NULL_RTX, 0, max_cost - extra_cost);
4631 if (t1 == 0)
4632 goto fail1;
4633 t2 = expand_shift
4634 (RSHIFT_EXPR, int_mode, t1,
4635 post_shift, NULL_RTX, 0);
4636 t3 = expand_shift
4637 (RSHIFT_EXPR, int_mode, op0,
4638 size - 1, NULL_RTX, 0);
4639 if (d < 0)
4640 quotient
4641 = force_operand (gen_rtx_MINUS (int_mode, t3, t2),
4642 tquotient);
4643 else
4644 quotient
4645 = force_operand (gen_rtx_MINUS (int_mode, t2, t3),
4646 tquotient);
4647 }
4648 else
4649 {
4650 rtx t1, t2, t3, t4;
4651
4652 if (post_shift >= BITS_PER_WORD
4653 || size - 1 >= BITS_PER_WORD)
4654 goto fail1;
4655
4656 ml |= HOST_WIDE_INT_M1U << (size - 1);
4657 mlr = gen_int_mode (ml, int_mode);
4658 extra_cost = (shift_cost (speed, int_mode, post_shift)
4659 + shift_cost (speed, int_mode, size - 1)
4660 + 2 * add_cost (speed, int_mode));
4661 t1 = expmed_mult_highpart (int_mode, op0, mlr,
4662 NULL_RTX, 0,
4663 max_cost - extra_cost);
4664 if (t1 == 0)
4665 goto fail1;
4666 t2 = force_operand (gen_rtx_PLUS (int_mode, t1, op0),
4667 NULL_RTX);
4668 t3 = expand_shift
4669 (RSHIFT_EXPR, int_mode, t2,
4670 post_shift, NULL_RTX, 0);
4671 t4 = expand_shift
4672 (RSHIFT_EXPR, int_mode, op0,
4673 size - 1, NULL_RTX, 0);
4674 if (d < 0)
4675 quotient
4676 = force_operand (gen_rtx_MINUS (int_mode, t4, t3),
4677 tquotient);
4678 else
4679 quotient
4680 = force_operand (gen_rtx_MINUS (int_mode, t3, t4),
4681 tquotient);
4682 }
4683 }
4684 else /* Too wide mode to use tricky code */
4685 break;
4686
4687 insn = get_last_insn ();
4688 if (insn != last)
4689 set_dst_reg_note (insn, REG_EQUAL,
4690 gen_rtx_DIV (int_mode, op0, op1),
4691 quotient);
4692 }
4693 break;
4694 }
4695 fail1:
4696 delete_insns_since (last);
4697 break;
4698
4699 case FLOOR_DIV_EXPR:
4700 case FLOOR_MOD_EXPR:
4701 /* We will come here only for signed operations. */
4702 if (op1_is_constant && HWI_COMPUTABLE_MODE_P (compute_mode))
4703 {
4704 scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
4705 int size = GET_MODE_BITSIZE (int_mode);
4706 unsigned HOST_WIDE_INT mh, ml;
4707 int pre_shift, lgup, post_shift;
4708 HOST_WIDE_INT d = INTVAL (op1);
4709
4710 if (d > 0)
4711 {
4712 /* We could just as easily deal with negative constants here,
4713 but it does not seem worth the trouble for GCC 2.6. */
4714 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4715 {
4716 pre_shift = floor_log2 (d);
4717 if (rem_flag)
4718 {
4719 unsigned HOST_WIDE_INT mask
4720 = (HOST_WIDE_INT_1U << pre_shift) - 1;
4721 remainder = expand_binop
4722 (int_mode, and_optab, op0,
4723 gen_int_mode (mask, int_mode),
4724 remainder, 0, OPTAB_LIB_WIDEN);
4725 if (remainder)
4726 return gen_lowpart (mode, remainder);
4727 }
4728 quotient = expand_shift
4729 (RSHIFT_EXPR, int_mode, op0,
4730 pre_shift, tquotient, 0);
4731 }
4732 else
4733 {
4734 rtx t1, t2, t3, t4;
4735
4736 mh = choose_multiplier (d, size, size - 1,
4737 &ml, &post_shift, &lgup);
4738 gcc_assert (!mh);
4739
4740 if (post_shift < BITS_PER_WORD
4741 && size - 1 < BITS_PER_WORD)
4742 {
4743 t1 = expand_shift
4744 (RSHIFT_EXPR, int_mode, op0,
4745 size - 1, NULL_RTX, 0);
4746 t2 = expand_binop (int_mode, xor_optab, op0, t1,
4747 NULL_RTX, 0, OPTAB_WIDEN);
4748 extra_cost = (shift_cost (speed, int_mode, post_shift)
4749 + shift_cost (speed, int_mode, size - 1)
4750 + 2 * add_cost (speed, int_mode));
4751 t3 = expmed_mult_highpart
4752 (int_mode, t2, gen_int_mode (ml, int_mode),
4753 NULL_RTX, 1, max_cost - extra_cost);
4754 if (t3 != 0)
4755 {
4756 t4 = expand_shift
4757 (RSHIFT_EXPR, int_mode, t3,
4758 post_shift, NULL_RTX, 1);
4759 quotient = expand_binop (int_mode, xor_optab,
4760 t4, t1, tquotient, 0,
4761 OPTAB_WIDEN);
4762 }
4763 }
4764 }
4765 }
4766 else
4767 {
4768 rtx nsign, t1, t2, t3, t4;
4769 t1 = force_operand (gen_rtx_PLUS (int_mode,
4770 op0, constm1_rtx), NULL_RTX);
4771 t2 = expand_binop (int_mode, ior_optab, op0, t1, NULL_RTX,
4772 0, OPTAB_WIDEN);
4773 nsign = expand_shift (RSHIFT_EXPR, int_mode, t2,
4774 size - 1, NULL_RTX, 0);
4775 t3 = force_operand (gen_rtx_MINUS (int_mode, t1, nsign),
4776 NULL_RTX);
4777 t4 = expand_divmod (0, TRUNC_DIV_EXPR, int_mode, t3, op1,
4778 NULL_RTX, 0);
4779 if (t4)
4780 {
4781 rtx t5;
4782 t5 = expand_unop (int_mode, one_cmpl_optab, nsign,
4783 NULL_RTX, 0);
4784 quotient = force_operand (gen_rtx_PLUS (int_mode, t4, t5),
4785 tquotient);
4786 }
4787 }
4788 }
4789
4790 if (quotient != 0)
4791 break;
4792 delete_insns_since (last);
4793
4794 /* Try using an instruction that produces both the quotient and
4795 remainder, using truncation. We can easily compensate the quotient
4796 or remainder to get floor rounding, once we have the remainder.
4797 Notice that we also compute the final remainder value here,
4798 and return the result right away. */
4799 if (target == 0 || GET_MODE (target) != compute_mode)
4800 target = gen_reg_rtx (compute_mode);
4801
4802 if (rem_flag)
4803 {
4804 remainder
4805 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4806 quotient = gen_reg_rtx (compute_mode);
4807 }
4808 else
4809 {
4810 quotient
4811 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4812 remainder = gen_reg_rtx (compute_mode);
4813 }
4814
4815 if (expand_twoval_binop (sdivmod_optab, op0, op1,
4816 quotient, remainder, 0))
4817 {
4818 /* This could be computed with a branch-less sequence.
4819 Save that for later. */
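	  /* Truncating and floor division agree unless the remainder is
	     nonzero and op0 and op1 have opposite signs; in that case the
	     floor quotient is one smaller and the floor remainder is the
	     truncating remainder plus op1.  E.g. -7 / 2: truncation gives
	     -3 rem -1, flooring gives -4 rem 1.  */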
4820 rtx tem;
4821 rtx_code_label *label = gen_label_rtx ();
4822 do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4823 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4824 NULL_RTX, 0, OPTAB_WIDEN);
4825 do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4826 expand_dec (quotient, const1_rtx);
4827 expand_inc (remainder, op1);
4828 emit_label (label);
4829 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4830 }
4831
4832 /* No luck with division elimination or divmod. Have to do it
4833 by conditionally adjusting op0 *and* the result. */
4834 {
4835 rtx_code_label *label1, *label2, *label3, *label4, *label5;
4836 rtx adjusted_op0;
4837 rtx tem;
4838
4839 quotient = gen_reg_rtx (compute_mode);
4840 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4841 label1 = gen_label_rtx ();
4842 label2 = gen_label_rtx ();
4843 label3 = gen_label_rtx ();
4844 label4 = gen_label_rtx ();
4845 label5 = gen_label_rtx ();
4846 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4847 do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4848 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4849 quotient, 0, OPTAB_LIB_WIDEN);
4850 if (tem != quotient)
4851 emit_move_insn (quotient, tem);
4852 emit_jump_insn (targetm.gen_jump (label5));
4853 emit_barrier ();
4854 emit_label (label1);
4855 expand_inc (adjusted_op0, const1_rtx);
4856 emit_jump_insn (targetm.gen_jump (label4));
4857 emit_barrier ();
4858 emit_label (label2);
4859 do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4860 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4861 quotient, 0, OPTAB_LIB_WIDEN);
4862 if (tem != quotient)
4863 emit_move_insn (quotient, tem);
4864 emit_jump_insn (targetm.gen_jump (label5));
4865 emit_barrier ();
4866 emit_label (label3);
4867 expand_dec (adjusted_op0, const1_rtx);
4868 emit_label (label4);
4869 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4870 quotient, 0, OPTAB_LIB_WIDEN);
4871 if (tem != quotient)
4872 emit_move_insn (quotient, tem);
4873 expand_dec (quotient, const1_rtx);
4874 emit_label (label5);
4875 }
4876 break;
4877
4878 case CEIL_DIV_EXPR:
4879 case CEIL_MOD_EXPR:
4880 if (unsignedp)
4881 {
4882 if (op1_is_constant
4883 && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4884 && (HWI_COMPUTABLE_MODE_P (compute_mode)
4885 || INTVAL (op1) >= 0))
4886 {
4887 scalar_int_mode int_mode
4888 = as_a <scalar_int_mode> (compute_mode);
4889 rtx t1, t2, t3;
4890 unsigned HOST_WIDE_INT d = INTVAL (op1);
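	    /* Ceiling division by the power of two D: the logical shift
	       computes the floor quotient, T3 becomes 1 exactly when some
	       low bits of op0 are set (the division is inexact), and their
	       sum is the ceiling; e.g. 13 / 4 gives 3 + 1 = 4.  */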
4891 t1 = expand_shift (RSHIFT_EXPR, int_mode, op0,
4892 floor_log2 (d), tquotient, 1);
4893 t2 = expand_binop (int_mode, and_optab, op0,
4894 gen_int_mode (d - 1, int_mode),
4895 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4896 t3 = gen_reg_rtx (int_mode);
4897 t3 = emit_store_flag (t3, NE, t2, const0_rtx, int_mode, 1, 1);
4898 if (t3 == 0)
4899 {
4900 rtx_code_label *lab;
4901 lab = gen_label_rtx ();
4902 do_cmp_and_jump (t2, const0_rtx, EQ, int_mode, lab);
4903 expand_inc (t1, const1_rtx);
4904 emit_label (lab);
4905 quotient = t1;
4906 }
4907 else
4908 quotient = force_operand (gen_rtx_PLUS (int_mode, t1, t3),
4909 tquotient);
4910 break;
4911 }
4912
4913 /* Try using an instruction that produces both the quotient and
4914 remainder, using truncation. We can easily compensate the
4915 quotient or remainder to get ceiling rounding, once we have the
4916 	   remainder.  Notice that we also compute the final remainder
4917 value here, and return the result right away. */
4918 if (target == 0 || GET_MODE (target) != compute_mode)
4919 target = gen_reg_rtx (compute_mode);
4920
4921 if (rem_flag)
4922 {
4923 remainder = (REG_P (target)
4924 ? target : gen_reg_rtx (compute_mode));
4925 quotient = gen_reg_rtx (compute_mode);
4926 }
4927 else
4928 {
4929 quotient = (REG_P (target)
4930 ? target : gen_reg_rtx (compute_mode));
4931 remainder = gen_reg_rtx (compute_mode);
4932 }
4933
4934 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4935 remainder, 1))
4936 {
4937 /* This could be computed with a branch-less sequence.
4938 Save that for later. */
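	    /* When the truncating remainder is nonzero, the ceiling quotient
	       is one larger and the matching remainder is r - op1; e.g.
	       13 / 4 turns 3 rem 1 into 4 rem -3, and indeed
	       13 == 4 * 4 - 3.  */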
4939 rtx_code_label *label = gen_label_rtx ();
4940 do_cmp_and_jump (remainder, const0_rtx, EQ,
4941 compute_mode, label);
4942 expand_inc (quotient, const1_rtx);
4943 expand_dec (remainder, op1);
4944 emit_label (label);
4945 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4946 }
4947
4948 /* No luck with division elimination or divmod. Have to do it
4949 by conditionally adjusting op0 *and* the result. */
4950 {
4951 rtx_code_label *label1, *label2;
4952 rtx adjusted_op0, tem;
4953
4954 quotient = gen_reg_rtx (compute_mode);
4955 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4956 label1 = gen_label_rtx ();
4957 label2 = gen_label_rtx ();
4958 do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4959 compute_mode, label1);
4960 emit_move_insn (quotient, const0_rtx);
4961 emit_jump_insn (targetm.gen_jump (label2));
4962 emit_barrier ();
4963 emit_label (label1);
4964 expand_dec (adjusted_op0, const1_rtx);
4965 tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4966 quotient, 1, OPTAB_LIB_WIDEN);
4967 if (tem != quotient)
4968 emit_move_insn (quotient, tem);
4969 expand_inc (quotient, const1_rtx);
4970 emit_label (label2);
4971 }
4972 }
4973 else /* signed */
4974 {
4975 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4976 && INTVAL (op1) >= 0)
4977 {
4978 /* This is extremely similar to the code for the unsigned case
4979 above. For 2.7 we should merge these variants, but for
4980 2.6.1 I don't want to touch the code for unsigned since that
4981 	       gets used in C.  The signed case will only be used by other
4982 languages (Ada). */
4983
4984 rtx t1, t2, t3;
4985 unsigned HOST_WIDE_INT d = INTVAL (op1);
4986 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4987 floor_log2 (d), tquotient, 0);
4988 t2 = expand_binop (compute_mode, and_optab, op0,
4989 gen_int_mode (d - 1, compute_mode),
4990 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4991 t3 = gen_reg_rtx (compute_mode);
4992 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4993 compute_mode, 1, 1);
4994 if (t3 == 0)
4995 {
4996 rtx_code_label *lab;
4997 lab = gen_label_rtx ();
4998 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4999 expand_inc (t1, const1_rtx);
5000 emit_label (lab);
5001 quotient = t1;
5002 }
5003 else
5004 quotient = force_operand (gen_rtx_PLUS (compute_mode,
5005 t1, t3),
5006 tquotient);
5007 break;
5008 }
5009
5010 /* Try using an instruction that produces both the quotient and
5011 remainder, using truncation. We can easily compensate the
5012 quotient or remainder to get ceiling rounding, once we have the
5013 	     remainder.  Notice that we also compute the final remainder
5014 value here, and return the result right away. */
5015 if (target == 0 || GET_MODE (target) != compute_mode)
5016 target = gen_reg_rtx (compute_mode);
5017 if (rem_flag)
5018 {
5019 	      remainder = (REG_P (target)
5020 ? target : gen_reg_rtx (compute_mode));
5021 quotient = gen_reg_rtx (compute_mode);
5022 }
5023 else
5024 {
5025 quotient = (REG_P (target)
5026 ? target : gen_reg_rtx (compute_mode));
5027 remainder = gen_reg_rtx (compute_mode);
5028 }
5029
5030 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
5031 remainder, 0))
5032 {
5033 /* This could be computed with a branch-less sequence.
5034 Save that for later. */
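	      /* For signed operands, truncation already equals the ceiling
		 when the exact quotient is negative, so adjust only when the
		 remainder is nonzero and op0 ^ op1 is non-negative, i.e. the
		 operands have the same sign.  */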
5035 rtx tem;
5036 rtx_code_label *label = gen_label_rtx ();
5037 do_cmp_and_jump (remainder, const0_rtx, EQ,
5038 compute_mode, label);
5039 tem = expand_binop (compute_mode, xor_optab, op0, op1,
5040 NULL_RTX, 0, OPTAB_WIDEN);
5041 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
5042 expand_inc (quotient, const1_rtx);
5043 expand_dec (remainder, op1);
5044 emit_label (label);
5045 return gen_lowpart (mode, rem_flag ? remainder : quotient);
5046 }
5047
5048 /* No luck with division elimination or divmod. Have to do it
5049 by conditionally adjusting op0 *and* the result. */
5050 {
5051 rtx_code_label *label1, *label2, *label3, *label4, *label5;
5052 rtx adjusted_op0;
5053 rtx tem;
5054
5055 quotient = gen_reg_rtx (compute_mode);
5056 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
5057 label1 = gen_label_rtx ();
5058 label2 = gen_label_rtx ();
5059 label3 = gen_label_rtx ();
5060 label4 = gen_label_rtx ();
5061 label5 = gen_label_rtx ();
5062 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
5063 do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
5064 compute_mode, label1);
5065 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
5066 quotient, 0, OPTAB_LIB_WIDEN);
5067 if (tem != quotient)
5068 emit_move_insn (quotient, tem);
5069 emit_jump_insn (targetm.gen_jump (label5));
5070 emit_barrier ();
5071 emit_label (label1);
5072 expand_dec (adjusted_op0, const1_rtx);
5073 emit_jump_insn (targetm.gen_jump (label4));
5074 emit_barrier ();
5075 emit_label (label2);
5076 do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
5077 compute_mode, label3);
5078 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
5079 quotient, 0, OPTAB_LIB_WIDEN);
5080 if (tem != quotient)
5081 emit_move_insn (quotient, tem);
5082 emit_jump_insn (targetm.gen_jump (label5));
5083 emit_barrier ();
5084 emit_label (label3);
5085 expand_inc (adjusted_op0, const1_rtx);
5086 emit_label (label4);
5087 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
5088 quotient, 0, OPTAB_LIB_WIDEN);
5089 if (tem != quotient)
5090 emit_move_insn (quotient, tem);
5091 expand_inc (quotient, const1_rtx);
5092 emit_label (label5);
5093 }
5094 }
5095 break;
5096
5097 case EXACT_DIV_EXPR:
5098 if (op1_is_constant && HWI_COMPUTABLE_MODE_P (compute_mode))
5099 {
5100 scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
5101 int size = GET_MODE_BITSIZE (int_mode);
5102 HOST_WIDE_INT d = INTVAL (op1);
5103 unsigned HOST_WIDE_INT ml;
5104 int pre_shift;
5105 rtx t1;
5106
5107 pre_shift = ctz_or_zero (d);
5108 ml = invert_mod2n (d >> pre_shift, size);
5109 t1 = expand_shift (RSHIFT_EXPR, int_mode, op0,
5110 pre_shift, NULL_RTX, unsignedp);
5111 quotient = expand_mult (int_mode, t1, gen_int_mode (ml, int_mode),
5112 NULL_RTX, 1);
5113
5114 insn = get_last_insn ();
5115 set_dst_reg_note (insn, REG_EQUAL,
5116 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
5117 int_mode, op0, op1),
5118 quotient);
5119 }
5120 break;
5121
5122 case ROUND_DIV_EXPR:
5123 case ROUND_MOD_EXPR:
5124 if (unsignedp)
5125 {
5126 scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
5127 rtx tem;
5128 rtx_code_label *label;
5129 label = gen_label_rtx ();
5130 quotient = gen_reg_rtx (int_mode);
5131 remainder = gen_reg_rtx (int_mode);
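	  /* Round to nearest: increment the truncating quotient whenever the
	     remainder exceeds (op1 - 1) / 2, so e.g. 8 / 3 (remainder 2)
	     rounds up to 3 while 7 / 3 (remainder 1) stays at 2; exact
	     halves round up.  */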
5132 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
5133 {
5134 rtx tem;
5135 quotient = expand_binop (int_mode, udiv_optab, op0, op1,
5136 quotient, 1, OPTAB_LIB_WIDEN);
5137 tem = expand_mult (int_mode, quotient, op1, NULL_RTX, 1);
5138 remainder = expand_binop (int_mode, sub_optab, op0, tem,
5139 remainder, 1, OPTAB_LIB_WIDEN);
5140 }
5141 tem = plus_constant (int_mode, op1, -1);
5142 tem = expand_shift (RSHIFT_EXPR, int_mode, tem, 1, NULL_RTX, 1);
5143 do_cmp_and_jump (remainder, tem, LEU, int_mode, label);
5144 expand_inc (quotient, const1_rtx);
5145 expand_dec (remainder, op1);
5146 emit_label (label);
5147 }
5148 else
5149 {
5150 scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
5151 int size = GET_MODE_BITSIZE (int_mode);
5152 rtx abs_rem, abs_op1, tem, mask;
5153 rtx_code_label *label;
5154 label = gen_label_rtx ();
5155 quotient = gen_reg_rtx (int_mode);
5156 remainder = gen_reg_rtx (int_mode);
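	  /* Round to nearest for signed operands: whenever
	     2 * |remainder| >= |op1| the quotient moves away from zero by
	     one.  MASK is the sign of op0 ^ op1 replicated across the word,
	     so (mask ^ 1) - mask is 1 when the exact quotient is positive
	     and -1 when it is negative, and (mask ^ op1) - mask is likewise
	     op1 or -op1 for the remainder update.  */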
5157 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
5158 {
5159 rtx tem;
5160 quotient = expand_binop (int_mode, sdiv_optab, op0, op1,
5161 quotient, 0, OPTAB_LIB_WIDEN);
5162 tem = expand_mult (int_mode, quotient, op1, NULL_RTX, 0);
5163 remainder = expand_binop (int_mode, sub_optab, op0, tem,
5164 remainder, 0, OPTAB_LIB_WIDEN);
5165 }
5166 abs_rem = expand_abs (int_mode, remainder, NULL_RTX, 1, 0);
5167 abs_op1 = expand_abs (int_mode, op1, NULL_RTX, 1, 0);
5168 tem = expand_shift (LSHIFT_EXPR, int_mode, abs_rem,
5169 1, NULL_RTX, 1);
5170 do_cmp_and_jump (tem, abs_op1, LTU, int_mode, label);
5171 tem = expand_binop (int_mode, xor_optab, op0, op1,
5172 NULL_RTX, 0, OPTAB_WIDEN);
5173 mask = expand_shift (RSHIFT_EXPR, int_mode, tem,
5174 size - 1, NULL_RTX, 0);
5175 tem = expand_binop (int_mode, xor_optab, mask, const1_rtx,
5176 NULL_RTX, 0, OPTAB_WIDEN);
5177 tem = expand_binop (int_mode, sub_optab, tem, mask,
5178 NULL_RTX, 0, OPTAB_WIDEN);
5179 expand_inc (quotient, tem);
5180 tem = expand_binop (int_mode, xor_optab, mask, op1,
5181 NULL_RTX, 0, OPTAB_WIDEN);
5182 tem = expand_binop (int_mode, sub_optab, tem, mask,
5183 NULL_RTX, 0, OPTAB_WIDEN);
5184 expand_dec (remainder, tem);
5185 emit_label (label);
5186 }
5187 return gen_lowpart (mode, rem_flag ? remainder : quotient);
5188
5189 default:
5190 gcc_unreachable ();
5191 }
5192
5193 if (quotient == 0)
5194 {
5195 if (target && GET_MODE (target) != compute_mode)
5196 target = 0;
5197
5198 if (rem_flag)
5199 {
5200 /* Try to produce the remainder without producing the quotient.
5201 If we seem to have a divmod pattern that does not require widening,
5202 don't try widening here. We should really have a WIDEN argument
5203 to expand_twoval_binop, since what we'd really like to do here is
5204 1) try a mod insn in compute_mode
5205 2) try a divmod insn in compute_mode
5206 3) try a div insn in compute_mode and multiply-subtract to get
5207 remainder
5208 4) try the same things with widening allowed. */
5209 remainder
5210 = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5211 op0, op1, target,
5212 unsignedp,
5213 ((optab_handler (optab2, compute_mode)
5214 != CODE_FOR_nothing)
5215 ? OPTAB_DIRECT : OPTAB_WIDEN));
5216 if (remainder == 0)
5217 {
5218 /* No luck there. Can we do remainder and divide at once
5219 without a library call? */
5220 remainder = gen_reg_rtx (compute_mode);
5221 if (! expand_twoval_binop ((unsignedp
5222 ? udivmod_optab
5223 : sdivmod_optab),
5224 op0, op1,
5225 NULL_RTX, remainder, unsignedp))
5226 remainder = 0;
5227 }
5228
5229 if (remainder)
5230 return gen_lowpart (mode, remainder);
5231 }
5232
5233 /* Produce the quotient. Try a quotient insn, but not a library call.
5234 If we have a divmod in this mode, use it in preference to widening
5235 the div (for this test we assume it will not fail). Note that optab2
5236 is set to the one of the two optabs that the call below will use. */
5237 quotient
5238 = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
5239 op0, op1, rem_flag ? NULL_RTX : target,
5240 unsignedp,
5241 ((optab_handler (optab2, compute_mode)
5242 != CODE_FOR_nothing)
5243 ? OPTAB_DIRECT : OPTAB_WIDEN));
5244
5245 if (quotient == 0)
5246 {
5247 /* No luck there. Try a quotient-and-remainder insn,
5248 keeping the quotient alone. */
5249 quotient = gen_reg_rtx (compute_mode);
5250 if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
5251 op0, op1,
5252 quotient, NULL_RTX, unsignedp))
5253 {
5254 quotient = 0;
5255 if (! rem_flag)
5256 /* Still no luck. If we are not computing the remainder,
5257 use a library call for the quotient. */
5258 quotient = sign_expand_binop (compute_mode,
5259 udiv_optab, sdiv_optab,
5260 op0, op1, target,
5261 unsignedp, OPTAB_LIB_WIDEN);
5262 }
5263 }
5264 }
5265
5266 if (rem_flag)
5267 {
5268 if (target && GET_MODE (target) != compute_mode)
5269 target = 0;
5270
5271 if (quotient == 0)
5272 {
5273 /* No divide instruction either. Use library for remainder. */
5274 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5275 op0, op1, target,
5276 unsignedp, OPTAB_LIB_WIDEN);
5277 /* No remainder function. Try a quotient-and-remainder
5278 function, keeping the remainder. */
5279 if (!remainder)
5280 {
5281 remainder = gen_reg_rtx (compute_mode);
5282 if (!expand_twoval_binop_libfunc
5283 (unsignedp ? udivmod_optab : sdivmod_optab,
5284 op0, op1,
5285 NULL_RTX, remainder,
5286 unsignedp ? UMOD : MOD))
5287 remainder = NULL_RTX;
5288 }
5289 }
5290 else
5291 {
5292 /* We divided. Now finish doing X - Y * (X / Y). */
5293 remainder = expand_mult (compute_mode, quotient, op1,
5294 NULL_RTX, unsignedp);
5295 remainder = expand_binop (compute_mode, sub_optab, op0,
5296 remainder, target, unsignedp,
5297 OPTAB_LIB_WIDEN);
5298 }
5299 }
5300
5301 return gen_lowpart (mode, rem_flag ? remainder : quotient);
5302 }
5303
5304 /* Return a tree node with data type TYPE, describing the value of X.
5305    Usually this is a VAR_DECL, if there is no obvious better choice.
5306    X may be an expression; however, we only support those expressions
5307 generated by loop.c. */
5308
5309 tree
5310 make_tree (tree type, rtx x)
5311 {
5312 tree t;
5313
5314 switch (GET_CODE (x))
5315 {
5316 case CONST_INT:
5317 case CONST_WIDE_INT:
5318 t = wide_int_to_tree (type, rtx_mode_t (x, TYPE_MODE (type)));
5319 return t;
5320
5321 case CONST_DOUBLE:
5322 STATIC_ASSERT (HOST_BITS_PER_WIDE_INT * 2 <= MAX_BITSIZE_MODE_ANY_INT);
5323 if (TARGET_SUPPORTS_WIDE_INT == 0 && GET_MODE (x) == VOIDmode)
5324 t = wide_int_to_tree (type,
5325 wide_int::from_array (&CONST_DOUBLE_LOW (x), 2,
5326 HOST_BITS_PER_WIDE_INT * 2));
5327 else
5328 t = build_real (type, *CONST_DOUBLE_REAL_VALUE (x));
5329
5330 return t;
5331
5332 case CONST_VECTOR:
5333 {
5334 unsigned int npatterns = CONST_VECTOR_NPATTERNS (x);
5335 unsigned int nelts_per_pattern = CONST_VECTOR_NELTS_PER_PATTERN (x);
5336 tree itype = TREE_TYPE (type);
5337
5338 /* Build a tree with vector elements. */
5339 tree_vector_builder elts (type, npatterns, nelts_per_pattern);
5340 unsigned int count = elts.encoded_nelts ();
5341 for (unsigned int i = 0; i < count; ++i)
5342 {
5343 rtx elt = CONST_VECTOR_ELT (x, i);
5344 elts.quick_push (make_tree (itype, elt));
5345 }
5346
5347 return elts.build ();
5348 }
5349
5350 case PLUS:
5351 return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5352 make_tree (type, XEXP (x, 1)));
5353
5354 case MINUS:
5355 return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5356 make_tree (type, XEXP (x, 1)));
5357
5358 case NEG:
5359 return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5360
5361 case MULT:
5362 return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5363 make_tree (type, XEXP (x, 1)));
5364
5365 case ASHIFT:
5366 return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5367 make_tree (type, XEXP (x, 1)));
5368
5369 case LSHIFTRT:
5370 t = unsigned_type_for (type);
5371 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5372 make_tree (t, XEXP (x, 0)),
5373 make_tree (type, XEXP (x, 1))));
5374
5375 case ASHIFTRT:
5376 t = signed_type_for (type);
5377 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5378 make_tree (t, XEXP (x, 0)),
5379 make_tree (type, XEXP (x, 1))));
5380
5381 case DIV:
5382 if (TREE_CODE (type) != REAL_TYPE)
5383 t = signed_type_for (type);
5384 else
5385 t = type;
5386
5387 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5388 make_tree (t, XEXP (x, 0)),
5389 make_tree (t, XEXP (x, 1))));
5390 case UDIV:
5391 t = unsigned_type_for (type);
5392 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5393 make_tree (t, XEXP (x, 0)),
5394 make_tree (t, XEXP (x, 1))));
5395
5396 case SIGN_EXTEND:
5397 case ZERO_EXTEND:
5398 t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5399 GET_CODE (x) == ZERO_EXTEND);
5400 return fold_convert (type, make_tree (t, XEXP (x, 0)));
5401
5402 case CONST:
5403 return make_tree (type, XEXP (x, 0));
5404
5405 case SYMBOL_REF:
5406 t = SYMBOL_REF_DECL (x);
5407 if (t)
5408 return fold_convert (type, build_fold_addr_expr (t));
5409 /* fall through. */
5410
5411 default:
5412 if (CONST_POLY_INT_P (x))
5413 	return wide_int_to_tree (type, const_poly_int_value (x));
5414
5415 t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5416
5417 /* If TYPE is a POINTER_TYPE, we might need to convert X from
5418 address mode to pointer mode. */
5419 if (POINTER_TYPE_P (type))
5420 x = convert_memory_address_addr_space
5421 (SCALAR_INT_TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5422
5423 /* Note that we do *not* use SET_DECL_RTL here, because we do not
5424 want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */
5425 t->decl_with_rtl.rtl = x;
5426
5427 return t;
5428 }
5429 }
5430
5431 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5432 and returning TARGET.
5433
5434 If TARGET is 0, a pseudo-register or constant is returned. */
5435
5436 rtx
5437 expand_and (machine_mode mode, rtx op0, rtx op1, rtx target)
5438 {
5439 rtx tem = 0;
5440
5441 if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5442 tem = simplify_binary_operation (AND, mode, op0, op1);
5443 if (tem == 0)
5444 tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5445
5446 if (target == 0)
5447 target = tem;
5448 else if (tem != target)
5449 emit_move_insn (target, tem);
5450 return target;
5451 }
5452
5453 /* Helper function for emit_store_flag. */
5454 rtx
5455 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5456 machine_mode mode, machine_mode compare_mode,
5457 int unsignedp, rtx x, rtx y, int normalizep,
5458 machine_mode target_mode)
5459 {
5460 class expand_operand ops[4];
5461 rtx op0, comparison, subtarget;
5462 rtx_insn *last;
5463 scalar_int_mode result_mode = targetm.cstore_mode (icode);
5464 scalar_int_mode int_target_mode;
5465
5466 last = get_last_insn ();
5467 x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5468 y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5469 if (!x || !y)
5470 {
5471 delete_insns_since (last);
5472 return NULL_RTX;
5473 }
5474
5475 if (target_mode == VOIDmode)
5476 int_target_mode = result_mode;
5477 else
5478 int_target_mode = as_a <scalar_int_mode> (target_mode);
5479 if (!target)
5480 target = gen_reg_rtx (int_target_mode);
5481
5482 comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5483
5484 create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5485 create_fixed_operand (&ops[1], comparison);
5486 create_fixed_operand (&ops[2], x);
5487 create_fixed_operand (&ops[3], y);
5488 if (!maybe_expand_insn (icode, 4, ops))
5489 {
5490 delete_insns_since (last);
5491 return NULL_RTX;
5492 }
5493 subtarget = ops[0].value;
5494
5495 /* If we are converting to a wider mode, first convert to
5496 INT_TARGET_MODE, then normalize. This produces better combining
5497 opportunities on machines that have a SIGN_EXTRACT when we are
5498 testing a single bit. This mostly benefits the 68k.
5499
5500 If STORE_FLAG_VALUE does not have the sign bit set when
5501 interpreted in MODE, we can do this conversion as unsigned, which
5502 is usually more efficient. */
5503 if (GET_MODE_PRECISION (int_target_mode) > GET_MODE_PRECISION (result_mode))
5504 {
5505 gcc_assert (GET_MODE_PRECISION (result_mode) != 1
5506 || STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1);
5507
5508 bool unsignedp = (STORE_FLAG_VALUE >= 0);
5509 convert_move (target, subtarget, unsignedp);
5510
5511 op0 = target;
5512 result_mode = int_target_mode;
5513 }
5514 else
5515 op0 = subtarget;
5516
5517 /* If we want to keep subexpressions around, don't reuse our last
5518 target. */
5519 if (optimize)
5520 subtarget = 0;
5521
5522 /* Now normalize to the proper value in MODE. Sometimes we don't
5523 have to do anything. */
5524 if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5525 ;
5526 /* STORE_FLAG_VALUE might be the most negative number, so write
5527      the comparison this way to avoid a compile-time warning.  */
5528 else if (- normalizep == STORE_FLAG_VALUE)
5529 op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5530
5531 /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5532 it hard to use a value of just the sign bit due to ANSI integer
5533 constant typing rules. */
5534 else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5535 op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5536 GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5537 normalizep == 1);
5538 else
5539 {
5540 gcc_assert (STORE_FLAG_VALUE & 1);
5541
5542 op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5543 if (normalizep == -1)
5544 op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5545 }
5546
5547 /* If we were converting to a smaller mode, do the conversion now. */
5548 if (int_target_mode != result_mode)
5549 {
5550 convert_move (target, op0, 0);
5551 return target;
5552 }
5553 else
5554 return op0;
5555 }
5556
5557
5558 /* A subroutine of emit_store_flag that only includes "tricks" that do not
5559 need a recursive call. These are kept separate to avoid infinite
5560 loops. */
5561
5562 static rtx
5563 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5564 machine_mode mode, int unsignedp, int normalizep,
5565 machine_mode target_mode)
5566 {
5567 rtx subtarget;
5568 enum insn_code icode;
5569 machine_mode compare_mode;
5570 enum mode_class mclass;
5571 enum rtx_code scode;
5572
5573 if (unsignedp)
5574 code = unsigned_condition (code);
5575 scode = swap_condition (code);
5576
5577 /* If one operand is constant, make it the second one. Only do this
5578 if the other operand is not constant as well. */
5579
5580 if (swap_commutative_operands_p (op0, op1))
5581 {
5582 std::swap (op0, op1);
5583 code = swap_condition (code);
5584 }
5585
5586 if (mode == VOIDmode)
5587 mode = GET_MODE (op0);
5588
5589 if (CONST_SCALAR_INT_P (op1))
5590 canonicalize_comparison (mode, &code, &op1);
5591
5592 /* For some comparisons with 1 and -1, we can convert this to
5593 comparisons with zero. This will often produce more opportunities for
5594 store-flag insns. */
5595
5596 switch (code)
5597 {
5598 case LT:
5599 if (op1 == const1_rtx)
5600 op1 = const0_rtx, code = LE;
5601 break;
5602 case LE:
5603 if (op1 == constm1_rtx)
5604 op1 = const0_rtx, code = LT;
5605 break;
5606 case GE:
5607 if (op1 == const1_rtx)
5608 op1 = const0_rtx, code = GT;
5609 break;
5610 case GT:
5611 if (op1 == constm1_rtx)
5612 op1 = const0_rtx, code = GE;
5613 break;
5614 case GEU:
5615 if (op1 == const1_rtx)
5616 op1 = const0_rtx, code = NE;
5617 break;
5618 case LTU:
5619 if (op1 == const1_rtx)
5620 op1 = const0_rtx, code = EQ;
5621 break;
5622 default:
5623 break;
5624 }
5625
5626 /* If we are comparing a double-word integer with zero or -1, we can
5627 convert the comparison into one involving a single word. */
5628 scalar_int_mode int_mode;
5629 if (is_int_mode (mode, &int_mode)
5630 && GET_MODE_BITSIZE (int_mode) == BITS_PER_WORD * 2
5631 && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5632 {
5633 rtx tem;
5634 if ((code == EQ || code == NE)
5635 && (op1 == const0_rtx || op1 == constm1_rtx))
5636 {
5637 rtx op00, op01;
5638
5639 /* Do a logical OR or AND of the two words and compare the
5640 result. */
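	  /* A double-word value is zero iff the inclusive OR of its two
	     words is zero, and is all ones iff the AND of its two words is
	     all ones.  */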
5641 op00 = simplify_gen_subreg (word_mode, op0, int_mode, 0);
5642 op01 = simplify_gen_subreg (word_mode, op0, int_mode, UNITS_PER_WORD);
5643 tem = expand_binop (word_mode,
5644 op1 == const0_rtx ? ior_optab : and_optab,
5645 op00, op01, NULL_RTX, unsignedp,
5646 OPTAB_DIRECT);
5647
5648 if (tem != 0)
5649 tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5650 unsignedp, normalizep);
5651 }
5652 else if ((code == LT || code == GE) && op1 == const0_rtx)
5653 {
5654 rtx op0h;
5655
5656 /* If testing the sign bit, can just test on high word. */
5657 op0h = simplify_gen_subreg (word_mode, op0, int_mode,
5658 subreg_highpart_offset (word_mode,
5659 int_mode));
5660 tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5661 unsignedp, normalizep);
5662 }
5663 else
5664 tem = NULL_RTX;
5665
5666 if (tem)
5667 {
5668 if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5669 return tem;
5670 if (!target)
5671 target = gen_reg_rtx (target_mode);
5672
5673 convert_move (target, tem,
5674 !val_signbit_known_set_p (word_mode,
5675 (normalizep ? normalizep
5676 : STORE_FLAG_VALUE)));
5677 return target;
5678 }
5679 }
5680
5681 /* If this is A < 0 or A >= 0, we can do this by taking the ones
5682 complement of A (for GE) and shifting the sign bit to the low bit. */
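  /* For a 32-bit A, for instance, "A < 0" as a 0/1 value is just
     (unsigned) A >> 31 and "A >= 0" is (unsigned) ~A >> 31; using an
     arithmetic shift instead yields the 0/-1 forms.  */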
5683 if (op1 == const0_rtx && (code == LT || code == GE)
5684 && is_int_mode (mode, &int_mode)
5685 && (normalizep || STORE_FLAG_VALUE == 1
5686 || val_signbit_p (int_mode, STORE_FLAG_VALUE)))
5687 {
5688 scalar_int_mode int_target_mode;
5689 subtarget = target;
5690
5691 if (!target)
5692 int_target_mode = int_mode;
5693 else
5694 {
5695 /* If the result is to be wider than OP0, it is best to convert it
5696 first. If it is to be narrower, it is *incorrect* to convert it
5697 first. */
5698 int_target_mode = as_a <scalar_int_mode> (target_mode);
5699 if (GET_MODE_SIZE (int_target_mode) > GET_MODE_SIZE (int_mode))
5700 {
5701 op0 = convert_modes (int_target_mode, int_mode, op0, 0);
5702 int_mode = int_target_mode;
5703 }
5704 }
5705
5706 if (int_target_mode != int_mode)
5707 subtarget = 0;
5708
5709 if (code == GE)
5710 op0 = expand_unop (int_mode, one_cmpl_optab, op0,
5711 ((STORE_FLAG_VALUE == 1 || normalizep)
5712 ? 0 : subtarget), 0);
5713
5714 if (STORE_FLAG_VALUE == 1 || normalizep)
5715 /* If we are supposed to produce a 0/1 value, we want to do
5716 a logical shift from the sign bit to the low-order bit; for
5717 a -1/0 value, we do an arithmetic shift. */
5718 op0 = expand_shift (RSHIFT_EXPR, int_mode, op0,
5719 GET_MODE_BITSIZE (int_mode) - 1,
5720 subtarget, normalizep != -1);
5721
5722 if (int_mode != int_target_mode)
5723 op0 = convert_modes (int_target_mode, int_mode, op0, 0);
5724
5725 return op0;
5726 }
5727
5728 mclass = GET_MODE_CLASS (mode);
5729 FOR_EACH_MODE_FROM (compare_mode, mode)
5730 {
5731 machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5732 icode = optab_handler (cstore_optab, optab_mode);
5733 if (icode != CODE_FOR_nothing)
5734 {
5735 do_pending_stack_adjust ();
5736 rtx tem = emit_cstore (target, icode, code, mode, compare_mode,
5737 unsignedp, op0, op1, normalizep, target_mode);
5738 if (tem)
5739 return tem;
5740
5741 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5742 {
5743 tem = emit_cstore (target, icode, scode, mode, compare_mode,
5744 unsignedp, op1, op0, normalizep, target_mode);
5745 if (tem)
5746 return tem;
5747 }
5748 break;
5749 }
5750 }
5751
5752 return 0;
5753 }
5754
5755 /* Subroutine of emit_store_flag that handles cases in which the operands
5756 are scalar integers. SUBTARGET is the target to use for temporary
5757 operations and TRUEVAL is the value to store when the condition is
5758 true. All other arguments are as for emit_store_flag. */
5759
5760 rtx
5761 emit_store_flag_int (rtx target, rtx subtarget, enum rtx_code code, rtx op0,
5762 rtx op1, scalar_int_mode mode, int unsignedp,
5763 int normalizep, rtx trueval)
5764 {
5765 machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5766 rtx_insn *last = get_last_insn ();
5767
5768 /* If this is an equality comparison of integers, we can try to exclusive-or
5769 (or subtract) the two operands and use a recursive call to try the
5770 comparison with zero. Don't do any of these cases if branches are
5771 very cheap. */
5772
5773 if ((code == EQ || code == NE) && op1 != const0_rtx)
5774 {
5775 rtx tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5776 OPTAB_WIDEN);
5777
5778 if (tem == 0)
5779 tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5780 OPTAB_WIDEN);
5781 if (tem != 0)
5782 tem = emit_store_flag (target, code, tem, const0_rtx,
5783 mode, unsignedp, normalizep);
5784 if (tem != 0)
5785 return tem;
5786
5787 delete_insns_since (last);
5788 }
5789
5790 /* For integer comparisons, try the reverse comparison. However, for
5791      small X, if we'd have to extend it anyway, implementing "X != 0"
5792 as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0". */
5793 rtx_code rcode = reverse_condition (code);
5794 if (can_compare_p (rcode, mode, ccp_store_flag)
5795 && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5796 && code == NE
5797 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5798 && op1 == const0_rtx))
5799 {
5800 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5801 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5802
5803 /* Again, for the reverse comparison, use either an addition or a XOR. */
5804 if (want_add
5805 && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
5806 optimize_insn_for_speed_p ()) == 0)
5807 {
5808 rtx tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5809 STORE_FLAG_VALUE, target_mode);
5810 if (tem != 0)
5811 tem = expand_binop (target_mode, add_optab, tem,
5812 gen_int_mode (normalizep, target_mode),
5813 target, 0, OPTAB_WIDEN);
5814 if (tem != 0)
5815 return tem;
5816 }
5817 else if (!want_add
5818 && rtx_cost (trueval, mode, XOR, 1,
5819 optimize_insn_for_speed_p ()) == 0)
5820 {
5821 rtx tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5822 normalizep, target_mode);
5823 if (tem != 0)
5824 tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5825 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5826 if (tem != 0)
5827 return tem;
5828 }
5829
5830 delete_insns_since (last);
5831 }
5832
5833 /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5834 the constant zero. Reject all other comparisons at this point. Only
5835 do LE and GT if branches are expensive since they are expensive on
5836 2-operand machines. */
5837
5838 if (op1 != const0_rtx
5839 || (code != EQ && code != NE
5840 && (BRANCH_COST (optimize_insn_for_speed_p (),
5841 false) <= 1 || (code != LE && code != GT))))
5842 return 0;
5843
5844 /* Try to put the result of the comparison in the sign bit. Assume we can't
5845 do the necessary operation below. */
5846
5847 rtx tem = 0;
5848
5849 /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has
5850 the sign bit set. */
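  /* E.g. for A == 0 this is 0 | -1 == -1 (sign bit set), while for any
     A > 0 both A and A - 1 are non-negative, so their OR is too.  */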
5851
5852 if (code == LE)
5853 {
5854 /* This is destructive, so SUBTARGET can't be OP0. */
5855 if (rtx_equal_p (subtarget, op0))
5856 subtarget = 0;
5857
5858 tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5859 OPTAB_WIDEN);
5860 if (tem)
5861 tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5862 OPTAB_WIDEN);
5863 }
5864
5865   /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5866 number of bits in the mode of OP0, minus one. */
5867
5868 if (code == GT)
5869 {
5870 if (rtx_equal_p (subtarget, op0))
5871 subtarget = 0;
5872
5873 tem = maybe_expand_shift (RSHIFT_EXPR, mode, op0,
5874 GET_MODE_BITSIZE (mode) - 1,
5875 subtarget, 0);
5876 if (tem)
5877 tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5878 OPTAB_WIDEN);
5879 }
5880
5881 if (code == EQ || code == NE)
5882 {
5883 /* For EQ or NE, one way to do the comparison is to apply an operation
5884 that converts the operand into a positive number if it is nonzero
5885 or zero if it was originally zero. Then, for EQ, we subtract 1 and
5886 for NE we negate. This puts the result in the sign bit. Then we
5887 normalize with a shift, if needed.
5888
5889 Two operations that can do the above actions are ABS and FFS, so try
5890 them. If that doesn't work, and MODE is smaller than a full word,
5891 we can use zero-extension to the wider mode (an unsigned conversion)
5892 as the operation. */
5893
5894 /* Note that ABS doesn't yield a positive number for INT_MIN, but
5895 that is compensated by the subsequent overflow when subtracting
5896 one / negating. */
5897
5898 if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5899 tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5900 else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5901 tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5902 else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5903 {
5904 tem = convert_modes (word_mode, mode, op0, 1);
5905 mode = word_mode;
5906 }
5907
5908 if (tem != 0)
5909 {
5910 if (code == EQ)
5911 tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5912 0, OPTAB_WIDEN);
5913 else
5914 tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5915 }
5916
5917 /* If we couldn't do it that way, for NE we can "or" the two's complement
5918 of the value with itself. For EQ, we take the one's complement of
5919 that "or", which is an extra insn, so we only handle EQ if branches
5920 are expensive. */
5921
5922 if (tem == 0
5923 && (code == NE
5924 || BRANCH_COST (optimize_insn_for_speed_p (),
5925 false) > 1))
5926 {
5927 if (rtx_equal_p (subtarget, op0))
5928 subtarget = 0;
5929
5930 tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5931 tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5932 OPTAB_WIDEN);
5933
5934 if (tem && code == EQ)
5935 tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5936 }
5937 }
5938
5939 if (tem && normalizep)
5940 tem = maybe_expand_shift (RSHIFT_EXPR, mode, tem,
5941 GET_MODE_BITSIZE (mode) - 1,
5942 subtarget, normalizep == 1);
5943
5944 if (tem)
5945 {
5946 if (!target)
5947 ;
5948 else if (GET_MODE (tem) != target_mode)
5949 {
5950 convert_move (target, tem, 0);
5951 tem = target;
5952 }
5953 else if (!subtarget)
5954 {
5955 emit_move_insn (target, tem);
5956 tem = target;
5957 }
5958 }
5959 else
5960 delete_insns_since (last);
5961
5962 return tem;
5963 }
5964
5965 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5966 and storing in TARGET. Normally return TARGET.
5967 Return 0 if that cannot be done.
5968
5969 MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If
5970 it is VOIDmode, they cannot both be CONST_INT.
5971
5972 UNSIGNEDP is for the case where we have to widen the operands
5973 to perform the operation. It says to use zero-extension.
5974
5975 NORMALIZEP is 1 if we should convert the result to be either zero
5976    or one.  NORMALIZEP is -1 if we should convert the result to be
5977 either zero or -1. If NORMALIZEP is zero, the result will be left
5978 "raw" out of the scc insn. */
5979
5980 rtx
5981 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5982 machine_mode mode, int unsignedp, int normalizep)
5983 {
5984 machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5985 enum rtx_code rcode;
5986 rtx subtarget;
5987 rtx tem, trueval;
5988 rtx_insn *last;
5989
5990 /* If we compare constants, we shouldn't use a store-flag operation,
5991 but a constant load. We can get there via the vanilla route that
5992 usually generates a compare-branch sequence, but will in this case
5993 fold the comparison to a constant, and thus elide the branch. */
5994 if (CONSTANT_P (op0) && CONSTANT_P (op1))
5995 return NULL_RTX;
5996
5997 tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5998 target_mode);
5999 if (tem)
6000 return tem;
6001
6002 /* If we reached here, we can't do this with a scc insn, however there
6003 are some comparisons that can be done in other ways. Don't do any
6004 of these cases if branches are very cheap. */
6005 if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
6006 return 0;
6007
6008 /* See what we need to return. We can only return a 1, -1, or the
6009 sign bit. */
6010
6011 if (normalizep == 0)
6012 {
6013 if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
6014 normalizep = STORE_FLAG_VALUE;
6015
6016 else if (val_signbit_p (mode, STORE_FLAG_VALUE))
6017 ;
6018 else
6019 return 0;
6020 }
6021
6022 last = get_last_insn ();
6023
6024 /* If optimizing, use different pseudo registers for each insn, instead
6025 of reusing the same pseudo. This leads to better CSE, but slows
6026 down the compiler, since there are more pseudos. */
6027 subtarget = (!optimize
6028 && (target_mode == mode)) ? target : NULL_RTX;
6029 trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
6030
6031 /* For floating-point comparisons, try the reverse comparison or try
6032 changing the "orderedness" of the comparison. */
6033 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6034 {
6035 enum rtx_code first_code;
6036 bool and_them;
6037
6038 rcode = reverse_condition_maybe_unordered (code);
6039 if (can_compare_p (rcode, mode, ccp_store_flag)
6040 && (code == ORDERED || code == UNORDERED
6041 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
6042 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
6043 {
6044 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
6045 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
6046
6047 /* For the reverse comparison, use either an addition or a XOR. */
6048 if (want_add
6049 && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
6050 optimize_insn_for_speed_p ()) == 0)
6051 {
6052 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
6053 STORE_FLAG_VALUE, target_mode);
6054 if (tem)
6055 return expand_binop (target_mode, add_optab, tem,
6056 gen_int_mode (normalizep, target_mode),
6057 target, 0, OPTAB_WIDEN);
6058 }
6059 else if (!want_add
6060 && rtx_cost (trueval, mode, XOR, 1,
6061 optimize_insn_for_speed_p ()) == 0)
6062 {
6063 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
6064 normalizep, target_mode);
6065 if (tem)
6066 return expand_binop (target_mode, xor_optab, tem, trueval,
6067 target, INTVAL (trueval) >= 0,
6068 OPTAB_WIDEN);
6069 }
6070 }
6071
6072 delete_insns_since (last);
6073
6074 /* Cannot split ORDERED and UNORDERED, only try the above trick. */
6075 if (code == ORDERED || code == UNORDERED)
6076 return 0;
6077
6078 and_them = split_comparison (code, mode, &first_code, &code);
6079
6080 /* If there are no NaNs, the first comparison should always fall through.
6081 Effectively change the comparison to the other one. */
6082 if (!HONOR_NANS (mode))
6083 {
6084 gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
6085 return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
6086 target_mode);
6087 }
6088
6089 if (!HAVE_conditional_move)
6090 return 0;
6091
6092 /* Do not turn a trapping comparison into a non-trapping one. */
6093 if ((code != EQ && code != NE && code != UNEQ && code != LTGT)
6094 && flag_trapping_math)
6095 return 0;
6096
6097 /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
6098 conditional move. */
6099 tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
6100 normalizep, target_mode);
6101 if (tem == 0)
6102 return 0;
6103
6104 if (and_them)
6105 tem = emit_conditional_move (target, code, op0, op1, mode,
6106 tem, const0_rtx, GET_MODE (tem), 0);
6107 else
6108 tem = emit_conditional_move (target, code, op0, op1, mode,
6109 trueval, tem, GET_MODE (tem), 0);
6110
6111 if (tem == 0)
6112 delete_insns_since (last);
6113 return tem;
6114 }
6115
6116 /* The remaining tricks only apply to integer comparisons. */
6117
6118 scalar_int_mode int_mode;
6119 if (is_int_mode (mode, &int_mode))
6120 return emit_store_flag_int (target, subtarget, code, op0, op1, int_mode,
6121 unsignedp, normalizep, trueval);
6122
6123 return 0;
6124 }
6125
6126 /* Like emit_store_flag, but always succeeds. */
6127
6128 rtx
6129 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
6130 machine_mode mode, int unsignedp, int normalizep)
6131 {
6132 rtx tem;
6133 rtx_code_label *label;
6134 rtx trueval, falseval;
6135
6136 /* First see if emit_store_flag can do the job. */
6137 tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
6138 if (tem != 0)
6139 return tem;
6140
6141 /* If one operand is constant, make it the second one. Only do this
6142 if the other operand is not constant as well. */
6143 if (swap_commutative_operands_p (op0, op1))
6144 {
6145 std::swap (op0, op1);
6146 code = swap_condition (code);
6147 }
6148
6149 if (mode == VOIDmode)
6150 mode = GET_MODE (op0);
6151
6152 if (!target)
6153 target = gen_reg_rtx (word_mode);
6154
6155 /* If this failed, we have to do this with set/compare/jump/set code.
6156 For foo != 0, if foo is in OP0, just replace it with 1 if nonzero. */
6157 trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
6158 if (code == NE
6159 && GET_MODE_CLASS (mode) == MODE_INT
6160 && REG_P (target)
6161 && op0 == target
6162 && op1 == const0_rtx)
6163 {
6164 label = gen_label_rtx ();
6165 do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp, mode,
6166 NULL_RTX, NULL, label,
6167 profile_probability::uninitialized ());
6168 emit_move_insn (target, trueval);
6169 emit_label (label);
6170 return target;
6171 }
6172
6173 if (!REG_P (target)
6174 || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
6175 target = gen_reg_rtx (GET_MODE (target));
6176
6177 /* Jump in the right direction if the target cannot implement CODE
6178 but can jump on its reverse condition. */
6179 falseval = const0_rtx;
6180 if (! can_compare_p (code, mode, ccp_jump)
6181 && (! FLOAT_MODE_P (mode)
6182 || code == ORDERED || code == UNORDERED
6183 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
6184 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
6185 {
6186 enum rtx_code rcode;
6187 if (FLOAT_MODE_P (mode))
6188 rcode = reverse_condition_maybe_unordered (code);
6189 else
6190 rcode = reverse_condition (code);
6191
6192 /* Canonicalize to UNORDERED for the libcall. */
6193 if (can_compare_p (rcode, mode, ccp_jump)
6194 || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
6195 {
6196 falseval = trueval;
6197 trueval = const0_rtx;
6198 code = rcode;
6199 }
6200 }
6201
6202 emit_move_insn (target, trueval);
6203 label = gen_label_rtx ();
6204 do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX, NULL,
6205 label, profile_probability::uninitialized ());
6206
6207 emit_move_insn (target, falseval);
6208 emit_label (label);
6209
6210 return target;
6211 }
6212
6213 /* Helper function for canonicalize_cmp_for_target. Swap between inclusive
6214 and exclusive ranges in order to create an equivalent comparison. See
6215 canonicalize_cmp_for_target for the possible cases. */
6216
6217 static enum rtx_code
6218 equivalent_cmp_code (enum rtx_code code)
6219 {
6220 switch (code)
6221 {
6222 case GT:
6223 return GE;
6224 case GE:
6225 return GT;
6226 case LT:
6227 return LE;
6228 case LE:
6229 return LT;
6230 case GTU:
6231 return GEU;
6232 case GEU:
6233 return GTU;
6234 case LTU:
6235 return LEU;
6236 case LEU:
6237 return LTU;
6238
6239 default:
6240 return code;
6241 }
6242 }
6243
6244 /* Choose the more appropriate immediate in scalar integer comparisons.  The
6245 purpose of this is to end up with an immediate which can be loaded into a
6246 register in fewer moves, if possible.
6247
6248 For each integer comparison there exists an equivalent choice:
6249 i) a > b or a >= b + 1
6250 ii) a <= b or a < b + 1
6251 iii) a >= b or a > b - 1
6252 iv) a < b or a <= b - 1
6253
6254 MODE is the mode of the first operand.
6255 CODE points to the comparison code.
6256 IMM points to the rtx containing the immediate. *IMM must satisfy
6257 CONST_SCALAR_INT_P on entry and continues to satisfy CONST_SCALAR_INT_P
6258 on exit. */
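/* For instance, on a target whose compare instruction encodes only a
   limited range of immediates, "x > 0xffff" may be better emitted as
   "x >= 0x10000" when the latter constant is directly encodable; the
   insn_cost comparison below decides which form is actually cheaper.  */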
6259
6260 void
6261 canonicalize_comparison (machine_mode mode, enum rtx_code *code, rtx *imm)
6262 {
6263 if (!SCALAR_INT_MODE_P (mode))
6264 return;
6265
6266 int to_add = 0;
6267 enum signop sgn = unsigned_condition_p (*code) ? UNSIGNED : SIGNED;
6268
6269 /* Extract the immediate value from the rtx. */
6270 wide_int imm_val = rtx_mode_t (*imm, mode);
6271
6272 if (*code == GT || *code == GTU || *code == LE || *code == LEU)
6273 to_add = 1;
6274 else if (*code == GE || *code == GEU || *code == LT || *code == LTU)
6275 to_add = -1;
6276 else
6277 return;
6278
6279 /* Check for overflow/underflow in the case of signed values and
6280 wrapping around in the case of unsigned values. If any occur
6281 cancel the optimization. */
6282 wi::overflow_type overflow = wi::OVF_NONE;
6283 wide_int imm_modif;
6284
6285 if (to_add == 1)
6286 imm_modif = wi::add (imm_val, 1, sgn, &overflow);
6287 else
6288 imm_modif = wi::sub (imm_val, 1, sgn, &overflow);
6289
6290 if (overflow)
6291 return;
6292
6293 /* The following creates a pseudo; if we cannot do that, bail out. */
6294 if (!can_create_pseudo_p ())
6295 return;
6296
6297 rtx reg = gen_rtx_REG (mode, LAST_VIRTUAL_REGISTER + 1);
6298 rtx new_imm = immed_wide_int_const (imm_modif, mode);
6299
6300 rtx_insn *old_rtx = gen_move_insn (reg, *imm);
6301 rtx_insn *new_rtx = gen_move_insn (reg, new_imm);
6302
6303 /* Update the immediate and the code. */
6304 if (insn_cost (old_rtx, true) > insn_cost (new_rtx, true))
6305 {
6306 *code = equivalent_cmp_code (*code);
6307 *imm = new_imm;
6308 }
6309 }
6310
6311
6312
6313 /* Perform possibly multi-word comparison and conditional jump to LABEL
6314    if ARG1 OP ARG2 is true, where ARG1 and ARG2 are of mode MODE.  This is
6315 now a thin wrapper around do_compare_rtx_and_jump. */
6316
6317 static void
6318 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, machine_mode mode,
6319 rtx_code_label *label)
6320 {
6321 int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
6322 do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode, NULL_RTX,
6323 NULL, label, profile_probability::uninitialized ());
6324 }
6325