xref: /netbsd-src/external/gpl3/gcc.old/dist/gcc/expmed.c (revision 8feb0f0b7eaff0608f8350bbfa3098827b4bb91b)
1 /* Medium-level subroutines: convert bit-field store and extract
2    and shifts, multiplies and divides to rtl instructions.
3    Copyright (C) 1987-2020 Free Software Foundation, Inc.
4 
5 This file is part of GCC.
6 
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11 
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3.  If not see
19 <http://www.gnu.org/licenses/>.  */
20 
21 /* Work around tree-optimization/91825.  */
22 #pragma GCC diagnostic warning "-Wmaybe-uninitialized"
23 
24 #include "config.h"
25 #include "system.h"
26 #include "coretypes.h"
27 #include "backend.h"
28 #include "target.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "predict.h"
32 #include "memmodel.h"
33 #include "tm_p.h"
34 #include "expmed.h"
35 #include "optabs.h"
36 #include "regs.h"
37 #include "emit-rtl.h"
38 #include "diagnostic-core.h"
39 #include "fold-const.h"
40 #include "stor-layout.h"
41 #include "dojump.h"
42 #include "explow.h"
43 #include "expr.h"
44 #include "langhooks.h"
45 #include "tree-vector-builder.h"
46 
47 struct target_expmed default_target_expmed;
48 #if SWITCHABLE_TARGET
49 struct target_expmed *this_target_expmed = &default_target_expmed;
50 #endif
51 
52 static bool store_integral_bit_field (rtx, opt_scalar_int_mode,
53 				      unsigned HOST_WIDE_INT,
54 				      unsigned HOST_WIDE_INT,
55 				      poly_uint64, poly_uint64,
56 				      machine_mode, rtx, bool, bool);
57 static void store_fixed_bit_field (rtx, opt_scalar_int_mode,
58 				   unsigned HOST_WIDE_INT,
59 				   unsigned HOST_WIDE_INT,
60 				   poly_uint64, poly_uint64,
61 				   rtx, scalar_int_mode, bool);
62 static void store_fixed_bit_field_1 (rtx, scalar_int_mode,
63 				     unsigned HOST_WIDE_INT,
64 				     unsigned HOST_WIDE_INT,
65 				     rtx, scalar_int_mode, bool);
66 static void store_split_bit_field (rtx, opt_scalar_int_mode,
67 				   unsigned HOST_WIDE_INT,
68 				   unsigned HOST_WIDE_INT,
69 				   poly_uint64, poly_uint64,
70 				   rtx, scalar_int_mode, bool);
71 static rtx extract_integral_bit_field (rtx, opt_scalar_int_mode,
72 				       unsigned HOST_WIDE_INT,
73 				       unsigned HOST_WIDE_INT, int, rtx,
74 				       machine_mode, machine_mode, bool, bool);
75 static rtx extract_fixed_bit_field (machine_mode, rtx, opt_scalar_int_mode,
76 				    unsigned HOST_WIDE_INT,
77 				    unsigned HOST_WIDE_INT, rtx, int, bool);
78 static rtx extract_fixed_bit_field_1 (machine_mode, rtx, scalar_int_mode,
79 				      unsigned HOST_WIDE_INT,
80 				      unsigned HOST_WIDE_INT, rtx, int, bool);
81 static rtx lshift_value (machine_mode, unsigned HOST_WIDE_INT, int);
82 static rtx extract_split_bit_field (rtx, opt_scalar_int_mode,
83 				    unsigned HOST_WIDE_INT,
84 				    unsigned HOST_WIDE_INT, int, bool);
85 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, machine_mode, rtx_code_label *);
86 static rtx expand_smod_pow2 (scalar_int_mode, rtx, HOST_WIDE_INT);
87 static rtx expand_sdiv_pow2 (scalar_int_mode, rtx, HOST_WIDE_INT);
88 
89 /* Return a constant integer mask value of mode MODE with BITSIZE ones
90    followed by BITPOS zeros, or the complement of that if COMPLEMENT.
91    The mask is truncated if necessary to the width of mode MODE.  The
92    mask is zero-extended if BITSIZE+BITPOS is too small for MODE.  */
93 
94 static inline rtx
95 mask_rtx (scalar_int_mode mode, int bitpos, int bitsize, bool complement)
96 {
97   return immed_wide_int_const
98     (wi::shifted_mask (bitpos, bitsize, complement,
99 		       GET_MODE_PRECISION (mode)), mode);
100 }
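/* For example, mask_rtx (SImode, 4, 8, false) yields the constant
   0x00000ff0 (eight ones starting at bit 4), and the complemented form
   mask_rtx (SImode, 4, 8, true) yields 0xfffff00f.  */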
101 
102 /* Test whether a value is zero or a power of two.  */
103 #define EXACT_POWER_OF_2_OR_ZERO_P(x) \
104   (((x) & ((x) - HOST_WIDE_INT_1U)) == 0)
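/* The test relies on the usual bit trick: x & (x - 1) clears the lowest
   set bit, so the result is zero exactly when x has at most one bit set,
   e.g. 8 & 7 == 0, while 6 & 5 == 4.  */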
105 
106 struct init_expmed_rtl
107 {
108   rtx reg;
109   rtx plus;
110   rtx neg;
111   rtx mult;
112   rtx sdiv;
113   rtx udiv;
114   rtx sdiv_32;
115   rtx smod_32;
116   rtx wide_mult;
117   rtx wide_lshr;
118   rtx wide_trunc;
119   rtx shift;
120   rtx shift_mult;
121   rtx shift_add;
122   rtx shift_sub0;
123   rtx shift_sub1;
124   rtx zext;
125   rtx trunc;
126 
127   rtx pow2[MAX_BITS_PER_WORD];
128   rtx cint[MAX_BITS_PER_WORD];
129 };
130 
131 static void
132 init_expmed_one_conv (struct init_expmed_rtl *all, scalar_int_mode to_mode,
133 		      scalar_int_mode from_mode, bool speed)
134 {
135   int to_size, from_size;
136   rtx which;
137 
138   to_size = GET_MODE_PRECISION (to_mode);
139   from_size = GET_MODE_PRECISION (from_mode);
140 
141   /* Most partial integers have a precision less than the "full"
142      integer they require for storage.  If one doesn't, reduce its
143      bit size by one here so that the comparison below still treats
144      it as narrower than the corresponding full mode.  */
145   if (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT
146       && pow2p_hwi (to_size))
147     to_size --;
148   if (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT
149       && pow2p_hwi (from_size))
150     from_size --;
151 
152   /* Assume cost of zero-extend and sign-extend is the same.  */
153   which = (to_size < from_size ? all->trunc : all->zext);
154 
155   PUT_MODE (all->reg, from_mode);
156   set_convert_cost (to_mode, from_mode, speed,
157 		    set_src_cost (which, to_mode, speed));
158 }
159 
160 static void
161 init_expmed_one_mode (struct init_expmed_rtl *all,
162 		      machine_mode mode, int speed)
163 {
164   int m, n, mode_bitsize;
165   machine_mode mode_from;
166 
167   mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
168 
169   PUT_MODE (all->reg, mode);
170   PUT_MODE (all->plus, mode);
171   PUT_MODE (all->neg, mode);
172   PUT_MODE (all->mult, mode);
173   PUT_MODE (all->sdiv, mode);
174   PUT_MODE (all->udiv, mode);
175   PUT_MODE (all->sdiv_32, mode);
176   PUT_MODE (all->smod_32, mode);
177   PUT_MODE (all->wide_trunc, mode);
178   PUT_MODE (all->shift, mode);
179   PUT_MODE (all->shift_mult, mode);
180   PUT_MODE (all->shift_add, mode);
181   PUT_MODE (all->shift_sub0, mode);
182   PUT_MODE (all->shift_sub1, mode);
183   PUT_MODE (all->zext, mode);
184   PUT_MODE (all->trunc, mode);
185 
186   set_add_cost (speed, mode, set_src_cost (all->plus, mode, speed));
187   set_neg_cost (speed, mode, set_src_cost (all->neg, mode, speed));
188   set_mul_cost (speed, mode, set_src_cost (all->mult, mode, speed));
189   set_sdiv_cost (speed, mode, set_src_cost (all->sdiv, mode, speed));
190   set_udiv_cost (speed, mode, set_src_cost (all->udiv, mode, speed));
191 
192   set_sdiv_pow2_cheap (speed, mode, (set_src_cost (all->sdiv_32, mode, speed)
193 				     <= 2 * add_cost (speed, mode)));
194   set_smod_pow2_cheap (speed, mode, (set_src_cost (all->smod_32, mode, speed)
195 				     <= 4 * add_cost (speed, mode)));
196 
197   set_shift_cost (speed, mode, 0, 0);
198   {
199     int cost = add_cost (speed, mode);
200     set_shiftadd_cost (speed, mode, 0, cost);
201     set_shiftsub0_cost (speed, mode, 0, cost);
202     set_shiftsub1_cost (speed, mode, 0, cost);
203   }
204 
205   n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
206   for (m = 1; m < n; m++)
207     {
208       XEXP (all->shift, 1) = all->cint[m];
209       XEXP (all->shift_mult, 1) = all->pow2[m];
210 
211       set_shift_cost (speed, mode, m, set_src_cost (all->shift, mode, speed));
212       set_shiftadd_cost (speed, mode, m, set_src_cost (all->shift_add, mode,
213 						       speed));
214       set_shiftsub0_cost (speed, mode, m, set_src_cost (all->shift_sub0, mode,
215 							speed));
216       set_shiftsub1_cost (speed, mode, m, set_src_cost (all->shift_sub1, mode,
217 							speed));
218     }
219 
220   scalar_int_mode int_mode_to;
221   if (is_a <scalar_int_mode> (mode, &int_mode_to))
222     {
223       for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
224 	   mode_from = (machine_mode)(mode_from + 1))
225 	init_expmed_one_conv (all, int_mode_to,
226 			      as_a <scalar_int_mode> (mode_from), speed);
227 
228       scalar_int_mode wider_mode;
229       if (GET_MODE_CLASS (int_mode_to) == MODE_INT
230 	  && GET_MODE_WIDER_MODE (int_mode_to).exists (&wider_mode))
231 	{
232 	  PUT_MODE (all->zext, wider_mode);
233 	  PUT_MODE (all->wide_mult, wider_mode);
234 	  PUT_MODE (all->wide_lshr, wider_mode);
235 	  XEXP (all->wide_lshr, 1)
236 	    = gen_int_shift_amount (wider_mode, mode_bitsize);
237 
238 	  set_mul_widen_cost (speed, wider_mode,
239 			      set_src_cost (all->wide_mult, wider_mode, speed));
240 	  set_mul_highpart_cost (speed, int_mode_to,
241 				 set_src_cost (all->wide_trunc,
242 					       int_mode_to, speed));
243 	}
244     }
245 }
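/* A rough picture of what this fills in: for each mode and each shift
   count M, the cost tables record how expensive "x << M", "(x << M) + y"
   and the two subtract forms are, so that later passes (e.g. the
   multiply-by-constant synthesis used by expand_mult) can compare a
   shift/add sequence against a real multiply in the same cost units.  */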
246 
247 void
248 init_expmed (void)
249 {
250   struct init_expmed_rtl all;
251   machine_mode mode = QImode;
252   int m, speed;
253 
254   memset (&all, 0, sizeof all);
255   for (m = 1; m < MAX_BITS_PER_WORD; m++)
256     {
257       all.pow2[m] = GEN_INT (HOST_WIDE_INT_1 << m);
258       all.cint[m] = GEN_INT (m);
259     }
260 
261   /* Avoid using hard regs in ways which may be unsupported.  */
262   all.reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
263   all.plus = gen_rtx_PLUS (mode, all.reg, all.reg);
264   all.neg = gen_rtx_NEG (mode, all.reg);
265   all.mult = gen_rtx_MULT (mode, all.reg, all.reg);
266   all.sdiv = gen_rtx_DIV (mode, all.reg, all.reg);
267   all.udiv = gen_rtx_UDIV (mode, all.reg, all.reg);
268   all.sdiv_32 = gen_rtx_DIV (mode, all.reg, all.pow2[5]);
269   all.smod_32 = gen_rtx_MOD (mode, all.reg, all.pow2[5]);
270   all.zext = gen_rtx_ZERO_EXTEND (mode, all.reg);
271   all.wide_mult = gen_rtx_MULT (mode, all.zext, all.zext);
272   all.wide_lshr = gen_rtx_LSHIFTRT (mode, all.wide_mult, all.reg);
273   all.wide_trunc = gen_rtx_TRUNCATE (mode, all.wide_lshr);
274   all.shift = gen_rtx_ASHIFT (mode, all.reg, all.reg);
275   all.shift_mult = gen_rtx_MULT (mode, all.reg, all.reg);
276   all.shift_add = gen_rtx_PLUS (mode, all.shift_mult, all.reg);
277   all.shift_sub0 = gen_rtx_MINUS (mode, all.shift_mult, all.reg);
278   all.shift_sub1 = gen_rtx_MINUS (mode, all.reg, all.shift_mult);
279   all.trunc = gen_rtx_TRUNCATE (mode, all.reg);
280 
281   for (speed = 0; speed < 2; speed++)
282     {
283       crtl->maybe_hot_insn_p = speed;
284       set_zero_cost (speed, set_src_cost (const0_rtx, mode, speed));
285 
286       for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
287 	   mode = (machine_mode)(mode + 1))
288 	init_expmed_one_mode (&all, mode, speed);
289 
290       if (MIN_MODE_PARTIAL_INT != VOIDmode)
291 	for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
292 	     mode = (machine_mode)(mode + 1))
293 	  init_expmed_one_mode (&all, mode, speed);
294 
295       if (MIN_MODE_VECTOR_INT != VOIDmode)
296 	for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
297 	     mode = (machine_mode)(mode + 1))
298 	  init_expmed_one_mode (&all, mode, speed);
299     }
300 
301   if (alg_hash_used_p ())
302     {
303       struct alg_hash_entry *p = alg_hash_entry_ptr (0);
304       memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
305     }
306   else
307     set_alg_hash_used_p (true);
308   default_rtl_profile ();
309 
310   ggc_free (all.trunc);
311   ggc_free (all.shift_sub1);
312   ggc_free (all.shift_sub0);
313   ggc_free (all.shift_add);
314   ggc_free (all.shift_mult);
315   ggc_free (all.shift);
316   ggc_free (all.wide_trunc);
317   ggc_free (all.wide_lshr);
318   ggc_free (all.wide_mult);
319   ggc_free (all.zext);
320   ggc_free (all.smod_32);
321   ggc_free (all.sdiv_32);
322   ggc_free (all.udiv);
323   ggc_free (all.sdiv);
324   ggc_free (all.mult);
325   ggc_free (all.neg);
326   ggc_free (all.plus);
327   ggc_free (all.reg);
328 }
329 
330 /* Return an rtx representing minus the value of X.
331    MODE is the intended mode of the result,
332    useful if X is a CONST_INT.  */
333 
334 rtx
335 negate_rtx (machine_mode mode, rtx x)
336 {
337   rtx result = simplify_unary_operation (NEG, mode, x, mode);
338 
339   if (result == 0)
340     result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
341 
342   return result;
343 }
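/* simplify_unary_operation folds the easy cases at compile time, e.g.
   negating a CONST_INT 5 yields CONST_INT -5 directly; only when no
   simplification is possible is a real NEG instruction emitted via
   expand_unop.  */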
344 
345 /* Whether reverse storage order is supported on the target.  */
346 static int reverse_storage_order_supported = -1;
347 
348 /* Check whether reverse storage order is supported on the target.  */
349 
350 static void
351 check_reverse_storage_order_support (void)
352 {
353   if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
354     {
355       reverse_storage_order_supported = 0;
356       sorry ("reverse scalar storage order");
357     }
358   else
359     reverse_storage_order_supported = 1;
360 }
361 
362 /* Whether reverse FP storage order is supported on the target.  */
363 static int reverse_float_storage_order_supported = -1;
364 
365 /* Check whether reverse FP storage order is supported on the target.  */
366 
367 static void
368 check_reverse_float_storage_order_support (void)
369 {
370   if (FLOAT_WORDS_BIG_ENDIAN != WORDS_BIG_ENDIAN)
371     {
372       reverse_float_storage_order_supported = 0;
373       sorry ("reverse floating-point scalar storage order");
374     }
375   else
376     reverse_float_storage_order_supported = 1;
377 }
378 
379 /* Return an rtx representing value of X with reverse storage order.
380    MODE is the intended mode of the result,
381    useful if X is a CONST_INT.  */
382 
383 rtx
384 flip_storage_order (machine_mode mode, rtx x)
385 {
386   scalar_int_mode int_mode;
387   rtx result;
388 
389   if (mode == QImode)
390     return x;
391 
392   if (COMPLEX_MODE_P (mode))
393     {
394       rtx real = read_complex_part (x, false);
395       rtx imag = read_complex_part (x, true);
396 
397       real = flip_storage_order (GET_MODE_INNER (mode), real);
398       imag = flip_storage_order (GET_MODE_INNER (mode), imag);
399 
400       return gen_rtx_CONCAT (mode, real, imag);
401     }
402 
403   if (__builtin_expect (reverse_storage_order_supported < 0, 0))
404     check_reverse_storage_order_support ();
405 
406   if (!is_a <scalar_int_mode> (mode, &int_mode))
407     {
408       if (FLOAT_MODE_P (mode)
409 	  && __builtin_expect (reverse_float_storage_order_supported < 0, 0))
410 	check_reverse_float_storage_order_support ();
411 
412       if (!int_mode_for_size (GET_MODE_PRECISION (mode), 0).exists (&int_mode))
413 	{
414 	  sorry ("reverse storage order for %smode", GET_MODE_NAME (mode));
415 	  return x;
416 	}
417       x = gen_lowpart (int_mode, x);
418     }
419 
420   result = simplify_unary_operation (BSWAP, int_mode, x, int_mode);
421   if (result == 0)
422     result = expand_unop (int_mode, bswap_optab, x, NULL_RTX, 1);
423 
424   if (int_mode != mode)
425     result = gen_lowpart (mode, result);
426 
427   return result;
428 }
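/* For scalar integer modes this boils down to a byte swap, e.g. flipping
   an SImode value 0x11223344 gives 0x44332211.  Non-integer modes are
   first punned to an integer mode of the same size, swapped, and punned
   back, which is why an integer mode of matching precision must exist.  */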
429 
430 /* If MODE is set, adjust bitfield memory MEM so that it points to the
431    first unit of mode MODE that contains a bitfield of size BITSIZE at
432    bit position BITNUM.  If MODE is not set, return a BLKmode reference
433    to every byte in the bitfield.  Set *NEW_BITNUM to the bit position
434    of the field within the new memory.  */
435 
436 static rtx
437 narrow_bit_field_mem (rtx mem, opt_scalar_int_mode mode,
438 		      unsigned HOST_WIDE_INT bitsize,
439 		      unsigned HOST_WIDE_INT bitnum,
440 		      unsigned HOST_WIDE_INT *new_bitnum)
441 {
442   scalar_int_mode imode;
443   if (mode.exists (&imode))
444     {
445       unsigned int unit = GET_MODE_BITSIZE (imode);
446       *new_bitnum = bitnum % unit;
447       HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
448       return adjust_bitfield_address (mem, imode, offset);
449     }
450   else
451     {
452       *new_bitnum = bitnum % BITS_PER_UNIT;
453       HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
454       HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
455 			    / BITS_PER_UNIT);
456       return adjust_bitfield_address_size (mem, BLKmode, offset, size);
457     }
458 }
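/* Worked example: for a bitfield at BITNUM == 37 accessed in HImode
   (16-bit units), *NEW_BITNUM becomes 37 % 16 == 5 and the MEM is
   advanced by (37 - 5) / 8 == 4 bytes, so the field is then bit 5 of a
   16-bit unit.  */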
459 
460 /* The caller wants to perform insertion or extraction PATTERN on a
461    bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
462    BITREGION_START and BITREGION_END are as for store_bit_field
463    and FIELDMODE is the natural mode of the field.
464 
465    Search for a mode that is compatible with the memory access
466    restrictions and (where applicable) with a register insertion or
467    extraction.  Return the new memory on success, storing the adjusted
468    bit position in *NEW_BITNUM.  Return null otherwise.  */
469 
470 static rtx
471 adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
472 			      rtx op0, HOST_WIDE_INT bitsize,
473 			      HOST_WIDE_INT bitnum,
474 			      poly_uint64 bitregion_start,
475 			      poly_uint64 bitregion_end,
476 			      machine_mode fieldmode,
477 			      unsigned HOST_WIDE_INT *new_bitnum)
478 {
479   bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
480 				bitregion_end, MEM_ALIGN (op0),
481 				MEM_VOLATILE_P (op0));
482   scalar_int_mode best_mode;
483   if (iter.next_mode (&best_mode))
484     {
485       /* We can use a memory in BEST_MODE.  See whether this is true for
486 	 any wider modes.  All other things being equal, we prefer to
487 	 use the widest mode possible because it tends to expose more
488 	 CSE opportunities.  */
489       if (!iter.prefer_smaller_modes ())
490 	{
491 	  /* Limit the search to the mode required by the corresponding
492 	     register insertion or extraction instruction, if any.  */
493 	  scalar_int_mode limit_mode = word_mode;
494 	  extraction_insn insn;
495 	  if (get_best_reg_extraction_insn (&insn, pattern,
496 					    GET_MODE_BITSIZE (best_mode),
497 					    fieldmode))
498 	    limit_mode = insn.field_mode;
499 
500 	  scalar_int_mode wider_mode;
501 	  while (iter.next_mode (&wider_mode)
502 		 && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
503 	    best_mode = wider_mode;
504 	}
505       return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
506 				   new_bitnum);
507     }
508   return NULL_RTX;
509 }
510 
511 /* Return true if a bitfield of size BITSIZE at bit number BITNUM within
512    a structure of mode STRUCT_MODE represents a lowpart subreg.   The subreg
513    offset is then BITNUM / BITS_PER_UNIT.  */
514 
515 static bool
516 lowpart_bit_field_p (poly_uint64 bitnum, poly_uint64 bitsize,
517 		     machine_mode struct_mode)
518 {
519   poly_uint64 regsize = REGMODE_NATURAL_SIZE (struct_mode);
520   if (BYTES_BIG_ENDIAN)
521     return (multiple_p (bitnum, BITS_PER_UNIT)
522 	    && (known_eq (bitnum + bitsize, GET_MODE_BITSIZE (struct_mode))
523 		|| multiple_p (bitnum + bitsize,
524 			       regsize * BITS_PER_UNIT)));
525   else
526     return multiple_p (bitnum, regsize * BITS_PER_UNIT);
527 }
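/* On a little-endian target with 4-byte registers, for instance, bit
   positions 0, 32, 64, ... are lowpart positions, so a 32-bit field at
   BITNUM == 64 can be accessed as the subreg at byte offset 8.  */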
528 
529 /* Return true if -fstrict-volatile-bitfields applies to an access of OP0
530    containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE.
531    Return false if the access would touch memory outside the range
532    BITREGION_START to BITREGION_END for conformance to the C++ memory
533    model.  */
534 
535 static bool
536 strict_volatile_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
537 			    unsigned HOST_WIDE_INT bitnum,
538 			    scalar_int_mode fieldmode,
539 			    poly_uint64 bitregion_start,
540 			    poly_uint64 bitregion_end)
541 {
542   unsigned HOST_WIDE_INT modesize = GET_MODE_BITSIZE (fieldmode);
543 
544   /* -fstrict-volatile-bitfields must be enabled and we must have a
545      volatile MEM.  */
546   if (!MEM_P (op0)
547       || !MEM_VOLATILE_P (op0)
548       || flag_strict_volatile_bitfields <= 0)
549     return false;
550 
551   /* The bit size must not be larger than the field mode, and
552      the field mode must not be larger than a word.  */
553   if (bitsize > modesize || modesize > BITS_PER_WORD)
554     return false;
555 
556   /* Check for cases of unaligned fields that must be split.  */
557   if (bitnum % modesize + bitsize > modesize)
558     return false;
559 
560   /* The memory must be sufficiently aligned for a MODESIZE access.
561      This condition guarantees that the memory access will not
562      touch anything after the end of the structure.  */
563   if (MEM_ALIGN (op0) < modesize)
564     return false;
565 
566   /* Check for cases where the C++ memory model applies.  */
567   if (maybe_ne (bitregion_end, 0U)
568       && (maybe_lt (bitnum - bitnum % modesize, bitregion_start)
569 	  || maybe_gt (bitnum - bitnum % modesize + modesize - 1,
570 		       bitregion_end)))
571     return false;
572 
573   return true;
574 }
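/* Example of a qualifying access: a volatile bitfield with a 16-bit field
   mode, 9 bits wide at bit 3 of a 16-bit-aligned object.  Then bitsize (9)
   <= modesize (16) <= BITS_PER_WORD, the field does not straddle a 16-bit
   boundary (3 + 9 <= 16), and the access stays inside the object, so the
   caller performs it in the declared 16-bit field mode.  */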
575 
576 /* Return true if OP is a memory and if a bitfield of size BITSIZE at
577    bit number BITNUM can be treated as a simple value of mode MODE.
578    Store the byte offset in *BYTENUM if so.  */
579 
580 static bool
581 simple_mem_bitfield_p (rtx op0, poly_uint64 bitsize, poly_uint64 bitnum,
582 		       machine_mode mode, poly_uint64 *bytenum)
583 {
584   return (MEM_P (op0)
585 	  && multiple_p (bitnum, BITS_PER_UNIT, bytenum)
586 	  && known_eq (bitsize, GET_MODE_BITSIZE (mode))
587 	  && (!targetm.slow_unaligned_access (mode, MEM_ALIGN (op0))
588 	      || (multiple_p (bitnum, GET_MODE_ALIGNMENT (mode))
589 		  && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode))));
590 }
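/* For instance, a 32-bit field starting at bit 64 of a MEM satisfies this
   when SImode accesses of that alignment are cheap: *BYTENUM becomes 8
   and the caller can use a plain SImode load or store at that offset.  */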
591 
592 /* Try to use instruction INSV to store VALUE into a field of OP0.
593    If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is a
594    BLKmode MEM.  VALUE_MODE is the mode of VALUE.  BITSIZE and BITNUM
595    are as for store_bit_field.  */
596 
597 static bool
598 store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
599 			    opt_scalar_int_mode op0_mode,
600 			    unsigned HOST_WIDE_INT bitsize,
601 			    unsigned HOST_WIDE_INT bitnum,
602 			    rtx value, scalar_int_mode value_mode)
603 {
604   class expand_operand ops[4];
605   rtx value1;
606   rtx xop0 = op0;
607   rtx_insn *last = get_last_insn ();
608   bool copy_back = false;
609 
610   scalar_int_mode op_mode = insv->field_mode;
611   unsigned int unit = GET_MODE_BITSIZE (op_mode);
612   if (bitsize == 0 || bitsize > unit)
613     return false;
614 
615   if (MEM_P (xop0))
616     /* Get a reference to the first byte of the field.  */
617     xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
618 				 &bitnum);
619   else
620     {
621       /* Convert from counting within OP0 to counting in OP_MODE.  */
622       if (BYTES_BIG_ENDIAN)
623 	bitnum += unit - GET_MODE_BITSIZE (op0_mode.require ());
624 
625       /* If xop0 is a register, we need it in OP_MODE
626 	 to make it acceptable to the format of insv.  */
627       if (GET_CODE (xop0) == SUBREG)
628 	{
629 	  /* If such a SUBREG can't be created, give up.  */
630 	  if (!validate_subreg (op_mode, GET_MODE (SUBREG_REG (xop0)),
631 				SUBREG_REG (xop0), SUBREG_BYTE (xop0)))
632 	    return false;
633 	  /* We can't just change the mode, because this might clobber op0,
634 	     and we will need the original value of op0 if insv fails.  */
635 	  xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0),
636 				 SUBREG_BYTE (xop0));
637 	}
638       if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
639 	xop0 = gen_lowpart_SUBREG (op_mode, xop0);
640     }
641 
642   /* If the destination is a paradoxical subreg such that we need a
643      truncate to the inner mode, perform the insertion on a temporary and
644      truncate the result to the original destination.  Note that we can't
645      just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
646      X) 0)) is (reg:N X).  */
647   if (GET_CODE (xop0) == SUBREG
648       && REG_P (SUBREG_REG (xop0))
649       && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
650 					 op_mode))
651     {
652       rtx tem = gen_reg_rtx (op_mode);
653       emit_move_insn (tem, xop0);
654       xop0 = tem;
655       copy_back = true;
656     }
657 
658   /* There is a similar overflow check at the start of store_bit_field_1,
659      but it only covers the case where the field lies completely
660      outside the register.  Here the field may lie partially in the
661      register, so adjust BITSIZE for this partial-overflow situation.
662      Without this fix, pr48335-2.c is broken on big-endian
663      targets that have a bit-insert instruction, such as arm and
664      aarch64.  */
665   if (bitsize + bitnum > unit && bitnum < unit)
666     {
667       warning (OPT_Wextra, "write of %wu-bit data outside the bound of "
668 	       "destination object, data truncated into %wu-bit",
669 	       bitsize, unit - bitnum);
670       bitsize = unit - bitnum;
671     }
672 
673   /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
674      "backwards" from the size of the unit we are inserting into.
675      Otherwise, we count bits from the most significant on a
676      BYTES/BITS_BIG_ENDIAN machine.  */
677 
678   if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
679     bitnum = unit - bitsize - bitnum;
680 
681   /* Convert VALUE to op_mode (which insv insn wants) in VALUE1.  */
682   value1 = value;
683   if (value_mode != op_mode)
684     {
685       if (GET_MODE_BITSIZE (value_mode) >= bitsize)
686 	{
687 	  rtx tmp;
688 	  /* Optimization: Don't bother really extending VALUE
689 	     if it has all the bits we will actually use.  However,
690 	     if we must narrow it, be sure we do it correctly.  */
691 
692 	  if (GET_MODE_SIZE (value_mode) < GET_MODE_SIZE (op_mode))
693 	    {
694 	      tmp = simplify_subreg (op_mode, value1, value_mode, 0);
695 	      if (! tmp)
696 		tmp = simplify_gen_subreg (op_mode,
697 					   force_reg (value_mode, value1),
698 					   value_mode, 0);
699 	    }
700 	  else
701 	    {
702 	      tmp = gen_lowpart_if_possible (op_mode, value1);
703 	      if (! tmp)
704 		tmp = gen_lowpart (op_mode, force_reg (value_mode, value1));
705 	    }
706 	  value1 = tmp;
707 	}
708       else if (CONST_INT_P (value))
709 	value1 = gen_int_mode (INTVAL (value), op_mode);
710       else
711 	/* Parse phase is supposed to make VALUE's data type
712 	   match that of the component reference, which is a type
713 	   at least as wide as the field; so VALUE should have
714 	   a mode that corresponds to that type.  */
715 	gcc_assert (CONSTANT_P (value));
716     }
717 
718   create_fixed_operand (&ops[0], xop0);
719   create_integer_operand (&ops[1], bitsize);
720   create_integer_operand (&ops[2], bitnum);
721   create_input_operand (&ops[3], value1, op_mode);
722   if (maybe_expand_insn (insv->icode, 4, ops))
723     {
724       if (copy_back)
725 	convert_move (op0, xop0, true);
726       return true;
727     }
728   delete_insns_since (last);
729   return false;
730 }
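/* The expand_operand array above mirrors the operand order of a target's
   insv pattern: operand 0 is the field to be written, operands 1 and 2
   are the field width and position as integers, and operand 3 is the
   value, already converted to the pattern's field mode.  */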
731 
732 /* A subroutine of store_bit_field, with the same arguments.  Return true
733    if the operation could be implemented.
734 
735    If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
736    no other way of implementing the operation.  If FALLBACK_P is false,
737    return false instead.  */
738 
739 static bool
740 store_bit_field_1 (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
741 		   poly_uint64 bitregion_start, poly_uint64 bitregion_end,
742 		   machine_mode fieldmode,
743 		   rtx value, bool reverse, bool fallback_p)
744 {
745   rtx op0 = str_rtx;
746 
747   while (GET_CODE (op0) == SUBREG)
748     {
749       bitnum += subreg_memory_offset (op0) * BITS_PER_UNIT;
750       op0 = SUBREG_REG (op0);
751     }
752 
753   /* No action is needed if the target is a register and if the field
754      lies completely outside that register.  This can occur if the source
755      code contains an out-of-bounds access to a small array.  */
756   if (REG_P (op0) && known_ge (bitnum, GET_MODE_BITSIZE (GET_MODE (op0))))
757     return true;
758 
759   /* Use vec_set patterns for inserting parts of vectors whenever
760      available.  */
761   machine_mode outermode = GET_MODE (op0);
762   scalar_mode innermode = GET_MODE_INNER (outermode);
763   poly_uint64 pos;
764   if (VECTOR_MODE_P (outermode)
765       && !MEM_P (op0)
766       && optab_handler (vec_set_optab, outermode) != CODE_FOR_nothing
767       && fieldmode == innermode
768       && known_eq (bitsize, GET_MODE_BITSIZE (innermode))
769       && multiple_p (bitnum, GET_MODE_BITSIZE (innermode), &pos))
770     {
771       class expand_operand ops[3];
772       enum insn_code icode = optab_handler (vec_set_optab, outermode);
773 
774       create_fixed_operand (&ops[0], op0);
775       create_input_operand (&ops[1], value, innermode);
776       create_integer_operand (&ops[2], pos);
777       if (maybe_expand_insn (icode, 3, ops))
778 	return true;
779     }
780 
781   /* If the target is a register, then overwriting the entire object or
782      storing a full-word or multi-word field can be done with just a SUBREG.  */
783   if (!MEM_P (op0)
784       && known_eq (bitsize, GET_MODE_BITSIZE (fieldmode)))
785     {
786       /* Use the subreg machinery either to narrow OP0 to the required
787 	 words or to cope with mode punning between equal-sized modes.
788 	 In the latter case, use subreg on the rhs side, not lhs.  */
789       rtx sub;
790       HOST_WIDE_INT regnum;
791       poly_uint64 regsize = REGMODE_NATURAL_SIZE (GET_MODE (op0));
792       if (known_eq (bitnum, 0U)
793 	  && known_eq (bitsize, GET_MODE_BITSIZE (GET_MODE (op0))))
794 	{
795 	  sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0);
796 	  if (sub)
797 	    {
798 	      if (reverse)
799 		sub = flip_storage_order (GET_MODE (op0), sub);
800 	      emit_move_insn (op0, sub);
801 	      return true;
802 	    }
803 	}
804       else if (constant_multiple_p (bitnum, regsize * BITS_PER_UNIT, &regnum)
805 	       && multiple_p (bitsize, regsize * BITS_PER_UNIT))
806 	{
807 	  sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
808 				     regnum * regsize);
809 	  if (sub)
810 	    {
811 	      if (reverse)
812 		value = flip_storage_order (fieldmode, value);
813 	      emit_move_insn (sub, value);
814 	      return true;
815 	    }
816 	}
817     }
818 
819   /* If the target is memory, storing any naturally aligned field can be
820      done with a simple store.  For targets that support fast unaligned
821      memory, any naturally sized, unit aligned field can be done directly.  */
822   poly_uint64 bytenum;
823   if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode, &bytenum))
824     {
825       op0 = adjust_bitfield_address (op0, fieldmode, bytenum);
826       if (reverse)
827 	value = flip_storage_order (fieldmode, value);
828       emit_move_insn (op0, value);
829       return true;
830     }
831 
832   /* It's possible we'll need to handle other cases here for
833      polynomial bitnum and bitsize.  */
834 
835   /* From here on we need to be looking at a fixed-size insertion.  */
836   unsigned HOST_WIDE_INT ibitsize = bitsize.to_constant ();
837   unsigned HOST_WIDE_INT ibitnum = bitnum.to_constant ();
838 
839   /* Make sure we are playing with integral modes.  Pun with subregs
840      if we aren't.  This must come after the entire register case above,
841      since that case is valid for any mode.  The following cases are only
842      valid for integral modes.  */
843   opt_scalar_int_mode op0_mode = int_mode_for_mode (GET_MODE (op0));
844   scalar_int_mode imode;
845   if (!op0_mode.exists (&imode) || imode != GET_MODE (op0))
846     {
847       if (MEM_P (op0))
848 	op0 = adjust_bitfield_address_size (op0, op0_mode.else_blk (),
849 					    0, MEM_SIZE (op0));
850       else if (!op0_mode.exists ())
851 	{
852 	  if (ibitnum == 0
853 	      && known_eq (ibitsize, GET_MODE_BITSIZE (GET_MODE (op0)))
854 	      && MEM_P (value)
855 	      && !reverse)
856 	    {
857 	      value = adjust_address (value, GET_MODE (op0), 0);
858 	      emit_move_insn (op0, value);
859 	      return true;
860 	    }
861 	  if (!fallback_p)
862 	    return false;
863 	  rtx temp = assign_stack_temp (GET_MODE (op0),
864 					GET_MODE_SIZE (GET_MODE (op0)));
865 	  emit_move_insn (temp, op0);
866 	  store_bit_field_1 (temp, bitsize, bitnum, 0, 0, fieldmode, value,
867 			     reverse, fallback_p);
868 	  emit_move_insn (op0, temp);
869 	  return true;
870 	}
871       else
872 	op0 = gen_lowpart (op0_mode.require (), op0);
873     }
874 
875   return store_integral_bit_field (op0, op0_mode, ibitsize, ibitnum,
876 				   bitregion_start, bitregion_end,
877 				   fieldmode, value, reverse, fallback_p);
878 }
879 
880 /* Subroutine of store_bit_field_1, with the same arguments, except
881    that BITSIZE and BITNUM are constant.  Handle cases specific to
882    integral modes.  If OP0_MODE is defined, it is the mode of OP0,
883    otherwise OP0 is a BLKmode MEM.  */
884 
885 static bool
886 store_integral_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
887 			  unsigned HOST_WIDE_INT bitsize,
888 			  unsigned HOST_WIDE_INT bitnum,
889 			  poly_uint64 bitregion_start,
890 			  poly_uint64 bitregion_end,
891 			  machine_mode fieldmode,
892 			  rtx value, bool reverse, bool fallback_p)
893 {
894   /* Storing an lsb-aligned field in a register
895      can be done with a movstrict instruction.  */
896 
897   if (!MEM_P (op0)
898       && !reverse
899       && lowpart_bit_field_p (bitnum, bitsize, op0_mode.require ())
900       && known_eq (bitsize, GET_MODE_BITSIZE (fieldmode))
901       && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
902     {
903       class expand_operand ops[2];
904       enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
905       rtx arg0 = op0;
906       unsigned HOST_WIDE_INT subreg_off;
907 
908       if (GET_CODE (arg0) == SUBREG)
909 	{
910 	  /* Else we've got some float mode source being extracted into
911 	     a different float mode destination -- this combination of
912 	     subregs results in Severe Tire Damage.  */
913 	  gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
914 		      || GET_MODE_CLASS (fieldmode) == MODE_INT
915 		      || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
916 	  arg0 = SUBREG_REG (arg0);
917 	}
918 
919       subreg_off = bitnum / BITS_PER_UNIT;
920       if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off)
921 	  /* STRICT_LOW_PART must have a non-paradoxical subreg as
922 	     operand.  */
923 	  && !paradoxical_subreg_p (fieldmode, GET_MODE (arg0)))
924 	{
925 	  arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
926 
927 	  create_fixed_operand (&ops[0], arg0);
928 	  /* Shrink the source operand to FIELDMODE.  */
929 	  create_convert_operand_to (&ops[1], value, fieldmode, false);
930 	  if (maybe_expand_insn (icode, 2, ops))
931 	    return true;
932 	}
933     }
934 
935   /* Handle fields bigger than a word.  */
936 
937   if (bitsize > BITS_PER_WORD)
938     {
939       /* Here we transfer the words of the field
940 	 in the order least significant first.
941 	 This is because the most significant word is the one which may
942 	 be less than full.
943 	 However, only do that if the value is not BLKmode.  */
944 
945       const bool backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
946       const int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
947       rtx_insn *last;
948 
949       /* This is the mode we must force value to, so that there will be enough
950 	 subwords to extract.  Note that fieldmode will often (always?) be
951 	 VOIDmode, because that is what store_field uses to indicate that this
952 	 is a bit field, but passing VOIDmode to operand_subword_force
953 	 is not allowed.
954 
955 	 The mode must be fixed-size, since insertions into variable-sized
956 	 objects are meant to be handled before calling this function.  */
957       fixed_size_mode value_mode = as_a <fixed_size_mode> (GET_MODE (value));
958       if (value_mode == VOIDmode)
959 	value_mode = smallest_int_mode_for_size (nwords * BITS_PER_WORD);
960 
961       last = get_last_insn ();
962       for (int i = 0; i < nwords; i++)
963 	{
964 	  /* Number of bits to be stored in this iteration, i.e. BITS_PER_WORD
965 	     except maybe for the last iteration.  */
966 	  const unsigned HOST_WIDE_INT new_bitsize
967 	    = MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
968 	  /* Bit offset from the starting bit number in the target.  */
969 	  const unsigned int bit_offset
970 	    = backwards ^ reverse
971 	      ? MAX ((int) bitsize - (i + 1) * BITS_PER_WORD, 0)
972 	      : i * BITS_PER_WORD;
973 	  /* Starting word number in the value.  */
974 	  const unsigned int wordnum
975 	    = backwards
976 	      ? GET_MODE_SIZE (value_mode) / UNITS_PER_WORD - (i + 1)
977 	      : i;
978 	  /* The chunk of the value in word_mode.  We use bit-field extraction
979 	      in BLKmode to handle unaligned memory references and to shift the
980 	      last chunk right on big-endian machines if need be.  */
981 	  rtx value_word
982 	    = fieldmode == BLKmode
983 	      ? extract_bit_field (value, new_bitsize, wordnum * BITS_PER_WORD,
984 				   1, NULL_RTX, word_mode, word_mode, false,
985 				   NULL)
986 	      : operand_subword_force (value, wordnum, value_mode);
987 
988 	  if (!store_bit_field_1 (op0, new_bitsize,
989 				  bitnum + bit_offset,
990 				  bitregion_start, bitregion_end,
991 				  word_mode,
992 				  value_word, reverse, fallback_p))
993 	    {
994 	      delete_insns_since (last);
995 	      return false;
996 	    }
997 	}
998       return true;
999     }
1000 
1001   /* If VALUE has a floating-point or complex mode, access it as an
1002      integer of the corresponding size.  This can occur on a machine
1003      with 64 bit registers that uses SFmode for float.  It can also
1004      occur for unaligned float or complex fields.  */
1005   rtx orig_value = value;
1006   scalar_int_mode value_mode;
1007   if (GET_MODE (value) == VOIDmode)
1008     /* By this point we've dealt with values that are bigger than a word,
1009        so word_mode is a conservatively correct choice.  */
1010     value_mode = word_mode;
1011   else if (!is_a <scalar_int_mode> (GET_MODE (value), &value_mode))
1012     {
1013       value_mode = int_mode_for_mode (GET_MODE (value)).require ();
1014       value = gen_reg_rtx (value_mode);
1015       emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
1016     }
1017 
1018   /* If OP0 is a multi-word register, narrow it to the affected word.
1019      If the region spans two words, defer to store_split_bit_field.
1020      Don't do this if op0 is a single hard register wider than word
1021      such as a float or vector register.  */
1022   if (!MEM_P (op0)
1023       && GET_MODE_SIZE (op0_mode.require ()) > UNITS_PER_WORD
1024       && (!REG_P (op0)
1025 	  || !HARD_REGISTER_P (op0)
1026 	  || hard_regno_nregs (REGNO (op0), op0_mode.require ()) != 1))
1027     {
1028       if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
1029 	{
1030 	  if (!fallback_p)
1031 	    return false;
1032 
1033 	  store_split_bit_field (op0, op0_mode, bitsize, bitnum,
1034 				 bitregion_start, bitregion_end,
1035 				 value, value_mode, reverse);
1036 	  return true;
1037 	}
1038       op0 = simplify_gen_subreg (word_mode, op0, op0_mode.require (),
1039 				 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1040       gcc_assert (op0);
1041       op0_mode = word_mode;
1042       bitnum %= BITS_PER_WORD;
1043     }
1044 
1045   /* From here on we can assume that the field to be stored in fits
1046      within a word.  If the destination is a register, it too fits
1047      in a word.  */
1048 
1049   extraction_insn insv;
1050   if (!MEM_P (op0)
1051       && !reverse
1052       && get_best_reg_extraction_insn (&insv, EP_insv,
1053 				       GET_MODE_BITSIZE (op0_mode.require ()),
1054 				       fieldmode)
1055       && store_bit_field_using_insv (&insv, op0, op0_mode,
1056 				     bitsize, bitnum, value, value_mode))
1057     return true;
1058 
1059   /* If OP0 is a memory, try copying it to a register and seeing if a
1060      cheap register alternative is available.  */
1061   if (MEM_P (op0) && !reverse)
1062     {
1063       if (get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
1064 					fieldmode)
1065 	  && store_bit_field_using_insv (&insv, op0, op0_mode,
1066 					 bitsize, bitnum, value, value_mode))
1067 	return true;
1068 
1069       rtx_insn *last = get_last_insn ();
1070 
1071       /* Try loading part of OP0 into a register, inserting the bitfield
1072 	 into that, and then copying the result back to OP0.  */
1073       unsigned HOST_WIDE_INT bitpos;
1074       rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
1075 					       bitregion_start, bitregion_end,
1076 					       fieldmode, &bitpos);
1077       if (xop0)
1078 	{
1079 	  rtx tempreg = copy_to_reg (xop0);
1080 	  if (store_bit_field_1 (tempreg, bitsize, bitpos,
1081 				 bitregion_start, bitregion_end,
1082 				 fieldmode, orig_value, reverse, false))
1083 	    {
1084 	      emit_move_insn (xop0, tempreg);
1085 	      return true;
1086 	    }
1087 	  delete_insns_since (last);
1088 	}
1089     }
1090 
1091   if (!fallback_p)
1092     return false;
1093 
1094   store_fixed_bit_field (op0, op0_mode, bitsize, bitnum, bitregion_start,
1095 			 bitregion_end, value, value_mode, reverse);
1096   return true;
1097 }
1098 
1099 /* Generate code to store value from rtx VALUE
1100    into a bit-field within structure STR_RTX
1101    containing BITSIZE bits starting at bit BITNUM.
1102 
1103    BITREGION_START is bitpos of the first bitfield in this region.
1104    BITREGION_END is the bitpos of the ending bitfield in this region.
1105    These two fields are 0, if the C++ memory model does not apply,
1106    or we are not interested in keeping track of bitfield regions.
1107 
1108    FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
1109 
1110    If REVERSE is true, the store is to be done in reverse order.  */
1111 
1112 void
1113 store_bit_field (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
1114 		 poly_uint64 bitregion_start, poly_uint64 bitregion_end,
1115 		 machine_mode fieldmode,
1116 		 rtx value, bool reverse)
1117 {
1118   /* Handle -fstrict-volatile-bitfields in the cases where it applies.  */
1119   unsigned HOST_WIDE_INT ibitsize = 0, ibitnum = 0;
1120   scalar_int_mode int_mode;
1121   if (bitsize.is_constant (&ibitsize)
1122       && bitnum.is_constant (&ibitnum)
1123       && is_a <scalar_int_mode> (fieldmode, &int_mode)
1124       && strict_volatile_bitfield_p (str_rtx, ibitsize, ibitnum, int_mode,
1125 				     bitregion_start, bitregion_end))
1126     {
1127       /* Storing of a full word can be done with a simple store.
1128 	 We know here that the field can be accessed with one single
1129 	 instruction.  For targets that support unaligned memory,
1130 	 an unaligned access may be necessary.  */
1131       if (ibitsize == GET_MODE_BITSIZE (int_mode))
1132 	{
1133 	  str_rtx = adjust_bitfield_address (str_rtx, int_mode,
1134 					     ibitnum / BITS_PER_UNIT);
1135 	  if (reverse)
1136 	    value = flip_storage_order (int_mode, value);
1137 	  gcc_assert (ibitnum % BITS_PER_UNIT == 0);
1138 	  emit_move_insn (str_rtx, value);
1139 	}
1140       else
1141 	{
1142 	  rtx temp;
1143 
1144 	  str_rtx = narrow_bit_field_mem (str_rtx, int_mode, ibitsize,
1145 					  ibitnum, &ibitnum);
1146 	  gcc_assert (ibitnum + ibitsize <= GET_MODE_BITSIZE (int_mode));
1147 	  temp = copy_to_reg (str_rtx);
1148 	  if (!store_bit_field_1 (temp, ibitsize, ibitnum, 0, 0,
1149 				  int_mode, value, reverse, true))
1150 	    gcc_unreachable ();
1151 
1152 	  emit_move_insn (str_rtx, temp);
1153 	}
1154 
1155       return;
1156     }
1157 
1158   /* Under the C++0x memory model, we must not touch bits outside the
1159      bit region.  Adjust the address to start at the beginning of the
1160      bit region.  */
1161   if (MEM_P (str_rtx) && maybe_ne (bitregion_start, 0U))
1162     {
1163       scalar_int_mode best_mode;
1164       machine_mode addr_mode = VOIDmode;
1165 
1166       poly_uint64 offset = exact_div (bitregion_start, BITS_PER_UNIT);
1167       bitnum -= bitregion_start;
1168       poly_int64 size = bits_to_bytes_round_up (bitnum + bitsize);
1169       bitregion_end -= bitregion_start;
1170       bitregion_start = 0;
1171       if (bitsize.is_constant (&ibitsize)
1172 	  && bitnum.is_constant (&ibitnum)
1173 	  && get_best_mode (ibitsize, ibitnum,
1174 			    bitregion_start, bitregion_end,
1175 			    MEM_ALIGN (str_rtx), INT_MAX,
1176 			    MEM_VOLATILE_P (str_rtx), &best_mode))
1177 	addr_mode = best_mode;
1178       str_rtx = adjust_bitfield_address_size (str_rtx, addr_mode,
1179 					      offset, size);
1180     }
1181 
1182   if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
1183 			  bitregion_start, bitregion_end,
1184 			  fieldmode, value, reverse, true))
1185     gcc_unreachable ();
1186 }
1187 
1188 /* Use shifts and boolean operations to store VALUE into a bit field of
1189    width BITSIZE in OP0, starting at bit BITNUM.  If OP0_MODE is defined,
1190    it is the mode of OP0, otherwise OP0 is a BLKmode MEM.  VALUE_MODE is
1191    the mode of VALUE.
1192 
1193    If REVERSE is true, the store is to be done in reverse order.  */
1194 
1195 static void
1196 store_fixed_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
1197 		       unsigned HOST_WIDE_INT bitsize,
1198 		       unsigned HOST_WIDE_INT bitnum,
1199 		       poly_uint64 bitregion_start, poly_uint64 bitregion_end,
1200 		       rtx value, scalar_int_mode value_mode, bool reverse)
1201 {
1202   /* There is a case not handled here:
1203      a structure with a known alignment of just a halfword
1204      and a field split across two aligned halfwords within the structure.
1205      Or likewise a structure with a known alignment of just a byte
1206      and a field split across two bytes.
1207      Such cases are not supposed to be able to occur.  */
1208 
1209   scalar_int_mode best_mode;
1210   if (MEM_P (op0))
1211     {
1212       unsigned int max_bitsize = BITS_PER_WORD;
1213       scalar_int_mode imode;
1214       if (op0_mode.exists (&imode) && GET_MODE_BITSIZE (imode) < max_bitsize)
1215 	max_bitsize = GET_MODE_BITSIZE (imode);
1216 
1217       if (!get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
1218 			  MEM_ALIGN (op0), max_bitsize, MEM_VOLATILE_P (op0),
1219 			  &best_mode))
1220 	{
1221 	  /* The only way this should occur is if the field spans word
1222 	     boundaries.  */
1223 	  store_split_bit_field (op0, op0_mode, bitsize, bitnum,
1224 				 bitregion_start, bitregion_end,
1225 				 value, value_mode, reverse);
1226 	  return;
1227 	}
1228 
1229       op0 = narrow_bit_field_mem (op0, best_mode, bitsize, bitnum, &bitnum);
1230     }
1231   else
1232     best_mode = op0_mode.require ();
1233 
1234   store_fixed_bit_field_1 (op0, best_mode, bitsize, bitnum,
1235 			   value, value_mode, reverse);
1236 }
1237 
1238 /* Helper function for store_fixed_bit_field, stores
1239    the bit field always using MODE, which is the mode of OP0.  The other
1240    arguments are as for store_fixed_bit_field.  */
1241 
1242 static void
1243 store_fixed_bit_field_1 (rtx op0, scalar_int_mode mode,
1244 			 unsigned HOST_WIDE_INT bitsize,
1245 			 unsigned HOST_WIDE_INT bitnum,
1246 			 rtx value, scalar_int_mode value_mode, bool reverse)
1247 {
1248   rtx temp;
1249   int all_zero = 0;
1250   int all_one = 0;
1251 
1252   /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1253      for invalid input, such as f5 from gcc.dg/pr48335-2.c.  */
1254 
1255   if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1256     /* BITNUM is the distance between our msb
1257        and that of the containing datum.
1258        Convert it to the distance from the lsb.  */
1259     bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1260 
1261   /* Now BITNUM is always the distance between our lsb
1262      and that of OP0.  */
1263 
1264   /* Shift VALUE left by BITNUM bits.  If VALUE is not constant,
1265      we must first convert its mode to MODE.  */
1266 
1267   if (CONST_INT_P (value))
1268     {
1269       unsigned HOST_WIDE_INT v = UINTVAL (value);
1270 
1271       if (bitsize < HOST_BITS_PER_WIDE_INT)
1272 	v &= (HOST_WIDE_INT_1U << bitsize) - 1;
1273 
1274       if (v == 0)
1275 	all_zero = 1;
1276       else if ((bitsize < HOST_BITS_PER_WIDE_INT
1277 		&& v == (HOST_WIDE_INT_1U << bitsize) - 1)
1278 	       || (bitsize == HOST_BITS_PER_WIDE_INT
1279 		   && v == HOST_WIDE_INT_M1U))
1280 	all_one = 1;
1281 
1282       value = lshift_value (mode, v, bitnum);
1283     }
1284   else
1285     {
1286       int must_and = (GET_MODE_BITSIZE (value_mode) != bitsize
1287 		      && bitnum + bitsize != GET_MODE_BITSIZE (mode));
1288 
1289       if (value_mode != mode)
1290 	value = convert_to_mode (mode, value, 1);
1291 
1292       if (must_and)
1293 	value = expand_binop (mode, and_optab, value,
1294 			      mask_rtx (mode, 0, bitsize, 0),
1295 			      NULL_RTX, 1, OPTAB_LIB_WIDEN);
1296       if (bitnum > 0)
1297 	value = expand_shift (LSHIFT_EXPR, mode, value,
1298 			      bitnum, NULL_RTX, 1);
1299     }
1300 
1301   if (reverse)
1302     value = flip_storage_order (mode, value);
1303 
1304   /* Now clear the chosen bits in OP0,
1305      except that if VALUE is -1 we need not bother.  */
1306   /* We keep the intermediates in registers to allow CSE to combine
1307      consecutive bitfield assignments.  */
1308 
1309   temp = force_reg (mode, op0);
1310 
1311   if (! all_one)
1312     {
1313       rtx mask = mask_rtx (mode, bitnum, bitsize, 1);
1314       if (reverse)
1315 	mask = flip_storage_order (mode, mask);
1316       temp = expand_binop (mode, and_optab, temp, mask,
1317 			   NULL_RTX, 1, OPTAB_LIB_WIDEN);
1318       temp = force_reg (mode, temp);
1319     }
1320 
1321   /* Now logical-or VALUE into OP0, unless it is zero.  */
1322 
1323   if (! all_zero)
1324     {
1325       temp = expand_binop (mode, ior_optab, temp, value,
1326 			   NULL_RTX, 1, OPTAB_LIB_WIDEN);
1327       temp = force_reg (mode, temp);
1328     }
1329 
1330   if (op0 != temp)
1331     {
1332       op0 = copy_rtx (op0);
1333       emit_move_insn (op0, temp);
1334     }
1335 }
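/* In effect this performs a read-modify-write of the containing word:
   roughly op0 = (op0 & ~(mask << bitnum)) | ((value & mask) << bitnum)
   with mask = (1 << bitsize) - 1, except that the AND is skipped when the
   new value is all-ones and the IOR is skipped when it is zero.  */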
1336 
1337 /* Store a bit field that is split across multiple accessible memory objects.
1338 
1339    OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1340    BITSIZE is the field width; BITPOS the position of its first bit
1341    (within the word).
1342    VALUE is the value to store, which has mode VALUE_MODE.
1343    If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is
1344    a BLKmode MEM.
1345 
1346    If REVERSE is true, the store is to be done in reverse order.
1347 
1348    This does not yet handle fields wider than BITS_PER_WORD.  */
1349 
1350 static void
1351 store_split_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
1352 		       unsigned HOST_WIDE_INT bitsize,
1353 		       unsigned HOST_WIDE_INT bitpos,
1354 		       poly_uint64 bitregion_start, poly_uint64 bitregion_end,
1355 		       rtx value, scalar_int_mode value_mode, bool reverse)
1356 {
1357   unsigned int unit, total_bits, bitsdone = 0;
1358 
1359   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1360      much at a time.  */
1361   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1362     unit = BITS_PER_WORD;
1363   else
1364     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1365 
1366   /* If OP0 is a memory with a mode, then UNIT must not be larger than
1367      OP0's mode as well.  Otherwise, store_fixed_bit_field will call us
1368      again, and we will mutually recurse forever.  */
1369   if (MEM_P (op0) && op0_mode.exists ())
1370     unit = MIN (unit, GET_MODE_BITSIZE (op0_mode.require ()));
1371 
1372   /* If VALUE is a constant other than a CONST_INT, get it into a register in
1373      WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
1374      that VALUE might be a floating-point constant.  */
1375   if (CONSTANT_P (value) && !CONST_INT_P (value))
1376     {
1377       rtx word = gen_lowpart_common (word_mode, value);
1378 
1379       if (word && (value != word))
1380 	value = word;
1381       else
1382 	value = gen_lowpart_common (word_mode, force_reg (value_mode, value));
1383       value_mode = word_mode;
1384     }
1385 
1386   total_bits = GET_MODE_BITSIZE (value_mode);
1387 
1388   while (bitsdone < bitsize)
1389     {
1390       unsigned HOST_WIDE_INT thissize;
1391       unsigned HOST_WIDE_INT thispos;
1392       unsigned HOST_WIDE_INT offset;
1393       rtx part;
1394 
1395       offset = (bitpos + bitsdone) / unit;
1396       thispos = (bitpos + bitsdone) % unit;
1397 
1398       /* When the region of bytes we can touch is restricted, decrease
1399 	 UNIT as needed near the end of the region.  If op0 is a REG
1400 	 or SUBREG of a REG, don't do this, as there can't be data races
1401 	 on a register and we can expand shorter code in some cases.  */
1402       if (maybe_ne (bitregion_end, 0U)
1403 	  && unit > BITS_PER_UNIT
1404 	  && maybe_gt (bitpos + bitsdone - thispos + unit, bitregion_end + 1)
1405 	  && !REG_P (op0)
1406 	  && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0))))
1407 	{
1408 	  unit = unit / 2;
1409 	  continue;
1410 	}
1411 
1412       /* THISSIZE must not overrun a word boundary.  Otherwise,
1413 	 store_fixed_bit_field will call us again, and we will mutually
1414 	 recurse forever.  */
1415       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1416       thissize = MIN (thissize, unit - thispos);
1417 
1418       if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1419 	{
1420 	  /* Fetch successively less significant portions.  */
1421 	  if (CONST_INT_P (value))
1422 	    part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1423 			     >> (bitsize - bitsdone - thissize))
1424 			    & ((HOST_WIDE_INT_1 << thissize) - 1));
1425           /* Likewise, but the source is little-endian.  */
1426           else if (reverse)
1427 	    part = extract_fixed_bit_field (word_mode, value, value_mode,
1428 					    thissize,
1429 					    bitsize - bitsdone - thissize,
1430 					    NULL_RTX, 1, false);
1431 	  else
1432 	    /* The args are chosen so that the last part includes the
1433 	       lsb.  Give extract_bit_field the value it needs (with
1434 	       endianness compensation) to fetch the piece we want.  */
1435 	    part = extract_fixed_bit_field (word_mode, value, value_mode,
1436 					    thissize,
1437 					    total_bits - bitsize + bitsdone,
1438 					    NULL_RTX, 1, false);
1439 	}
1440       else
1441 	{
1442 	  /* Fetch successively more significant portions.  */
1443 	  if (CONST_INT_P (value))
1444 	    part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1445 			     >> bitsdone)
1446 			    & ((HOST_WIDE_INT_1 << thissize) - 1));
1447 	  /* Likewise, but the source is big-endian.  */
1448           else if (reverse)
1449 	    part = extract_fixed_bit_field (word_mode, value, value_mode,
1450 					    thissize,
1451 					    total_bits - bitsdone - thissize,
1452 					    NULL_RTX, 1, false);
1453 	  else
1454 	    part = extract_fixed_bit_field (word_mode, value, value_mode,
1455 					    thissize, bitsdone, NULL_RTX,
1456 					    1, false);
1457 	}
1458 
1459       /* If OP0 is a register, then handle OFFSET here.  */
1460       rtx op0_piece = op0;
1461       opt_scalar_int_mode op0_piece_mode = op0_mode;
1462       if (SUBREG_P (op0) || REG_P (op0))
1463 	{
1464 	  scalar_int_mode imode;
1465 	  if (op0_mode.exists (&imode)
1466 	      && GET_MODE_SIZE (imode) < UNITS_PER_WORD)
1467 	    {
1468 	      if (offset)
1469 		op0_piece = const0_rtx;
1470 	    }
1471 	  else
1472 	    {
1473 	      op0_piece = operand_subword_force (op0,
1474 						 offset * unit / BITS_PER_WORD,
1475 						 GET_MODE (op0));
1476 	      op0_piece_mode = word_mode;
1477 	    }
1478 	  offset &= BITS_PER_WORD / unit - 1;
1479 	}
1480 
1481       /* OFFSET is in UNITs, and UNIT is in bits.  If WORD is const0_rtx,
1482 	 it is just an out-of-bounds access.  Ignore it.  */
1483       if (op0_piece != const0_rtx)
1484 	store_fixed_bit_field (op0_piece, op0_piece_mode, thissize,
1485 			       offset * unit + thispos, bitregion_start,
1486 			       bitregion_end, part, word_mode, reverse);
1487       bitsdone += thissize;
1488     }
1489 }
1490 
1491 /* A subroutine of extract_bit_field_1 that converts return value X
1492    to either MODE or TMODE.  MODE, TMODE and UNSIGNEDP are arguments
1493    to extract_bit_field.  */
1494 
1495 static rtx
1496 convert_extracted_bit_field (rtx x, machine_mode mode,
1497 			     machine_mode tmode, bool unsignedp)
1498 {
1499   if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1500     return x;
1501 
1502   /* If the x mode is not a scalar integral, first convert to the
1503      integer mode of that size and then access it as a floating-point
1504      value via a SUBREG.  */
1505   if (!SCALAR_INT_MODE_P (tmode))
1506     {
1507       scalar_int_mode int_mode = int_mode_for_mode (tmode).require ();
1508       x = convert_to_mode (int_mode, x, unsignedp);
1509       x = force_reg (int_mode, x);
1510       return gen_lowpart (tmode, x);
1511     }
1512 
1513   return convert_to_mode (tmode, x, unsignedp);
1514 }
1515 
1516 /* Try to use an ext(z)v pattern to extract a field from OP0.
1517    Return the extracted value on success, otherwise return null.
1518    EXTV describes the extraction instruction to use.  If OP0_MODE
1519    is defined, it is the mode of OP0, otherwise OP0 is a BLKmode MEM.
1520    The other arguments are as for extract_bit_field.  */
1521 
1522 static rtx
1523 extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
1524 			      opt_scalar_int_mode op0_mode,
1525 			      unsigned HOST_WIDE_INT bitsize,
1526 			      unsigned HOST_WIDE_INT bitnum,
1527 			      int unsignedp, rtx target,
1528 			      machine_mode mode, machine_mode tmode)
1529 {
1530   class expand_operand ops[4];
1531   rtx spec_target = target;
1532   rtx spec_target_subreg = 0;
1533   scalar_int_mode ext_mode = extv->field_mode;
1534   unsigned unit = GET_MODE_BITSIZE (ext_mode);
1535 
1536   if (bitsize == 0 || unit < bitsize)
1537     return NULL_RTX;
1538 
1539   if (MEM_P (op0))
1540     /* Get a reference to the first byte of the field.  */
1541     op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
1542 				&bitnum);
1543   else
1544     {
1545       /* Convert from counting within OP0 to counting in EXT_MODE.  */
1546       if (BYTES_BIG_ENDIAN)
1547 	bitnum += unit - GET_MODE_BITSIZE (op0_mode.require ());
1548 
1549       /* If op0 is a register, we need it in EXT_MODE to make it
1550 	 acceptable to the format of ext(z)v.  */
1551       if (GET_CODE (op0) == SUBREG && op0_mode.require () != ext_mode)
1552 	return NULL_RTX;
1553       if (REG_P (op0) && op0_mode.require () != ext_mode)
1554 	op0 = gen_lowpart_SUBREG (ext_mode, op0);
1555     }
1556 
1557   /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1558      "backwards" from the size of the unit we are extracting from.
1559      Otherwise, we count bits from the most significant on a
1560      BYTES/BITS_BIG_ENDIAN machine.  */
1561 
1562   if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1563     bitnum = unit - bitsize - bitnum;
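  /* Illustrative example: with UNIT == 32, BITSIZE == 8 and BITNUM == 4
     counted from one end, the field starts at bit 32 - 8 - 4 == 20 when
     counted from the other end.  */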
1564 
1565   if (target == 0)
1566     target = spec_target = gen_reg_rtx (tmode);
1567 
1568   if (GET_MODE (target) != ext_mode)
1569     {
1570       /* Don't use LHS paradoxical subreg if explicit truncation is needed
1571 	 between the mode of the extraction (word_mode) and the target
1572 	 mode.  Instead, create a temporary and use convert_move to set
1573 	 the target.  */
1574       if (REG_P (target)
1575 	  && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode))
1576 	{
1577 	  target = gen_lowpart (ext_mode, target);
1578 	  if (partial_subreg_p (GET_MODE (spec_target), ext_mode))
1579 	    spec_target_subreg = target;
1580 	}
1581       else
1582 	target = gen_reg_rtx (ext_mode);
1583     }
1584 
1585   create_output_operand (&ops[0], target, ext_mode);
1586   create_fixed_operand (&ops[1], op0);
1587   create_integer_operand (&ops[2], bitsize);
1588   create_integer_operand (&ops[3], bitnum);
1589   if (maybe_expand_insn (extv->icode, 4, ops))
1590     {
1591       target = ops[0].value;
1592       if (target == spec_target)
1593 	return target;
1594       if (target == spec_target_subreg)
1595 	return spec_target;
1596       return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1597     }
1598   return NULL_RTX;
1599 }
1600 
1601 /* See whether it would be valid to extract the part of OP0 described
1602    by BITNUM and BITSIZE into a value of mode MODE using a subreg
1603    operation.  Return the subreg if so, otherwise return null.  */
1604 
1605 static rtx
1606 extract_bit_field_as_subreg (machine_mode mode, rtx op0,
1607 			     poly_uint64 bitsize, poly_uint64 bitnum)
1608 {
1609   poly_uint64 bytenum;
1610   if (multiple_p (bitnum, BITS_PER_UNIT, &bytenum)
1611       && known_eq (bitsize, GET_MODE_BITSIZE (mode))
1612       && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
1613       && TRULY_NOOP_TRUNCATION_MODES_P (mode, GET_MODE (op0)))
1614     return simplify_gen_subreg (mode, op0, GET_MODE (op0), bytenum);
1615   return NULL_RTX;
1616 }
1617 
1618 /* A subroutine of extract_bit_field, with the same arguments.
1619    If FALLBACK_P is true, fall back to extract_fixed_bit_field
1620    if we can find no other means of implementing the operation.
1621    If FALLBACK_P is false, return NULL instead.  */
1622 
1623 static rtx
1624 extract_bit_field_1 (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
1625 		     int unsignedp, rtx target, machine_mode mode,
1626 		     machine_mode tmode, bool reverse, bool fallback_p,
1627 		     rtx *alt_rtl)
1628 {
1629   rtx op0 = str_rtx;
1630   machine_mode mode1;
1631 
1632   if (tmode == VOIDmode)
1633     tmode = mode;
1634 
1635   while (GET_CODE (op0) == SUBREG)
1636     {
1637       bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1638       op0 = SUBREG_REG (op0);
1639     }
1640 
1641   /* If we have an out-of-bounds access to a register, just return an
1642      uninitialized register of the required mode.  This can occur if the
1643      source code contains an out-of-bounds access to a small array.  */
1644   if (REG_P (op0) && known_ge (bitnum, GET_MODE_BITSIZE (GET_MODE (op0))))
1645     return gen_reg_rtx (tmode);
1646 
1647   if (REG_P (op0)
1648       && mode == GET_MODE (op0)
1649       && known_eq (bitnum, 0U)
1650       && known_eq (bitsize, GET_MODE_BITSIZE (GET_MODE (op0))))
1651     {
1652       if (reverse)
1653 	op0 = flip_storage_order (mode, op0);
1654       /* We're trying to extract a full register from itself.  */
1655       return op0;
1656     }
1657 
1658   /* First try to check for vector from vector extractions.  */
1659   if (VECTOR_MODE_P (GET_MODE (op0))
1660       && !MEM_P (op0)
1661       && VECTOR_MODE_P (tmode)
1662       && known_eq (bitsize, GET_MODE_BITSIZE (tmode))
1663       && maybe_gt (GET_MODE_SIZE (GET_MODE (op0)), GET_MODE_SIZE (tmode)))
1664     {
1665       machine_mode new_mode = GET_MODE (op0);
1666       if (GET_MODE_INNER (new_mode) != GET_MODE_INNER (tmode))
1667 	{
1668 	  scalar_mode inner_mode = GET_MODE_INNER (tmode);
1669 	  poly_uint64 nunits;
1670 	  if (!multiple_p (GET_MODE_BITSIZE (GET_MODE (op0)),
1671 			   GET_MODE_UNIT_BITSIZE (tmode), &nunits)
1672 	      || !related_vector_mode (tmode, inner_mode,
1673 				       nunits).exists (&new_mode)
1674 	      || maybe_ne (GET_MODE_SIZE (new_mode),
1675 			   GET_MODE_SIZE (GET_MODE (op0))))
1676 	    new_mode = VOIDmode;
1677 	}
1678       poly_uint64 pos;
1679       if (new_mode != VOIDmode
1680 	  && (convert_optab_handler (vec_extract_optab, new_mode, tmode)
1681 	      != CODE_FOR_nothing)
1682 	  && multiple_p (bitnum, GET_MODE_BITSIZE (tmode), &pos))
1683 	{
1684 	  class expand_operand ops[3];
1685 	  machine_mode outermode = new_mode;
1686 	  machine_mode innermode = tmode;
1687 	  enum insn_code icode
1688 	    = convert_optab_handler (vec_extract_optab, outermode, innermode);
1689 
1690 	  if (new_mode != GET_MODE (op0))
1691 	    op0 = gen_lowpart (new_mode, op0);
1692 	  create_output_operand (&ops[0], target, innermode);
1693 	  ops[0].target = 1;
1694 	  create_input_operand (&ops[1], op0, outermode);
1695 	  create_integer_operand (&ops[2], pos);
1696 	  if (maybe_expand_insn (icode, 3, ops))
1697 	    {
1698 	      if (alt_rtl && ops[0].target)
1699 		*alt_rtl = target;
1700 	      target = ops[0].value;
1701 	      if (GET_MODE (target) != mode)
1702 		return gen_lowpart (tmode, target);
1703 	      return target;
1704 	    }
1705 	}
1706     }
1707 
1708   /* See if we can get a better vector mode before extracting.  */
1709   if (VECTOR_MODE_P (GET_MODE (op0))
1710       && !MEM_P (op0)
1711       && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1712     {
1713       machine_mode new_mode;
1714 
1715       if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1716 	new_mode = MIN_MODE_VECTOR_FLOAT;
1717       else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1718 	new_mode = MIN_MODE_VECTOR_FRACT;
1719       else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1720 	new_mode = MIN_MODE_VECTOR_UFRACT;
1721       else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1722 	new_mode = MIN_MODE_VECTOR_ACCUM;
1723       else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1724 	new_mode = MIN_MODE_VECTOR_UACCUM;
1725       else
1726 	new_mode = MIN_MODE_VECTOR_INT;
1727 
1728       FOR_EACH_MODE_FROM (new_mode, new_mode)
1729 	if (known_eq (GET_MODE_SIZE (new_mode), GET_MODE_SIZE (GET_MODE (op0)))
1730 	    && known_eq (GET_MODE_UNIT_SIZE (new_mode), GET_MODE_SIZE (tmode))
1731 	    && targetm.vector_mode_supported_p (new_mode))
1732 	  break;
1733       if (new_mode != VOIDmode)
1734 	op0 = gen_lowpart (new_mode, op0);
1735     }
1736 
1737   /* Use vec_extract patterns for extracting parts of vectors whenever
1738      available.  If that fails, see whether the current modes and bitregion
1739      give a natural subreg.  */
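  /* For example (illustrative only): extracting bits [64, 96) of a
     V4SImode vector satisfies the checks below with POS == 2, so the
     target's vec_extract pattern is asked for element 2 directly.  */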
1740   machine_mode outermode = GET_MODE (op0);
1741   if (VECTOR_MODE_P (outermode) && !MEM_P (op0))
1742     {
1743       scalar_mode innermode = GET_MODE_INNER (outermode);
1744       enum insn_code icode
1745 	= convert_optab_handler (vec_extract_optab, outermode, innermode);
1746       poly_uint64 pos;
1747       if (icode != CODE_FOR_nothing
1748 	  && known_eq (bitsize, GET_MODE_BITSIZE (innermode))
1749 	  && multiple_p (bitnum, GET_MODE_BITSIZE (innermode), &pos))
1750 	{
1751 	  class expand_operand ops[3];
1752 
1753 	  create_output_operand (&ops[0], target, innermode);
1754 	  ops[0].target = 1;
1755 	  create_input_operand (&ops[1], op0, outermode);
1756 	  create_integer_operand (&ops[2], pos);
1757 	  if (maybe_expand_insn (icode, 3, ops))
1758 	    {
1759 	      if (alt_rtl && ops[0].target)
1760 		*alt_rtl = target;
1761 	      target = ops[0].value;
1762 	      if (GET_MODE (target) != mode)
1763 		return gen_lowpart (tmode, target);
1764 	      return target;
1765 	    }
1766 	}
1767       /* Using subregs is useful if we're extracting one register vector
1768 	 from a multi-register vector.  extract_bit_field_as_subreg checks
1769 	 for valid bitsize and bitnum, so we don't need to do that here.  */
1770       if (VECTOR_MODE_P (mode))
1771 	{
1772 	  rtx sub = extract_bit_field_as_subreg (mode, op0, bitsize, bitnum);
1773 	  if (sub)
1774 	    return sub;
1775 	}
1776     }
1777 
1778   /* Make sure we are playing with integral modes.  Pun with subregs
1779      if we aren't.  */
1780   opt_scalar_int_mode op0_mode = int_mode_for_mode (GET_MODE (op0));
1781   scalar_int_mode imode;
1782   if (!op0_mode.exists (&imode) || imode != GET_MODE (op0))
1783     {
1784       if (MEM_P (op0))
1785 	op0 = adjust_bitfield_address_size (op0, op0_mode.else_blk (),
1786 					    0, MEM_SIZE (op0));
1787       else if (op0_mode.exists (&imode))
1788 	{
1789 	  op0 = gen_lowpart (imode, op0);
1790 
1791 	  /* If we got a SUBREG, force it into a register since we
1792 	     aren't going to be able to do another SUBREG on it.  */
1793 	  if (GET_CODE (op0) == SUBREG)
1794 	    op0 = force_reg (imode, op0);
1795 	}
1796       else
1797 	{
1798 	  poly_int64 size = GET_MODE_SIZE (GET_MODE (op0));
1799 	  rtx mem = assign_stack_temp (GET_MODE (op0), size);
1800 	  emit_move_insn (mem, op0);
1801 	  op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size);
1802 	}
1803     }
1804 
1805   /* ??? We currently assume TARGET is at least as big as BITSIZE.
1806      If that's wrong, the solution is to test for it and set TARGET to 0
1807      if needed.  */
1808 
1809   /* Get the mode of the field to use for atomic access or subreg
1810      conversion.  */
1811   if (!SCALAR_INT_MODE_P (tmode)
1812       || !mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0).exists (&mode1))
1813     mode1 = mode;
1814   gcc_assert (mode1 != BLKmode);
1815 
1816   /* Extraction of a full MODE1 value can be done with a subreg as long
1817      as the least significant bit of the value is the least significant
1818      bit of either OP0 or a word of OP0.  */
1819   if (!MEM_P (op0) && !reverse)
1820     {
1821       rtx sub = extract_bit_field_as_subreg (mode1, op0, bitsize, bitnum);
1822       if (sub)
1823 	return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1824     }
1825 
1826   /* Extraction of a full MODE1 value can be done with a load as long as
1827      the field is on a byte boundary and is sufficiently aligned.  */
1828   poly_uint64 bytenum;
1829   if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1, &bytenum))
1830     {
1831       op0 = adjust_bitfield_address (op0, mode1, bytenum);
1832       if (reverse)
1833 	op0 = flip_storage_order (mode1, op0);
1834       return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1835     }
1836 
1837   /* If we have a memory source and a non-constant bit offset, restrict
1838      the memory to the referenced bytes.  This is a worst-case fallback
1839      but is useful for things like vector booleans.  */
1840   if (MEM_P (op0) && !bitnum.is_constant ())
1841     {
1842       bytenum = bits_to_bytes_round_down (bitnum);
1843       bitnum = num_trailing_bits (bitnum);
1844       poly_uint64 bytesize = bits_to_bytes_round_up (bitnum + bitsize);
1845       op0 = adjust_bitfield_address_size (op0, BLKmode, bytenum, bytesize);
1846       op0_mode = opt_scalar_int_mode ();
1847     }
1848 
1849   /* It's possible we'll need to handle other cases here for
1850      polynomial bitnum and bitsize.  */
1851 
1852   /* From here on we need to be looking at a fixed-size extraction.  */
1853   return extract_integral_bit_field (op0, op0_mode, bitsize.to_constant (),
1854 				     bitnum.to_constant (), unsignedp,
1855 				     target, mode, tmode, reverse, fallback_p);
1856 }
1857 
1858 /* Subroutine of extract_bit_field_1, with the same arguments, except
1859    that BITSIZE and BITNUM are constant.  Handle cases specific to
1860    integral modes.  If OP0_MODE is defined, it is the mode of OP0,
1861    otherwise OP0 is a BLKmode MEM.  */
1862 
1863 static rtx
1864 extract_integral_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
1865 			    unsigned HOST_WIDE_INT bitsize,
1866 			    unsigned HOST_WIDE_INT bitnum, int unsignedp,
1867 			    rtx target, machine_mode mode, machine_mode tmode,
1868 			    bool reverse, bool fallback_p)
1869 {
1870   /* Handle fields bigger than a word.  */
1871 
1872   if (bitsize > BITS_PER_WORD)
1873     {
1874       /* Here we transfer the words of the field
1875 	 in the order least significant first.
1876 	 This is because the most significant word is the one which may
1877 	 be less than full.  */
1878 
1879       const bool backwards = WORDS_BIG_ENDIAN;
1880       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1881       unsigned int i;
1882       rtx_insn *last;
1883 
1884       if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1885 	target = gen_reg_rtx (mode);
1886 
1887       /* In case we're about to clobber a base register or something
1888 	 (see gcc.c-torture/execute/20040625-1.c).   */
1889       if (reg_mentioned_p (target, op0))
1890 	target = gen_reg_rtx (mode);
1891 
1892       /* Indicate for flow that the entire target reg is being set.  */
1893       emit_clobber (target);
1894 
1895       /* The mode must be fixed-size, since extract_bit_field_1 handles
1896 	 extractions from variable-sized objects before calling this
1897 	 function.  */
1898       unsigned int target_size
1899 	= GET_MODE_SIZE (GET_MODE (target)).to_constant ();
1900       last = get_last_insn ();
1901       for (i = 0; i < nwords; i++)
1902 	{
1903 	  /* If I is 0, use the low-order word in both field and target;
1904 	     if I is 1, use the next to lowest word; and so on.  */
1905 	  /* Word number in TARGET to use.  */
1906 	  unsigned int wordnum
1907 	    = (backwards ? target_size / UNITS_PER_WORD - i - 1 : i);
1908 	  /* Offset from start of field in OP0.  */
1909 	  unsigned int bit_offset = (backwards ^ reverse
1910 				     ? MAX ((int) bitsize - ((int) i + 1)
1911 					    * BITS_PER_WORD,
1912 					    0)
1913 				     : (int) i * BITS_PER_WORD);
1914 	  rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1915 	  rtx result_part
1916 	    = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1917 					     bitsize - i * BITS_PER_WORD),
1918 				   bitnum + bit_offset, 1, target_part,
1919 				   mode, word_mode, reverse, fallback_p, NULL);
1920 
1921 	  gcc_assert (target_part);
1922 	  if (!result_part)
1923 	    {
1924 	      delete_insns_since (last);
1925 	      return NULL;
1926 	    }
1927 
1928 	  if (result_part != target_part)
1929 	    emit_move_insn (target_part, result_part);
1930 	}
1931 
1932       if (unsignedp)
1933 	{
1934 	  /* Unless we've filled TARGET, the upper regs in a multi-reg value
1935 	     need to be zero'd out.  */
1936 	  if (target_size > nwords * UNITS_PER_WORD)
1937 	    {
1938 	      unsigned int i, total_words;
1939 
1940 	      total_words = target_size / UNITS_PER_WORD;
1941 	      for (i = nwords; i < total_words; i++)
1942 		emit_move_insn
1943 		  (operand_subword (target,
1944 				    backwards ? total_words - i - 1 : i,
1945 				    1, VOIDmode),
1946 		   const0_rtx);
1947 	    }
1948 	  return target;
1949 	}
1950 
1951       /* Signed bit field: sign-extend with two arithmetic shifts.  */
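      /* E.g. for a 12-bit signed field in a 32-bit MODE (illustrative
	 numbers): shifting left by 20 moves the field's sign bit to
	 bit 31, and the arithmetic right shift by 20 then replicates
	 it into the upper bits.  */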
1952       target = expand_shift (LSHIFT_EXPR, mode, target,
1953 			     GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1954       return expand_shift (RSHIFT_EXPR, mode, target,
1955 			   GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1956     }
1957 
1958   /* If OP0 is a multi-word register, narrow it to the affected word.
1959      If the region spans two words, defer to extract_split_bit_field.  */
1960   if (!MEM_P (op0) && GET_MODE_SIZE (op0_mode.require ()) > UNITS_PER_WORD)
1961     {
1962       if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
1963 	{
1964 	  if (!fallback_p)
1965 	    return NULL_RTX;
1966 	  target = extract_split_bit_field (op0, op0_mode, bitsize, bitnum,
1967 					    unsignedp, reverse);
1968 	  return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1969 	}
1970       op0 = simplify_gen_subreg (word_mode, op0, op0_mode.require (),
1971 				 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1972       op0_mode = word_mode;
1973       bitnum %= BITS_PER_WORD;
1974     }
1975 
1976   /* From here on we know the desired field is smaller than a word.
1977      If OP0 is a register, it too fits within a word.  */
1978   enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv;
1979   extraction_insn extv;
1980   if (!MEM_P (op0)
1981       && !reverse
1982       /* ??? We could limit the structure size to the part of OP0 that
1983 	 contains the field, with appropriate checks for endianness
1984 	 and TARGET_TRULY_NOOP_TRUNCATION.  */
1985       && get_best_reg_extraction_insn (&extv, pattern,
1986 				       GET_MODE_BITSIZE (op0_mode.require ()),
1987 				       tmode))
1988     {
1989       rtx result = extract_bit_field_using_extv (&extv, op0, op0_mode,
1990 						 bitsize, bitnum,
1991 						 unsignedp, target, mode,
1992 						 tmode);
1993       if (result)
1994 	return result;
1995     }
1996 
1997   /* If OP0 is a memory, try copying it to a register and seeing if a
1998      cheap register alternative is available.  */
1999   if (MEM_P (op0) && !reverse)
2000     {
2001       if (get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum,
2002 					tmode))
2003 	{
2004 	  rtx result = extract_bit_field_using_extv (&extv, op0, op0_mode,
2005 						     bitsize, bitnum,
2006 						     unsignedp, target, mode,
2007 						     tmode);
2008 	  if (result)
2009 	    return result;
2010 	}
2011 
2012       rtx_insn *last = get_last_insn ();
2013 
2014       /* Try loading part of OP0 into a register and extracting the
2015 	 bitfield from that.  */
2016       unsigned HOST_WIDE_INT bitpos;
2017       rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum,
2018 					       0, 0, tmode, &bitpos);
2019       if (xop0)
2020 	{
2021 	  xop0 = copy_to_reg (xop0);
2022 	  rtx result = extract_bit_field_1 (xop0, bitsize, bitpos,
2023 					    unsignedp, target,
2024 					    mode, tmode, reverse, false, NULL);
2025 	  if (result)
2026 	    return result;
2027 	  delete_insns_since (last);
2028 	}
2029     }
2030 
2031   if (!fallback_p)
2032     return NULL;
2033 
2034   /* Find a correspondingly-sized integer field, so we can apply
2035      shifts and masks to it.  */
2036   scalar_int_mode int_mode;
2037   if (!int_mode_for_mode (tmode).exists (&int_mode))
2038     /* If this fails, we should probably push op0 out to memory and then
2039        do a load.  */
2040     int_mode = int_mode_for_mode (mode).require ();
2041 
2042   target = extract_fixed_bit_field (int_mode, op0, op0_mode, bitsize,
2043 				    bitnum, target, unsignedp, reverse);
2044 
2045   /* Complex values must be reversed piecewise, so we need to undo the global
2046      reversal, convert to the complex mode and reverse again.  */
2047   if (reverse && COMPLEX_MODE_P (tmode))
2048     {
2049       target = flip_storage_order (int_mode, target);
2050       target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
2051       target = flip_storage_order (tmode, target);
2052     }
2053   else
2054     target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
2055 
2056   return target;
2057 }
2058 
2059 /* Generate code to extract a byte-field from STR_RTX
2060    containing BITSIZE bits, starting at BITNUM,
2061    and put it in TARGET if possible (if TARGET is nonzero).
2062    Regardless of TARGET, we return the rtx for where the value is placed.
2063 
2064    STR_RTX is the structure containing the byte (a REG or MEM).
2065    UNSIGNEDP is nonzero if this is an unsigned bit field.
2066    MODE is the natural mode of the field value once extracted.
2067    TMODE is the mode the caller would like the value to have;
2068    but the value may be returned with type MODE instead.
2069 
2070    If REVERSE is true, the extraction is to be done in reverse order.
2071 
2072    If a TARGET is specified and we can store in it at no extra cost,
2073    we do so, and return TARGET.
2074    Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
2075    if they are equally easy.
2076 
2077    If the result can be stored at TARGET, and ALT_RTL is non-NULL,
2078    then *ALT_RTL is set to TARGET (before legitimization).  */
2079 
2080 rtx
2081 extract_bit_field (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
2082 		   int unsignedp, rtx target, machine_mode mode,
2083 		   machine_mode tmode, bool reverse, rtx *alt_rtl)
2084 {
2085   machine_mode mode1;
2086 
2087   /* Handle -fstrict-volatile-bitfields in the cases where it applies.  */
2088   if (maybe_ne (GET_MODE_BITSIZE (GET_MODE (str_rtx)), 0))
2089     mode1 = GET_MODE (str_rtx);
2090   else if (target && maybe_ne (GET_MODE_BITSIZE (GET_MODE (target)), 0))
2091     mode1 = GET_MODE (target);
2092   else
2093     mode1 = tmode;
2094 
2095   unsigned HOST_WIDE_INT ibitsize, ibitnum;
2096   scalar_int_mode int_mode;
2097   if (bitsize.is_constant (&ibitsize)
2098       && bitnum.is_constant (&ibitnum)
2099       && is_a <scalar_int_mode> (mode1, &int_mode)
2100       && strict_volatile_bitfield_p (str_rtx, ibitsize, ibitnum,
2101 				     int_mode, 0, 0))
2102     {
2103       /* Extraction of a full INT_MODE value can be done with a simple load.
2104 	 We know here that the field can be accessed with one single
2105 	 instruction.  For targets that support unaligned memory,
2106 	 an unaligned access may be necessary.  */
2107       if (ibitsize == GET_MODE_BITSIZE (int_mode))
2108 	{
2109 	  rtx result = adjust_bitfield_address (str_rtx, int_mode,
2110 						ibitnum / BITS_PER_UNIT);
2111 	  if (reverse)
2112 	    result = flip_storage_order (int_mode, result);
2113 	  gcc_assert (ibitnum % BITS_PER_UNIT == 0);
2114 	  return convert_extracted_bit_field (result, mode, tmode, unsignedp);
2115 	}
2116 
2117       str_rtx = narrow_bit_field_mem (str_rtx, int_mode, ibitsize, ibitnum,
2118 				      &ibitnum);
2119       gcc_assert (ibitnum + ibitsize <= GET_MODE_BITSIZE (int_mode));
2120       str_rtx = copy_to_reg (str_rtx);
2121       return extract_bit_field_1 (str_rtx, ibitsize, ibitnum, unsignedp,
2122 				  target, mode, tmode, reverse, true, alt_rtl);
2123     }
2124 
2125   return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
2126 			      target, mode, tmode, reverse, true, alt_rtl);
2127 }
2128 
2129 /* Use shifts and boolean operations to extract a field of BITSIZE bits
2130    from bit BITNUM of OP0.  If OP0_MODE is defined, it is the mode of OP0,
2131    otherwise OP0 is a BLKmode MEM.
2132 
2133    UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
2134    If REVERSE is true, the extraction is to be done in reverse order.
2135 
2136    If TARGET is nonzero, attempts to store the value there
2137    and return TARGET, but this is not guaranteed.
2138    If TARGET is not used, create a pseudo-reg of mode TMODE for the value.  */
2139 
2140 static rtx
2141 extract_fixed_bit_field (machine_mode tmode, rtx op0,
2142 			 opt_scalar_int_mode op0_mode,
2143 			 unsigned HOST_WIDE_INT bitsize,
2144 			 unsigned HOST_WIDE_INT bitnum, rtx target,
2145 			 int unsignedp, bool reverse)
2146 {
2147   scalar_int_mode mode;
2148   if (MEM_P (op0))
2149     {
2150       if (!get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0),
2151 			  BITS_PER_WORD, MEM_VOLATILE_P (op0), &mode))
2152 	/* The only way this should occur is if the field spans word
2153 	   boundaries.  */
2154 	return extract_split_bit_field (op0, op0_mode, bitsize, bitnum,
2155 					unsignedp, reverse);
2156 
2157       op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
2158     }
2159   else
2160     mode = op0_mode.require ();
2161 
2162   return extract_fixed_bit_field_1 (tmode, op0, mode, bitsize, bitnum,
2163 				    target, unsignedp, reverse);
2164 }
2165 
2166 /* Helper function for extract_fixed_bit_field, extracts
2167    the bit field always using MODE, which is the mode of OP0.
2168    The other arguments are as for extract_fixed_bit_field.  */
2169 
2170 static rtx
2171 extract_fixed_bit_field_1 (machine_mode tmode, rtx op0, scalar_int_mode mode,
2172 			   unsigned HOST_WIDE_INT bitsize,
2173 			   unsigned HOST_WIDE_INT bitnum, rtx target,
2174 			   int unsignedp, bool reverse)
2175 {
2176   /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
2177      for invalid input, such as extract equivalent of f5 from
2178      gcc.dg/pr48335-2.c.  */
2179 
2180   if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2181     /* BITNUM is the distance between our msb and that of OP0.
2182        Convert it to the distance from the lsb.  */
2183     bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
2184 
2185   /* Now BITNUM is always the distance between the field's lsb and that of OP0.
2186      We have reduced the big-endian case to the little-endian case.  */
2187   if (reverse)
2188     op0 = flip_storage_order (mode, op0);
2189 
2190   if (unsignedp)
2191     {
2192       if (bitnum)
2193 	{
2194 	  /* If the field does not already start at the lsb,
2195 	     shift it so it does.  */
2196 	  /* Maybe propagate the target for the shift.  */
2197 	  rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2198 	  if (tmode != mode)
2199 	    subtarget = 0;
2200 	  op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
2201 	}
2202       /* Convert the value to the desired mode.  TMODE must also be a
2203 	 scalar integer for this conversion to make sense, since we
2204 	 shouldn't reinterpret the bits.  */
2205       scalar_int_mode new_mode = as_a <scalar_int_mode> (tmode);
2206       if (mode != new_mode)
2207 	op0 = convert_to_mode (new_mode, op0, 1);
2208 
2209       /* Unless the msb of the field used to be the msb when we shifted,
2210 	 mask out the upper bits.  */
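      /* Illustrative example: an unsigned 5-bit field at BITNUM 3 of a
	 32-bit MODE is shifted right by 3 and then ANDed with 0x1f;
	 if the field instead occupied bits 27..31, BITNUM + BITSIZE
	 equals the mode width and the shift alone suffices.  */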
2211 
2212       if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
2213 	return expand_binop (new_mode, and_optab, op0,
2214 			     mask_rtx (new_mode, 0, bitsize, 0),
2215 			     target, 1, OPTAB_LIB_WIDEN);
2216       return op0;
2217     }
2218 
2219   /* To extract a signed bit-field, first shift its msb to the msb of the word,
2220      then arithmetic-shift its lsb to the lsb of the word.  */
2221   op0 = force_reg (mode, op0);
2222 
2223   /* Find the narrowest integer mode that contains the field.  */
2224 
2225   opt_scalar_int_mode mode_iter;
2226   FOR_EACH_MODE_IN_CLASS (mode_iter, MODE_INT)
2227     if (GET_MODE_BITSIZE (mode_iter.require ()) >= bitsize + bitnum)
2228       break;
2229 
2230   mode = mode_iter.require ();
2231   op0 = convert_to_mode (mode, op0, 0);
2232 
2233   if (mode != tmode)
2234     target = 0;
2235 
2236   if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
2237     {
2238       int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
2239       /* Maybe propagate the target for the shift.  */
2240       rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2241       op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
2242     }
2243 
2244   return expand_shift (RSHIFT_EXPR, mode, op0,
2245 		       GET_MODE_BITSIZE (mode) - bitsize, target, 0);
2246 }
2247 
2248 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
2249    VALUE << BITPOS.  */
2250 
2251 static rtx
2252 lshift_value (machine_mode mode, unsigned HOST_WIDE_INT value,
2253 	      int bitpos)
2254 {
2255   return immed_wide_int_const (wi::lshift (value, bitpos), mode);
2256 }
2257 
2258 /* Extract a bit field that is split across two words
2259    and return an RTX for the result.
2260 
2261    OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
2262    BITSIZE is the field width; BITPOS is the position of its first bit in the word.
2263    UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.
2264    If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is
2265    a BLKmode MEM.
2266 
2267    If REVERSE is true, the extraction is to be done in reverse order.  */
2268 
2269 static rtx
2270 extract_split_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
2271 			 unsigned HOST_WIDE_INT bitsize,
2272 			 unsigned HOST_WIDE_INT bitpos, int unsignedp,
2273 			 bool reverse)
2274 {
2275   unsigned int unit;
2276   unsigned int bitsdone = 0;
2277   rtx result = NULL_RTX;
2278   int first = 1;
2279 
2280   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
2281      much at a time.  */
2282   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
2283     unit = BITS_PER_WORD;
2284   else
2285     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
2286 
2287   while (bitsdone < bitsize)
2288     {
2289       unsigned HOST_WIDE_INT thissize;
2290       rtx part;
2291       unsigned HOST_WIDE_INT thispos;
2292       unsigned HOST_WIDE_INT offset;
2293 
2294       offset = (bitpos + bitsdone) / unit;
2295       thispos = (bitpos + bitsdone) % unit;
2296 
2297       /* THISSIZE must not overrun a word boundary.  Otherwise,
2298 	 extract_fixed_bit_field will call us again, and we will mutually
2299 	 recurse forever.  */
2300       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
2301       thissize = MIN (thissize, unit - thispos);
2302 
2303       /* If OP0 is a register, then handle OFFSET here.  */
2304       rtx op0_piece = op0;
2305       opt_scalar_int_mode op0_piece_mode = op0_mode;
2306       if (SUBREG_P (op0) || REG_P (op0))
2307 	{
2308 	  op0_piece = operand_subword_force (op0, offset, op0_mode.require ());
2309 	  op0_piece_mode = word_mode;
2310 	  offset = 0;
2311 	}
2312 
2313       /* Extract the parts in bit-counting order,
2314 	 whose meaning is determined by BYTES_PER_UNIT.
2315 	 OFFSET is in UNITs, and UNIT is in bits.  */
2316       part = extract_fixed_bit_field (word_mode, op0_piece, op0_piece_mode,
2317 				      thissize, offset * unit + thispos,
2318 				      0, 1, reverse);
2319       bitsdone += thissize;
2320 
2321       /* Shift this part into place for the result.  */
2322       if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2323 	{
2324 	  if (bitsize != bitsdone)
2325 	    part = expand_shift (LSHIFT_EXPR, word_mode, part,
2326 				 bitsize - bitsdone, 0, 1);
2327 	}
2328       else
2329 	{
2330 	  if (bitsdone != thissize)
2331 	    part = expand_shift (LSHIFT_EXPR, word_mode, part,
2332 				 bitsdone - thissize, 0, 1);
2333 	}
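      /* Worked example (little-endian, illustrative): a 12-bit field
	 split as 4 + 8 bits yields a first piece that stays at bit 0
	 and a second piece shifted left by 4 before the IOR below.  */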
2334 
2335       if (first)
2336 	result = part;
2337       else
2338 	/* Combine the parts with bitwise or.  This works
2339 	   because we extracted each part as an unsigned bit field.  */
2340 	result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2341 			       OPTAB_LIB_WIDEN);
2342 
2343       first = 0;
2344     }
2345 
2346   /* Unsigned bit field: we are done.  */
2347   if (unsignedp)
2348     return result;
2349   /* Signed bit field: sign-extend with two arithmetic shifts.  */
2350   result = expand_shift (LSHIFT_EXPR, word_mode, result,
2351 			 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2352   return expand_shift (RSHIFT_EXPR, word_mode, result,
2353 		       BITS_PER_WORD - bitsize, NULL_RTX, 0);
2354 }
2355 
2356 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2357    the bit pattern.  SRC_MODE is the mode of SRC; if this is smaller than
2358    MODE, fill the upper bits with zeros.  Fail if the layout of either
2359    mode is unknown (as for CC modes) or if the extraction would involve
2360    unprofitable mode punning.  Return the value on success, otherwise
2361    return null.
2362 
2363    This is different from gen_lowpart* in these respects:
2364 
2365      - the returned value must always be considered an rvalue
2366 
2367      - when MODE is wider than SRC_MODE, the extraction involves
2368        a zero extension
2369 
2370      - when MODE is smaller than SRC_MODE, the extraction involves
2371        a truncation (and is thus subject to TARGET_TRULY_NOOP_TRUNCATION).
2372 
2373    In other words, this routine performs a computation, whereas the
2374    gen_lowpart* routines are conceptually lvalue or rvalue subreg
2375    operations.  */
2376 
2377 rtx
2378 extract_low_bits (machine_mode mode, machine_mode src_mode, rtx src)
2379 {
2380   scalar_int_mode int_mode, src_int_mode;
2381 
2382   if (mode == src_mode)
2383     return src;
2384 
2385   if (CONSTANT_P (src))
2386     {
2387       /* simplify_gen_subreg can't be used here, as if simplify_subreg
2388 	 fails, it will happily create (subreg (symbol_ref)) or similar
2389 	 invalid SUBREGs.  */
2390       poly_uint64 byte = subreg_lowpart_offset (mode, src_mode);
2391       rtx ret = simplify_subreg (mode, src, src_mode, byte);
2392       if (ret)
2393 	return ret;
2394 
2395       if (GET_MODE (src) == VOIDmode
2396 	  || !validate_subreg (mode, src_mode, src, byte))
2397 	return NULL_RTX;
2398 
2399       src = force_reg (GET_MODE (src), src);
2400       return gen_rtx_SUBREG (mode, src, byte);
2401     }
2402 
2403   if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2404     return NULL_RTX;
2405 
2406   if (known_eq (GET_MODE_BITSIZE (mode), GET_MODE_BITSIZE (src_mode))
2407       && targetm.modes_tieable_p (mode, src_mode))
2408     {
2409       rtx x = gen_lowpart_common (mode, src);
2410       if (x)
2411         return x;
2412     }
2413 
2414   if (!int_mode_for_mode (src_mode).exists (&src_int_mode)
2415       || !int_mode_for_mode (mode).exists (&int_mode))
2416     return NULL_RTX;
2417 
2418   if (!targetm.modes_tieable_p (src_int_mode, src_mode))
2419     return NULL_RTX;
2420   if (!targetm.modes_tieable_p (int_mode, mode))
2421     return NULL_RTX;
2422 
2423   src = gen_lowpart (src_int_mode, src);
2424   if (!validate_subreg (int_mode, src_int_mode, src,
2425 			subreg_lowpart_offset (int_mode, src_int_mode)))
2426     return NULL_RTX;
2427 
2428   src = convert_modes (int_mode, src_int_mode, src, true);
2429   src = gen_lowpart (mode, src);
2430   return src;
2431 }
2432 
2433 /* Add INC into TARGET.  */
2434 
2435 void
2436 expand_inc (rtx target, rtx inc)
2437 {
2438   rtx value = expand_binop (GET_MODE (target), add_optab,
2439 			    target, inc,
2440 			    target, 0, OPTAB_LIB_WIDEN);
2441   if (value != target)
2442     emit_move_insn (target, value);
2443 }
2444 
2445 /* Subtract DEC from TARGET.  */
2446 
2447 void
2448 expand_dec (rtx target, rtx dec)
2449 {
2450   rtx value = expand_binop (GET_MODE (target), sub_optab,
2451 			    target, dec,
2452 			    target, 0, OPTAB_LIB_WIDEN);
2453   if (value != target)
2454     emit_move_insn (target, value);
2455 }
2456 
2457 /* Output a shift instruction for expression code CODE,
2458    with SHIFTED being the rtx for the value to shift,
2459    and AMOUNT the rtx for the amount to shift by.
2460    Store the result in the rtx TARGET, if that is convenient.
2461    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2462    Return the rtx for where the value is.
2463    If that cannot be done, abort the compilation unless MAY_FAIL is true,
2464    in which case 0 is returned.  */
2465 
2466 static rtx
2467 expand_shift_1 (enum tree_code code, machine_mode mode, rtx shifted,
2468 		rtx amount, rtx target, int unsignedp, bool may_fail = false)
2469 {
2470   rtx op1, temp = 0;
2471   int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2472   int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2473   optab lshift_optab = ashl_optab;
2474   optab rshift_arith_optab = ashr_optab;
2475   optab rshift_uns_optab = lshr_optab;
2476   optab lrotate_optab = rotl_optab;
2477   optab rrotate_optab = rotr_optab;
2478   machine_mode op1_mode;
2479   scalar_mode scalar_mode = GET_MODE_INNER (mode);
2480   int attempt;
2481   bool speed = optimize_insn_for_speed_p ();
2482 
2483   op1 = amount;
2484   op1_mode = GET_MODE (op1);
2485 
2486   /* Determine whether the shift/rotate amount is a vector, or scalar.  If the
2487      shift amount is a vector, use the vector/vector shift patterns.  */
2488   if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2489     {
2490       lshift_optab = vashl_optab;
2491       rshift_arith_optab = vashr_optab;
2492       rshift_uns_optab = vlshr_optab;
2493       lrotate_optab = vrotl_optab;
2494       rrotate_optab = vrotr_optab;
2495     }
2496 
2497   /* Previously detected shift-counts computed by NEGATE_EXPR
2498      and shifted in the other direction; but that does not work
2499      on all machines.  */
2500 
2501   if (SHIFT_COUNT_TRUNCATED)
2502     {
2503       if (CONST_INT_P (op1)
2504 	  && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2505 	      (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (scalar_mode)))
2506 	op1 = gen_int_shift_amount (mode,
2507 				    (unsigned HOST_WIDE_INT) INTVAL (op1)
2508 				    % GET_MODE_BITSIZE (scalar_mode));
2509       else if (GET_CODE (op1) == SUBREG
2510 	       && subreg_lowpart_p (op1)
2511 	       && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1)))
2512 	       && SCALAR_INT_MODE_P (GET_MODE (op1)))
2513 	op1 = SUBREG_REG (op1);
2514     }
2515 
2516   /* Canonicalize rotates by constant amount.  If op1 is bitsize / 2,
2517      prefer left rotation, if op1 is from bitsize / 2 + 1 to
2518      bitsize - 1, use other direction of rotate with 1 .. bitsize / 2 - 1
2519      amount instead.  */
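  /* For instance (illustrative numbers): in a 32-bit scalar mode a
     rotate right by 25 is rewritten below as a rotate left by 7.  */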
2520   if (rotate
2521       && CONST_INT_P (op1)
2522       && IN_RANGE (INTVAL (op1), GET_MODE_BITSIZE (scalar_mode) / 2 + left,
2523 		   GET_MODE_BITSIZE (scalar_mode) - 1))
2524     {
2525       op1 = gen_int_shift_amount (mode, (GET_MODE_BITSIZE (scalar_mode)
2526 					 - INTVAL (op1)));
2527       left = !left;
2528       code = left ? LROTATE_EXPR : RROTATE_EXPR;
2529     }
2530 
2531   /* Rotation of 16-bit values by 8 bits is effectively equivalent to a bswaphi.
2532      Note that this is not the case for bigger values.  For instance a rotation
2533      of 0x01020304 by 16 bits gives 0x03040102 which is different from
2534      0x04030201 (bswapsi).  */
2535   if (rotate
2536       && CONST_INT_P (op1)
2537       && INTVAL (op1) == BITS_PER_UNIT
2538       && GET_MODE_SIZE (scalar_mode) == 2
2539       && optab_handler (bswap_optab, mode) != CODE_FOR_nothing)
2540     return expand_unop (mode, bswap_optab, shifted, NULL_RTX, unsignedp);
2541 
2542   if (op1 == const0_rtx)
2543     return shifted;
2544 
2545   /* Check whether it's cheaper to implement a left shift by a constant
2546      bit count by a sequence of additions.  */
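  /* E.g. X << 2 becomes two self-additions (T = X + X; T = T + T) when
     two adds are cheaper than the shift on this target (illustrative).  */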
2547   if (code == LSHIFT_EXPR
2548       && CONST_INT_P (op1)
2549       && INTVAL (op1) > 0
2550       && INTVAL (op1) < GET_MODE_PRECISION (scalar_mode)
2551       && INTVAL (op1) < MAX_BITS_PER_WORD
2552       && (shift_cost (speed, mode, INTVAL (op1))
2553 	  > INTVAL (op1) * add_cost (speed, mode))
2554       && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2555     {
2556       int i;
2557       for (i = 0; i < INTVAL (op1); i++)
2558 	{
2559 	  temp = force_reg (mode, shifted);
2560 	  shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2561 				  unsignedp, OPTAB_LIB_WIDEN);
2562 	}
2563       return shifted;
2564     }
2565 
2566   for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2567     {
2568       enum optab_methods methods;
2569 
2570       if (attempt == 0)
2571 	methods = OPTAB_DIRECT;
2572       else if (attempt == 1)
2573 	methods = OPTAB_WIDEN;
2574       else
2575 	methods = OPTAB_LIB_WIDEN;
2576 
2577       if (rotate)
2578 	{
2579 	  /* Widening does not work for rotation.  */
2580 	  if (methods == OPTAB_WIDEN)
2581 	    continue;
2582 	  else if (methods == OPTAB_LIB_WIDEN)
2583 	    {
2584 	      /* If we have been unable to open-code this by a rotation,
2585 		 do it as the IOR of two shifts.  I.e., to rotate A
2586 		 by N bits, compute
2587 		 (A << N) | ((unsigned) A >> ((-N) & (C - 1)))
2588 		 where C is the bitsize of A.
2589 
2590 		 It is theoretically possible that the target machine might
2591 		 not be able to perform either shift and hence we would
2592 		 be making two libcalls rather than just the one for the
2593 		 shift (similarly if IOR could not be done).  We will allow
2594 		 this extremely unlikely lossage to avoid complicating the
2595 		 code below.  */
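	      /* Illustrative check of the identity above, with C == 32:
		 rotating A == 0x12345678 left by N == 8 gives
		 (A << 8) | (A >> 24) == 0x34567800 | 0x12 == 0x34567812.  */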
2596 
2597 	      rtx subtarget = target == shifted ? 0 : target;
2598 	      rtx new_amount, other_amount;
2599 	      rtx temp1;
2600 
2601 	      new_amount = op1;
2602 	      if (op1 == const0_rtx)
2603 		return shifted;
2604 	      else if (CONST_INT_P (op1))
2605 		other_amount = gen_int_shift_amount
2606 		  (mode, GET_MODE_BITSIZE (scalar_mode) - INTVAL (op1));
2607 	      else
2608 		{
2609 		  other_amount
2610 		    = simplify_gen_unary (NEG, GET_MODE (op1),
2611 					  op1, GET_MODE (op1));
2612 		  HOST_WIDE_INT mask = GET_MODE_PRECISION (scalar_mode) - 1;
2613 		  other_amount
2614 		    = simplify_gen_binary (AND, GET_MODE (op1), other_amount,
2615 					   gen_int_mode (mask, GET_MODE (op1)));
2616 		}
2617 
2618 	      shifted = force_reg (mode, shifted);
2619 
2620 	      temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2621 				     mode, shifted, new_amount, 0, 1);
2622 	      temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2623 				      mode, shifted, other_amount,
2624 				      subtarget, 1);
2625 	      return expand_binop (mode, ior_optab, temp, temp1, target,
2626 				   unsignedp, methods);
2627 	    }
2628 
2629 	  temp = expand_binop (mode,
2630 			       left ? lrotate_optab : rrotate_optab,
2631 			       shifted, op1, target, unsignedp, methods);
2632 	}
2633       else if (unsignedp)
2634 	temp = expand_binop (mode,
2635 			     left ? lshift_optab : rshift_uns_optab,
2636 			     shifted, op1, target, unsignedp, methods);
2637 
2638       /* Do arithmetic shifts.
2639 	 Also, if we are going to widen the operand, we can just as well
2640 	 use an arithmetic right-shift instead of a logical one.  */
2641       if (temp == 0 && ! rotate
2642 	  && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2643 	{
2644 	  enum optab_methods methods1 = methods;
2645 
2646 	  /* If trying to widen a log shift to an arithmetic shift,
2647 	     don't accept an arithmetic shift of the same size.  */
2648 	  if (unsignedp)
2649 	    methods1 = OPTAB_MUST_WIDEN;
2650 
2651 	  /* Arithmetic shift */
2652 
2653 	  temp = expand_binop (mode,
2654 			       left ? lshift_optab : rshift_arith_optab,
2655 			       shifted, op1, target, unsignedp, methods1);
2656 	}
2657 
2658       /* We used to try extzv here for logical right shifts, but that was
2659 	 only useful for one machine, the VAX, and caused poor code
2660 	 generation there for lshrdi3, so the code was deleted and a
2661 	 define_expand for lshrsi3 was added to vax.md.  */
2662     }
2663 
2664   gcc_assert (temp != NULL_RTX || may_fail);
2665   return temp;
2666 }
2667 
2668 /* Output a shift instruction for expression code CODE,
2669    with SHIFTED being the rtx for the value to shift,
2670    and AMOUNT the amount to shift by.
2671    Store the result in the rtx TARGET, if that is convenient.
2672    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2673    Return the rtx for where the value is.  */
2674 
2675 rtx
2676 expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2677 	      poly_int64 amount, rtx target, int unsignedp)
2678 {
2679   return expand_shift_1 (code, mode, shifted,
2680 			 gen_int_shift_amount (mode, amount),
2681 			 target, unsignedp);
2682 }
2683 
2684 /* Likewise, but return 0 if that cannot be done.  */
2685 
2686 static rtx
2687 maybe_expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2688 		    int amount, rtx target, int unsignedp)
2689 {
2690   return expand_shift_1 (code, mode,
2691 			 shifted, GEN_INT (amount), target, unsignedp, true);
2692 }
2693 
2694 /* Output a shift instruction for expression code CODE,
2695    with SHIFTED being the rtx for the value to shift,
2696    and AMOUNT the tree for the amount to shift by.
2697    Store the result in the rtx TARGET, if that is convenient.
2698    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2699    Return the rtx for where the value is.  */
2700 
2701 rtx
2702 expand_variable_shift (enum tree_code code, machine_mode mode, rtx shifted,
2703 		       tree amount, rtx target, int unsignedp)
2704 {
2705   return expand_shift_1 (code, mode,
2706 			 shifted, expand_normal (amount), target, unsignedp);
2707 }
2708 
2709 
2710 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2711 			const struct mult_cost *, machine_mode mode);
2712 static rtx expand_mult_const (machine_mode, rtx, HOST_WIDE_INT, rtx,
2713 			      const struct algorithm *, enum mult_variant);
2714 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2715 static rtx extract_high_half (scalar_int_mode, rtx);
2716 static rtx expmed_mult_highpart (scalar_int_mode, rtx, rtx, rtx, int, int);
2717 static rtx expmed_mult_highpart_optab (scalar_int_mode, rtx, rtx, rtx,
2718 				       int, int);
2719 /* Compute and return the best algorithm for multiplying by T.
2720    The algorithm must cost less than COST_LIMIT.
2721    If retval.cost >= COST_LIMIT, no algorithm was found and all
2722    other fields of the returned struct are undefined.
2723    MODE is the machine mode of the multiplication.  */
2724 
2725 static void
2726 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2727 	    const struct mult_cost *cost_limit, machine_mode mode)
2728 {
2729   int m;
2730   struct algorithm *alg_in, *best_alg;
2731   struct mult_cost best_cost;
2732   struct mult_cost new_limit;
2733   int op_cost, op_latency;
2734   unsigned HOST_WIDE_INT orig_t = t;
2735   unsigned HOST_WIDE_INT q;
2736   int maxm, hash_index;
2737   bool cache_hit = false;
2738   enum alg_code cache_alg = alg_zero;
2739   bool speed = optimize_insn_for_speed_p ();
2740   scalar_int_mode imode;
2741   struct alg_hash_entry *entry_ptr;
2742 
2743   /* Indicate that no algorithm is yet found.  If no algorithm
2744      is found, this value will be returned and indicate failure.  */
2745   alg_out->cost.cost = cost_limit->cost + 1;
2746   alg_out->cost.latency = cost_limit->latency + 1;
2747 
2748   if (cost_limit->cost < 0
2749       || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2750     return;
2751 
2752   /* Be prepared for vector modes.  */
2753   imode = as_a <scalar_int_mode> (GET_MODE_INNER (mode));
2754 
2755   maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2756 
2757   /* Restrict the bits of "t" to the multiplication's mode.  */
2758   t &= GET_MODE_MASK (imode);
2759 
2760   /* t == 1 can be done in zero cost.  */
2761   if (t == 1)
2762     {
2763       alg_out->ops = 1;
2764       alg_out->cost.cost = 0;
2765       alg_out->cost.latency = 0;
2766       alg_out->op[0] = alg_m;
2767       return;
2768     }
2769 
2770   /* t == 0 sometimes has a cost.  If it does and it exceeds our limit,
2771      fail now.  */
2772   if (t == 0)
2773     {
2774       if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2775 	return;
2776       else
2777 	{
2778 	  alg_out->ops = 1;
2779 	  alg_out->cost.cost = zero_cost (speed);
2780 	  alg_out->cost.latency = zero_cost (speed);
2781 	  alg_out->op[0] = alg_zero;
2782 	  return;
2783 	}
2784     }
2785 
2786   /* We'll be needing a couple extra algorithm structures now.  */
2787 
2788   alg_in = XALLOCA (struct algorithm);
2789   best_alg = XALLOCA (struct algorithm);
2790   best_cost = *cost_limit;
2791 
2792   /* Compute the hash index.  */
2793   hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2794 
2795   /* See if we already know what to do for T.  */
2796   entry_ptr = alg_hash_entry_ptr (hash_index);
2797   if (entry_ptr->t == t
2798       && entry_ptr->mode == mode
2799       && entry_ptr->speed == speed
2800       && entry_ptr->alg != alg_unknown)
2801     {
2802       cache_alg = entry_ptr->alg;
2803 
2804       if (cache_alg == alg_impossible)
2805 	{
2806 	  /* The cache tells us that it's impossible to synthesize
2807 	     multiplication by T within entry_ptr->cost.  */
2808 	  if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2809 	    /* COST_LIMIT is at least as restrictive as the one
2810 	       recorded in the hash table, in which case we have no
2811 	       hope of synthesizing a multiplication.  Just
2812 	       return.  */
2813 	    return;
2814 
2815 	  /* If we get here, COST_LIMIT is less restrictive than the
2816 	     one recorded in the hash table, so we may be able to
2817 	     synthesize a multiplication.  Proceed as if we didn't
2818 	     have the cache entry.  */
2819 	}
2820       else
2821 	{
2822 	  if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2823 	    /* The cached algorithm shows that this multiplication
2824 	       requires more cost than COST_LIMIT.  Just return.  This
2825 	       way, we don't clobber this cache entry with
2826 	       alg_impossible but retain useful information.  */
2827 	    return;
2828 
2829 	  cache_hit = true;
2830 
2831 	  switch (cache_alg)
2832 	    {
2833 	    case alg_shift:
2834 	      goto do_alg_shift;
2835 
2836 	    case alg_add_t_m2:
2837 	    case alg_sub_t_m2:
2838 	      goto do_alg_addsub_t_m2;
2839 
2840 	    case alg_add_factor:
2841 	    case alg_sub_factor:
2842 	      goto do_alg_addsub_factor;
2843 
2844 	    case alg_add_t2_m:
2845 	      goto do_alg_add_t2_m;
2846 
2847 	    case alg_sub_t2_m:
2848 	      goto do_alg_sub_t2_m;
2849 
2850 	    default:
2851 	      gcc_unreachable ();
2852 	    }
2853 	}
2854     }
2855 
2856   /* If we have a group of zero bits at the low-order part of T, try
2857      multiplying by the remaining bits and then doing a shift.  */
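  /* For instance, t == 20 (binary 10100) has two low zero bits: one
     candidate sequence is x * 20 == ((x << 2) + x) << 2, i.e. synthesize
     the multiplication by 5 recursively and finish with the shift.  Which
     sequence actually wins depends on the target's cost tables.  */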
2858 
2859   if ((t & 1) == 0)
2860     {
2861     do_alg_shift:
2862       m = ctz_or_zero (t); /* m = number of low zero bits */
2863       if (m < maxm)
2864 	{
2865 	  q = t >> m;
2866 	  /* The function expand_shift will choose between a shift and
2867 	     a sequence of additions, so the observed cost is given as
2868 	     MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)).  */
2869 	  op_cost = m * add_cost (speed, mode);
2870 	  if (shift_cost (speed, mode, m) < op_cost)
2871 	    op_cost = shift_cost (speed, mode, m);
2872 	  new_limit.cost = best_cost.cost - op_cost;
2873 	  new_limit.latency = best_cost.latency - op_cost;
2874 	  synth_mult (alg_in, q, &new_limit, mode);
2875 
2876 	  alg_in->cost.cost += op_cost;
2877 	  alg_in->cost.latency += op_cost;
2878 	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2879 	    {
2880 	      best_cost = alg_in->cost;
2881 	      std::swap (alg_in, best_alg);
2882 	      best_alg->log[best_alg->ops] = m;
2883 	      best_alg->op[best_alg->ops] = alg_shift;
2884 	    }
2885 
2886 	  /* See if treating ORIG_T as a signed number yields a better
2887 	     sequence.  Try this sequence only for a negative ORIG_T
2888 	     as it would be useless for a non-negative ORIG_T.  */
2889 	  if ((HOST_WIDE_INT) orig_t < 0)
2890 	    {
2891 	      /* Shift ORIG_T as follows because a right shift of a
2892 		 negative-valued signed type is implementation
2893 		 defined.  */
2894 	      q = ~(~orig_t >> m);
2895 	      /* The function expand_shift will choose between a shift
2896 		 and a sequence of additions, so the observed cost is
2897 		 given as MIN (m * add_cost(speed, mode),
2898 		 shift_cost(speed, mode, m)).  */
2899 	      op_cost = m * add_cost (speed, mode);
2900 	      if (shift_cost (speed, mode, m) < op_cost)
2901 		op_cost = shift_cost (speed, mode, m);
2902 	      new_limit.cost = best_cost.cost - op_cost;
2903 	      new_limit.latency = best_cost.latency - op_cost;
2904 	      synth_mult (alg_in, q, &new_limit, mode);
2905 
2906 	      alg_in->cost.cost += op_cost;
2907 	      alg_in->cost.latency += op_cost;
2908 	      if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2909 		{
2910 		  best_cost = alg_in->cost;
2911 		  std::swap (alg_in, best_alg);
2912 		  best_alg->log[best_alg->ops] = m;
2913 		  best_alg->op[best_alg->ops] = alg_shift;
2914 		}
2915 	    }
2916 	}
2917       if (cache_hit)
2918 	goto done;
2919     }
2920 
2921   /* If we have an odd number, add or subtract one.  */
2922   if ((t & 1) != 0)
2923     {
2924       unsigned HOST_WIDE_INT w;
2925 
2926     do_alg_addsub_t_m2:
2927       for (w = 1; (w & t) != 0; w <<= 1)
2928 	;
2929       /* If T was -1, then W will be zero after the loop.  This is another
2930 	 case where T ends with ...111.  Handling this with (T + 1) and a
2931 	 subtraction produces slightly better code and makes algorithm
2932 	 selection much faster than treating it like the ...0111 case
2933 	 below.  */
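      /* As a concrete illustration: for t == 7 the loop leaves w == 8, so
	 we take the (t + 1) branch and try x * 7 == (x << 3) - x; for
	 t == 5 we take the (t - 1) branch and try x * 5 == (x << 2) + x.
	 These are only candidates; a cheaper sequence found later still
	 wins.  */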
2934       if (w == 0
2935 	  || (w > 2
2936 	      /* Reject the case where t is 3.
2937 		 Thus we prefer addition in that case.  */
2938 	      && t != 3))
2939 	{
2940 	  /* T ends with ...111.  Multiply by (T + 1) and subtract T.  */
2941 
2942 	  op_cost = add_cost (speed, mode);
2943 	  new_limit.cost = best_cost.cost - op_cost;
2944 	  new_limit.latency = best_cost.latency - op_cost;
2945 	  synth_mult (alg_in, t + 1, &new_limit, mode);
2946 
2947 	  alg_in->cost.cost += op_cost;
2948 	  alg_in->cost.latency += op_cost;
2949 	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2950 	    {
2951 	      best_cost = alg_in->cost;
2952 	      std::swap (alg_in, best_alg);
2953 	      best_alg->log[best_alg->ops] = 0;
2954 	      best_alg->op[best_alg->ops] = alg_sub_t_m2;
2955 	    }
2956 	}
2957       else
2958 	{
2959 	  /* T ends with ...01 or ...011.  Multiply by (T - 1) and add T.  */
2960 
2961 	  op_cost = add_cost (speed, mode);
2962 	  new_limit.cost = best_cost.cost - op_cost;
2963 	  new_limit.latency = best_cost.latency - op_cost;
2964 	  synth_mult (alg_in, t - 1, &new_limit, mode);
2965 
2966 	  alg_in->cost.cost += op_cost;
2967 	  alg_in->cost.latency += op_cost;
2968 	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2969 	    {
2970 	      best_cost = alg_in->cost;
2971 	      std::swap (alg_in, best_alg);
2972 	      best_alg->log[best_alg->ops] = 0;
2973 	      best_alg->op[best_alg->ops] = alg_add_t_m2;
2974 	    }
2975 	}
2976 
2977       /* We may be able to calculate a * -7, a * -15, a * -31, etc.
2978 	 quickly with a - a * n for some appropriate constant n.  */
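      /* E.g. for ORIG_T == -7 this gives m == 3 and the candidate
	 x * -7 == x - (x << 3).  */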
2979       m = exact_log2 (-orig_t + 1);
2980       if (m >= 0 && m < maxm)
2981 	{
2982 	  op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2983 	  /* If the target has a cheap shift-and-subtract insn use
2984 	     that in preference to a shift insn followed by a sub insn.
2985 	     Assume that the shift-and-sub is "atomic" with a latency
2986 	     equal to its cost, otherwise assume that on superscalar
2987 	     hardware the shift may be executed concurrently with the
2988 	     earlier steps in the algorithm.  */
2989 	  if (shiftsub1_cost (speed, mode, m) <= op_cost)
2990 	    {
2991 	      op_cost = shiftsub1_cost (speed, mode, m);
2992 	      op_latency = op_cost;
2993 	    }
2994 	  else
2995 	    op_latency = add_cost (speed, mode);
2996 
2997 	  new_limit.cost = best_cost.cost - op_cost;
2998 	  new_limit.latency = best_cost.latency - op_latency;
2999 	  synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
3000 		      &new_limit, mode);
3001 
3002 	  alg_in->cost.cost += op_cost;
3003 	  alg_in->cost.latency += op_latency;
3004 	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3005 	    {
3006 	      best_cost = alg_in->cost;
3007 	      std::swap (alg_in, best_alg);
3008 	      best_alg->log[best_alg->ops] = m;
3009 	      best_alg->op[best_alg->ops] = alg_sub_t_m2;
3010 	    }
3011 	}
3012 
3013       if (cache_hit)
3014 	goto done;
3015     }
3016 
3017   /* Look for factors of t of the form
3018      t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
3019      If we find such a factor, we can multiply by t using an algorithm that
3020      multiplies by q, shifts the result left by m and adds or subtracts the unshifted value.
3021 
3022      We search for large factors first and loop down, even if large factors
3023      are less probable than small; if we find a large factor we will find a
3024      good sequence quickly, and therefore be able to prune (by decreasing
3025      COST_LIMIT) the search.  */
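  /* For example, t == 45 is divisible by d == 2**4 - 1 == 15, so one
     candidate is x * 45 == ((x * 3) << 4) - x * 3, where x * 3 is in turn
     synthesized recursively as (x << 1) + x.  */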
3026 
3027  do_alg_addsub_factor:
3028   for (m = floor_log2 (t - 1); m >= 2; m--)
3029     {
3030       unsigned HOST_WIDE_INT d;
3031 
3032       d = (HOST_WIDE_INT_1U << m) + 1;
3033       if (t % d == 0 && t > d && m < maxm
3034 	  && (!cache_hit || cache_alg == alg_add_factor))
3035 	{
3036 	  op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
3037 	  if (shiftadd_cost (speed, mode, m) <= op_cost)
3038 	    op_cost = shiftadd_cost (speed, mode, m);
3039 
3040 	  op_latency = op_cost;
3041 
3043 	  new_limit.cost = best_cost.cost - op_cost;
3044 	  new_limit.latency = best_cost.latency - op_latency;
3045 	  synth_mult (alg_in, t / d, &new_limit, mode);
3046 
3047 	  alg_in->cost.cost += op_cost;
3048 	  alg_in->cost.latency += op_latency;
3049 	  if (alg_in->cost.latency < op_cost)
3050 	    alg_in->cost.latency = op_cost;
3051 	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3052 	    {
3053 	      best_cost = alg_in->cost;
3054 	      std::swap (alg_in, best_alg);
3055 	      best_alg->log[best_alg->ops] = m;
3056 	      best_alg->op[best_alg->ops] = alg_add_factor;
3057 	    }
3058 	  /* Other factors will have been taken care of in the recursion.  */
3059 	  break;
3060 	}
3061 
3062       d = (HOST_WIDE_INT_1U << m) - 1;
3063       if (t % d == 0 && t > d && m < maxm
3064 	  && (!cache_hit || cache_alg == alg_sub_factor))
3065 	{
3066 	  op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
3067 	  if (shiftsub0_cost (speed, mode, m) <= op_cost)
3068 	    op_cost = shiftsub0_cost (speed, mode, m);
3069 
3070 	  op_latency = op_cost;
3071 
3072 	  new_limit.cost = best_cost.cost - op_cost;
3073 	  new_limit.latency = best_cost.latency - op_latency;
3074 	  synth_mult (alg_in, t / d, &new_limit, mode);
3075 
3076 	  alg_in->cost.cost += op_cost;
3077 	  alg_in->cost.latency += op_latency;
3078 	  if (alg_in->cost.latency < op_cost)
3079 	    alg_in->cost.latency = op_cost;
3080 	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3081 	    {
3082 	      best_cost = alg_in->cost;
3083 	      std::swap (alg_in, best_alg);
3084 	      best_alg->log[best_alg->ops] = m;
3085 	      best_alg->op[best_alg->ops] = alg_sub_factor;
3086 	    }
3087 	  break;
3088 	}
3089     }
3090   if (cache_hit)
3091     goto done;
3092 
3093   /* Try shift-and-add (load effective address) instructions,
3094      i.e. do a*3, a*5, a*9.  */
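  /* E.g. t == 9: (t - 1) == 8, m == 3, and the whole multiplication
     collapses to a single shift-and-add, x * 9 == (x << 3) + x.  */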
3095   if ((t & 1) != 0)
3096     {
3097     do_alg_add_t2_m:
3098       q = t - 1;
3099       m = ctz_hwi (q);
3100       if (q && m < maxm)
3101 	{
3102 	  op_cost = shiftadd_cost (speed, mode, m);
3103 	  new_limit.cost = best_cost.cost - op_cost;
3104 	  new_limit.latency = best_cost.latency - op_cost;
3105 	  synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
3106 
3107 	  alg_in->cost.cost += op_cost;
3108 	  alg_in->cost.latency += op_cost;
3109 	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3110 	    {
3111 	      best_cost = alg_in->cost;
3112 	      std::swap (alg_in, best_alg);
3113 	      best_alg->log[best_alg->ops] = m;
3114 	      best_alg->op[best_alg->ops] = alg_add_t2_m;
3115 	    }
3116 	}
3117       if (cache_hit)
3118 	goto done;
3119 
3120     do_alg_sub_t2_m:
3121       q = t + 1;
3122       m = ctz_hwi (q);
3123       if (q && m < maxm)
3124 	{
3125 	  op_cost = shiftsub0_cost (speed, mode, m);
3126 	  new_limit.cost = best_cost.cost - op_cost;
3127 	  new_limit.latency = best_cost.latency - op_cost;
3128 	  synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
3129 
3130 	  alg_in->cost.cost += op_cost;
3131 	  alg_in->cost.latency += op_cost;
3132 	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3133 	    {
3134 	      best_cost = alg_in->cost;
3135 	      std::swap (alg_in, best_alg);
3136 	      best_alg->log[best_alg->ops] = m;
3137 	      best_alg->op[best_alg->ops] = alg_sub_t2_m;
3138 	    }
3139 	}
3140       if (cache_hit)
3141 	goto done;
3142     }
3143 
3144  done:
3145   /* If best_cost has not decreased, we have not found any algorithm.  */
3146   if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
3147     {
3148       /* We failed to find an algorithm.  Record alg_impossible for
3149 	 this case (that is, <T, MODE, COST_LIMIT>) so that next time
3150 	 we are asked to find an algorithm for T within the same or
3151 	 lower COST_LIMIT, we can immediately return to the
3152 	 caller.  */
3153       entry_ptr->t = t;
3154       entry_ptr->mode = mode;
3155       entry_ptr->speed = speed;
3156       entry_ptr->alg = alg_impossible;
3157       entry_ptr->cost = *cost_limit;
3158       return;
3159     }
3160 
3161   /* Cache the result.  */
3162   if (!cache_hit)
3163     {
3164       entry_ptr->t = t;
3165       entry_ptr->mode = mode;
3166       entry_ptr->speed = speed;
3167       entry_ptr->alg = best_alg->op[best_alg->ops];
3168       entry_ptr->cost.cost = best_cost.cost;
3169       entry_ptr->cost.latency = best_cost.latency;
3170     }
3171 
3172   /* If we are getting too long a sequence for `struct algorithm'
3173      to record, make this search fail.  */
3174   if (best_alg->ops == MAX_BITS_PER_WORD)
3175     return;
3176 
3177   /* Copy the algorithm from temporary space to the space at alg_out.
3178      We avoid using structure assignment because the majority of
3179      best_alg is normally undefined, and this is a critical function.  */
3180   alg_out->ops = best_alg->ops + 1;
3181   alg_out->cost = best_cost;
3182   memcpy (alg_out->op, best_alg->op,
3183 	  alg_out->ops * sizeof *alg_out->op);
3184   memcpy (alg_out->log, best_alg->log,
3185 	  alg_out->ops * sizeof *alg_out->log);
3186 }
3187 
3188 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
3189    Try three variations:
3190 
3191        - a shift/add sequence based on VAL itself
3192        - a shift/add sequence based on -VAL, followed by a negation
3193        - a shift/add sequence based on VAL - 1, followed by an addition.
3194 
3195    Return true if the cheapest of these cost less than MULT_COST,
3196    describing the algorithm in *ALG and final fixup in *VARIANT.  */
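/* As a rough illustration (the actual winner depends on the target's cost
   tables): for VAL == -5 the negate variant computes x * 5 as (x << 2) + x
   and then negates, which is usually cheaper than synthesizing -5 directly;
   for VAL == 7 the plain variant gives (x << 3) - x, while the add variant
   would synthesize 6 and finish with an addition.  */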
3197 
3198 bool
3199 choose_mult_variant (machine_mode mode, HOST_WIDE_INT val,
3200 		     struct algorithm *alg, enum mult_variant *variant,
3201 		     int mult_cost)
3202 {
3203   struct algorithm alg2;
3204   struct mult_cost limit;
3205   int op_cost;
3206   bool speed = optimize_insn_for_speed_p ();
3207 
3208   /* Fail quickly for impossible bounds.  */
3209   if (mult_cost < 0)
3210     return false;
3211 
3212   /* Ensure that mult_cost provides a reasonable upper bound.
3213      Any constant multiplication can be performed with less
3214      than 2 * bits additions.  */
3215   op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
3216   if (mult_cost > op_cost)
3217     mult_cost = op_cost;
3218 
3219   *variant = basic_variant;
3220   limit.cost = mult_cost;
3221   limit.latency = mult_cost;
3222   synth_mult (alg, val, &limit, mode);
3223 
3224   /* This works only if the inverted value actually fits in an
3225      `unsigned int'.  */
3226   if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
3227     {
3228       op_cost = neg_cost (speed, mode);
3229       if (MULT_COST_LESS (&alg->cost, mult_cost))
3230 	{
3231 	  limit.cost = alg->cost.cost - op_cost;
3232 	  limit.latency = alg->cost.latency - op_cost;
3233 	}
3234       else
3235 	{
3236 	  limit.cost = mult_cost - op_cost;
3237 	  limit.latency = mult_cost - op_cost;
3238 	}
3239 
3240       synth_mult (&alg2, -val, &limit, mode);
3241       alg2.cost.cost += op_cost;
3242       alg2.cost.latency += op_cost;
3243       if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3244 	*alg = alg2, *variant = negate_variant;
3245     }
3246 
3247   /* This proves very useful for division-by-constant.  */
3248   op_cost = add_cost (speed, mode);
3249   if (MULT_COST_LESS (&alg->cost, mult_cost))
3250     {
3251       limit.cost = alg->cost.cost - op_cost;
3252       limit.latency = alg->cost.latency - op_cost;
3253     }
3254   else
3255     {
3256       limit.cost = mult_cost - op_cost;
3257       limit.latency = mult_cost - op_cost;
3258     }
3259 
3260   synth_mult (&alg2, val - 1, &limit, mode);
3261   alg2.cost.cost += op_cost;
3262   alg2.cost.latency += op_cost;
3263   if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3264     *alg = alg2, *variant = add_variant;
3265 
3266   return MULT_COST_LESS (&alg->cost, mult_cost);
3267 }
3268 
3269 /* A subroutine of expand_mult, used for constant multiplications.
3270    Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
3271    convenient.  Use the shift/add sequence described by ALG and apply
3272    the final fixup specified by VARIANT.  */
3273 
3274 static rtx
3275 expand_mult_const (machine_mode mode, rtx op0, HOST_WIDE_INT val,
3276 		   rtx target, const struct algorithm *alg,
3277 		   enum mult_variant variant)
3278 {
3279   unsigned HOST_WIDE_INT val_so_far;
3280   rtx_insn *insn;
3281   rtx accum, tem;
3282   int opno;
3283   machine_mode nmode;
3284 
3285   /* Avoid referencing memory over and over and invalid sharing
3286      on SUBREGs.  */
3287   op0 = force_reg (mode, op0);
3288 
3289   /* ACCUM starts out either as OP0 or as a zero, depending on
3290      the first operation.  */
3291 
3292   if (alg->op[0] == alg_zero)
3293     {
3294       accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
3295       val_so_far = 0;
3296     }
3297   else if (alg->op[0] == alg_m)
3298     {
3299       accum = copy_to_mode_reg (mode, op0);
3300       val_so_far = 1;
3301     }
3302   else
3303     gcc_unreachable ();
3304 
3305   for (opno = 1; opno < alg->ops; opno++)
3306     {
3307       int log = alg->log[opno];
3308       rtx shift_subtarget = optimize ? 0 : accum;
3309       rtx add_target
3310 	= (opno == alg->ops - 1 && target != 0 && variant != add_variant
3311 	   && !optimize)
3312 	  ? target : 0;
3313       rtx accum_target = optimize ? 0 : accum;
3314       rtx accum_inner;
3315 
3316       switch (alg->op[opno])
3317 	{
3318 	case alg_shift:
3319 	  tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3320 	  /* REG_EQUAL note will be attached to the following insn.  */
3321 	  emit_move_insn (accum, tem);
3322 	  val_so_far <<= log;
3323 	  break;
3324 
3325 	case alg_add_t_m2:
3326 	  tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3327 	  accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3328 				 add_target ? add_target : accum_target);
3329 	  val_so_far += HOST_WIDE_INT_1U << log;
3330 	  break;
3331 
3332 	case alg_sub_t_m2:
3333 	  tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3334 	  accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
3335 				 add_target ? add_target : accum_target);
3336 	  val_so_far -= HOST_WIDE_INT_1U << log;
3337 	  break;
3338 
3339 	case alg_add_t2_m:
3340 	  accum = expand_shift (LSHIFT_EXPR, mode, accum,
3341 				log, shift_subtarget, 0);
3342 	  accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
3343 				 add_target ? add_target : accum_target);
3344 	  val_so_far = (val_so_far << log) + 1;
3345 	  break;
3346 
3347 	case alg_sub_t2_m:
3348 	  accum = expand_shift (LSHIFT_EXPR, mode, accum,
3349 				log, shift_subtarget, 0);
3350 	  accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3351 				 add_target ? add_target : accum_target);
3352 	  val_so_far = (val_so_far << log) - 1;
3353 	  break;
3354 
3355 	case alg_add_factor:
3356 	  tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3357 	  accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3358 				 add_target ? add_target : accum_target);
3359 	  val_so_far += val_so_far << log;
3360 	  break;
3361 
3362 	case alg_sub_factor:
3363 	  tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3364 	  accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3365 				 (add_target
3366 				  ? add_target : (optimize ? 0 : tem)));
3367 	  val_so_far = (val_so_far << log) - val_so_far;
3368 	  break;
3369 
3370 	default:
3371 	  gcc_unreachable ();
3372 	}
3373 
3374       if (SCALAR_INT_MODE_P (mode))
3375 	{
3376 	  /* Write a REG_EQUAL note on the last insn so that we can cse
3377 	     multiplication sequences.  Note that if ACCUM is a SUBREG,
3378 	     we've set the inner register and must properly indicate that.  */
3379 	  tem = op0, nmode = mode;
3380 	  accum_inner = accum;
3381 	  if (GET_CODE (accum) == SUBREG)
3382 	    {
3383 	      accum_inner = SUBREG_REG (accum);
3384 	      nmode = GET_MODE (accum_inner);
3385 	      tem = gen_lowpart (nmode, op0);
3386 	    }
3387 
3388 	  /* Don't add a REG_EQUAL note if tem is a paradoxical SUBREG.
3389 	     In that case, only the low bits of accum would be guaranteed to
3390 	     be equal to the content of the REG_EQUAL note, the upper bits
3391 	     can be anything.  */
3392 	  if (!paradoxical_subreg_p (tem))
3393 	    {
3394 	      insn = get_last_insn ();
3395 	      wide_int wval_so_far
3396 		= wi::uhwi (val_so_far,
3397 			    GET_MODE_PRECISION (as_a <scalar_mode> (nmode)));
3398 	      rtx c = immed_wide_int_const (wval_so_far, nmode);
3399 	      set_dst_reg_note (insn, REG_EQUAL, gen_rtx_MULT (nmode, tem, c),
3400 				accum_inner);
3401 	    }
3402 	}
3403     }
3404 
3405   if (variant == negate_variant)
3406     {
3407       val_so_far = -val_so_far;
3408       accum = expand_unop (mode, neg_optab, accum, target, 0);
3409     }
3410   else if (variant == add_variant)
3411     {
3412       val_so_far = val_so_far + 1;
3413       accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3414     }
3415 
3416   /* Compare only the bits of val and val_so_far that are significant
3417      in the result mode, to avoid sign-/zero-extension confusion.  */
3418   nmode = GET_MODE_INNER (mode);
3419   val &= GET_MODE_MASK (nmode);
3420   val_so_far &= GET_MODE_MASK (nmode);
3421   gcc_assert (val == (HOST_WIDE_INT) val_so_far);
3422 
3423   return accum;
3424 }
3425 
3426 /* Perform a multiplication and return an rtx for the result.
3427    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3428    TARGET is a suggestion for where to store the result (an rtx).
3429 
3430    We check specially for a constant integer as OP1.
3431    If you want this check for OP0 as well, then before calling
3432    you should swap the two operands if OP0 would be constant.  */
3433 
3434 rtx
3435 expand_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3436 	     int unsignedp, bool no_libcall)
3437 {
3438   enum mult_variant variant;
3439   struct algorithm algorithm;
3440   rtx scalar_op1;
3441   int max_cost;
3442   bool speed = optimize_insn_for_speed_p ();
3443   bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
3444 
3445   if (CONSTANT_P (op0))
3446     std::swap (op0, op1);
3447 
3448   /* For vectors, there are several simplifications that can be made if
3449      all elements of the vector constant are identical.  */
3450   scalar_op1 = unwrap_const_vec_duplicate (op1);
3451 
3452   if (INTEGRAL_MODE_P (mode))
3453     {
3454       rtx fake_reg;
3455       HOST_WIDE_INT coeff;
3456       bool is_neg;
3457       int mode_bitsize;
3458 
3459       if (op1 == CONST0_RTX (mode))
3460 	return op1;
3461       if (op1 == CONST1_RTX (mode))
3462 	return op0;
3463       if (op1 == CONSTM1_RTX (mode))
3464 	return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3465 			    op0, target, 0);
3466 
3467       if (do_trapv)
3468 	goto skip_synth;
3469 
3470       /* If mode is an integer vector mode, check if the backend supports
3471 	 vector lshift (by scalar or vector) at all.  If not, we can't use
3472 	 a synthesized multiply.  */
3473       if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
3474 	  && optab_handler (vashl_optab, mode) == CODE_FOR_nothing
3475 	  && optab_handler (ashl_optab, mode) == CODE_FOR_nothing)
3476 	goto skip_synth;
3477 
3478       /* These are the operations that are potentially turned into
3479 	 a sequence of shifts and additions.  */
3480       mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
3481 
3482       /* synth_mult does an `unsigned int' multiply.  As long as the mode is
3483 	 less than or equal in size to `unsigned int' this doesn't matter.
3484 	 If the mode is larger than `unsigned int', then synth_mult works
3485 	 only if the constant value exactly fits in an `unsigned int' without
3486 	 any truncation.  This means that multiplying by negative values does
3487 	 not work; results are off by 2^32 on a 32 bit machine.  */
3488       if (CONST_INT_P (scalar_op1))
3489 	{
3490 	  coeff = INTVAL (scalar_op1);
3491 	  is_neg = coeff < 0;
3492 	}
3493 #if TARGET_SUPPORTS_WIDE_INT
3494       else if (CONST_WIDE_INT_P (scalar_op1))
3495 #else
3496       else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
3497 #endif
3498 	{
3499 	  int shift = wi::exact_log2 (rtx_mode_t (scalar_op1, mode));
3500 	  /* Perfect power of 2 (other than 1, which is handled above).  */
3501 	  if (shift > 0)
3502 	    return expand_shift (LSHIFT_EXPR, mode, op0,
3503 				 shift, target, unsignedp);
3504 	  else
3505 	    goto skip_synth;
3506 	}
3507       else
3508 	goto skip_synth;
3509 
3510       /* We used to test optimize here, on the grounds that it's better to
3511 	 produce a smaller program when -O is not used.  But this causes
3512 	 such a terrible slowdown sometimes that it seems better to always
3513 	 use synth_mult.  */
3514 
3515       /* Special case powers of two.  */
3516       if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)
3517 	  && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT))
3518 	return expand_shift (LSHIFT_EXPR, mode, op0,
3519 			     floor_log2 (coeff), target, unsignedp);
3520 
3521       fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3522 
3523       /* Attempt to handle multiplication of DImode values by negative
3524 	 coefficients, by performing the multiplication by a positive
3525 	 multiplier and then inverting the result.  */
3526       if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3527 	{
3528 	  /* It's safe to use -coeff even for INT_MIN, as the
3529 	     result is interpreted as an unsigned coefficient.
3530 	     Exclude cost of op0 from max_cost to match the cost
3531 	     calculation of the synth_mult.  */
3532 	  coeff = -(unsigned HOST_WIDE_INT) coeff;
3533 	  max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1),
3534 				    mode, speed)
3535 		      - neg_cost (speed, mode));
3536 	  if (max_cost <= 0)
3537 	    goto skip_synth;
3538 
3539 	  /* Special case powers of two.  */
3540 	  if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3541 	    {
3542 	      rtx temp = expand_shift (LSHIFT_EXPR, mode, op0,
3543 				       floor_log2 (coeff), target, unsignedp);
3544 	      return expand_unop (mode, neg_optab, temp, target, 0);
3545 	    }
3546 
3547 	  if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3548 				   max_cost))
3549 	    {
3550 	      rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX,
3551 					    &algorithm, variant);
3552 	      return expand_unop (mode, neg_optab, temp, target, 0);
3553 	    }
3554 	  goto skip_synth;
3555 	}
3556 
3557       /* Exclude cost of op0 from max_cost to match the cost
3558 	 calculation of the synth_mult.  */
3559       max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), mode, speed);
3560       if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3561 	return expand_mult_const (mode, op0, coeff, target,
3562 				  &algorithm, variant);
3563     }
3564  skip_synth:
3565 
3566   /* Expand x*2.0 as x+x.  */
3567   if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1)
3568       && real_equal (CONST_DOUBLE_REAL_VALUE (scalar_op1), &dconst2))
3569     {
3570       op0 = force_reg (GET_MODE (op0), op0);
3571       return expand_binop (mode, add_optab, op0, op0,
3572 			   target, unsignedp,
3573 			   no_libcall ? OPTAB_WIDEN : OPTAB_LIB_WIDEN);
3574     }
3575 
3576   /* This used to use umul_optab if unsigned, but for non-widening multiply
3577      there is no difference between signed and unsigned.  */
3578   op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
3579 		      op0, op1, target, unsignedp,
3580 		      no_libcall ? OPTAB_WIDEN : OPTAB_LIB_WIDEN);
3581   gcc_assert (op0 || no_libcall);
3582   return op0;
3583 }
3584 
3585 /* Return a cost estimate for multiplying a register by the given
3586    COEFFicient in the given MODE and SPEED.  */
3587 
3588 int
3589 mult_by_coeff_cost (HOST_WIDE_INT coeff, machine_mode mode, bool speed)
3590 {
3591   int max_cost;
3592   struct algorithm algorithm;
3593   enum mult_variant variant;
3594 
3595   rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3596   max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg),
3597 			   mode, speed);
3598   if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3599     return algorithm.cost.cost;
3600   else
3601     return max_cost;
3602 }
3603 
3604 /* Perform a widening multiplication and return an rtx for the result.
3605    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3606    TARGET is a suggestion for where to store the result (an rtx).
3607    THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3608    or smul_widen_optab.
3609 
3610    We check specially for a constant integer as OP1, comparing the
3611    cost of a widening multiply against the cost of a sequence of shifts
3612    and adds.  */
3613 
3614 rtx
3615 expand_widening_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3616 		      int unsignedp, optab this_optab)
3617 {
3618   bool speed = optimize_insn_for_speed_p ();
3619   rtx cop1;
3620 
3621   if (CONST_INT_P (op1)
3622       && GET_MODE (op0) != VOIDmode
3623       && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3624 				this_optab == umul_widen_optab))
3625       && CONST_INT_P (cop1)
3626       && (INTVAL (cop1) >= 0
3627 	  || HWI_COMPUTABLE_MODE_P (mode)))
3628     {
3629       HOST_WIDE_INT coeff = INTVAL (cop1);
3630       int max_cost;
3631       enum mult_variant variant;
3632       struct algorithm algorithm;
3633 
3634       if (coeff == 0)
3635 	return CONST0_RTX (mode);
3636 
3637       /* Special case powers of two.  */
3638       if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3639 	{
3640 	  op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3641 	  return expand_shift (LSHIFT_EXPR, mode, op0,
3642 			       floor_log2 (coeff), target, unsignedp);
3643 	}
3644 
3645       /* Exclude cost of op0 from max_cost to match the cost
3646 	 calculation of the synth_mult.  */
3647       max_cost = mul_widen_cost (speed, mode);
3648       if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3649 			       max_cost))
3650 	{
3651 	  op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3652 	  return expand_mult_const (mode, op0, coeff, target,
3653 				    &algorithm, variant);
3654 	}
3655     }
3656   return expand_binop (mode, this_optab, op0, op1, target,
3657 		       unsignedp, OPTAB_LIB_WIDEN);
3658 }
3659 
3660 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3661    replace division by D, and put the least significant N bits of the result
3662    in *MULTIPLIER_PTR and return the most significant bit.
3663 
3664    The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3665    needed precision is in PRECISION (should be <= N).
3666 
3667    PRECISION should be as small as possible so this function can choose
3668    multiplier more freely.
3669 
3670    The rounded-up logarithm of D is placed in *lgup_ptr.  A shift count that
3671    is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3672 
3673    Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3674    where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier.  */
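/* Worked example: for D == 3, N == 32 and PRECISION == 32 this yields
   *MULTIPLIER_PTR == 0xAAAAAAAB, *POST_SHIFT_PTR == 1 and *LGUP_PTR == 2,
   and returns 0; i.e. for a 32-bit unsigned x,
   x / 3 == (unsigned) (((unsigned long long) x * 0xAAAAAAAB) >> 33).  */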
3675 
3676 unsigned HOST_WIDE_INT
3677 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3678 		   unsigned HOST_WIDE_INT *multiplier_ptr,
3679 		   int *post_shift_ptr, int *lgup_ptr)
3680 {
3681   int lgup, post_shift;
3682   int pow, pow2;
3683 
3684   /* lgup = ceil(log2(divisor)); */
3685   lgup = ceil_log2 (d);
3686 
3687   gcc_assert (lgup <= n);
3688 
3689   pow = n + lgup;
3690   pow2 = n + lgup - precision;
3691 
3692   /* mlow = 2^(N + lgup)/d */
3693   wide_int val = wi::set_bit_in_zero (pow, HOST_BITS_PER_DOUBLE_INT);
3694   wide_int mlow = wi::udiv_trunc (val, d);
3695 
3696   /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3697   val |= wi::set_bit_in_zero (pow2, HOST_BITS_PER_DOUBLE_INT);
3698   wide_int mhigh = wi::udiv_trunc (val, d);
3699 
3700   /* If precision == N, then mlow, mhigh exceed 2^N
3701      (but they do not exceed 2^(N+1)).  */
3702 
3703   /* Reduce to lowest terms.  */
3704   for (post_shift = lgup; post_shift > 0; post_shift--)
3705     {
3706       unsigned HOST_WIDE_INT ml_lo = wi::extract_uhwi (mlow, 1,
3707 						       HOST_BITS_PER_WIDE_INT);
3708       unsigned HOST_WIDE_INT mh_lo = wi::extract_uhwi (mhigh, 1,
3709 						       HOST_BITS_PER_WIDE_INT);
3710       if (ml_lo >= mh_lo)
3711 	break;
3712 
3713       mlow = wi::uhwi (ml_lo, HOST_BITS_PER_DOUBLE_INT);
3714       mhigh = wi::uhwi (mh_lo, HOST_BITS_PER_DOUBLE_INT);
3715     }
3716 
3717   *post_shift_ptr = post_shift;
3718   *lgup_ptr = lgup;
3719   if (n < HOST_BITS_PER_WIDE_INT)
3720     {
3721       unsigned HOST_WIDE_INT mask = (HOST_WIDE_INT_1U << n) - 1;
3722       *multiplier_ptr = mhigh.to_uhwi () & mask;
3723       return mhigh.to_uhwi () > mask;
3724     }
3725   else
3726     {
3727       *multiplier_ptr = mhigh.to_uhwi ();
3728       return wi::extract_uhwi (mhigh, HOST_BITS_PER_WIDE_INT, 1);
3729     }
3730 }
3731 
3732 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3733    congruent to 1 (mod 2**N).  */
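/* E.g. invert_mod2n (3, 8) == 171, since 3 * 171 == 513 == 2 * 256 + 1.  */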
3734 
3735 static unsigned HOST_WIDE_INT
3736 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3737 {
3738   /* Solve x*y == 1 (mod 2^n), where x is odd.  Return y.  */
3739 
3740   /* The algorithm notes that the choice y = x satisfies
3741      x*y == 1 mod 2^3, since x is assumed odd.
3742      Each iteration doubles the number of bits of significance in y.  */
3743 
3744   unsigned HOST_WIDE_INT mask;
3745   unsigned HOST_WIDE_INT y = x;
3746   int nbit = 3;
3747 
3748   mask = (n == HOST_BITS_PER_WIDE_INT
3749 	  ? HOST_WIDE_INT_M1U
3750 	  : (HOST_WIDE_INT_1U << n) - 1);
3751 
3752   while (nbit < n)
3753     {
3754       y = y * (2 - x*y) & mask;		/* Modulo 2^N */
3755       nbit *= 2;
3756     }
3757   return y;
3758 }
3759 
3760 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3761    flavor of OP0 and OP1.  ADJ_OPERAND is already the high half of the
3762    product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the signed product
3763    to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3764    become signed.
3765 
3766    The result is put in TARGET if that is convenient.
3767 
3768    MODE is the mode of operation.  */
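/* The identity being used, modulo 2**N for N-bit operands:
   unsigned_highpart (X, Y) == signed_highpart (X, Y)
			       + (X < 0 ? Y : 0) + (Y < 0 ? X : 0),
   so the two correction terms are added when UNSIGNEDP and subtracted
   otherwise.  */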
3769 
3770 rtx
3771 expand_mult_highpart_adjust (scalar_int_mode mode, rtx adj_operand, rtx op0,
3772 			     rtx op1, rtx target, int unsignedp)
3773 {
3774   rtx tem;
3775   enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3776 
3777   tem = expand_shift (RSHIFT_EXPR, mode, op0,
3778 		      GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3779   tem = expand_and (mode, tem, op1, NULL_RTX);
3780   adj_operand
3781     = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3782 		     adj_operand);
3783 
3784   tem = expand_shift (RSHIFT_EXPR, mode, op1,
3785 		      GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3786   tem = expand_and (mode, tem, op0, NULL_RTX);
3787   target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3788 			  target);
3789 
3790   return target;
3791 }
3792 
3793 /* Subroutine of expmed_mult_highpart.  Return the MODE high part of OP.  */
3794 
3795 static rtx
3796 extract_high_half (scalar_int_mode mode, rtx op)
3797 {
3798   if (mode == word_mode)
3799     return gen_highpart (mode, op);
3800 
3801   scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3802 
3803   op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3804 		     GET_MODE_BITSIZE (mode), 0, 1);
3805   return convert_modes (mode, wider_mode, op, 0);
3806 }
3807 
3808 /* Like expmed_mult_highpart, but only consider using a multiplication
3809    optab.  OP1 is an rtx for the constant operand.  */
3810 
3811 static rtx
3812 expmed_mult_highpart_optab (scalar_int_mode mode, rtx op0, rtx op1,
3813 			    rtx target, int unsignedp, int max_cost)
3814 {
3815   rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3816   optab moptab;
3817   rtx tem;
3818   int size;
3819   bool speed = optimize_insn_for_speed_p ();
3820 
3821   scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3822 
3823   size = GET_MODE_BITSIZE (mode);
3824 
3825   /* Firstly, try using a multiplication insn that only generates the needed
3826      high part of the product, and in the sign flavor of unsignedp.  */
3827   if (mul_highpart_cost (speed, mode) < max_cost)
3828     {
3829       moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3830       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3831 			  unsignedp, OPTAB_DIRECT);
3832       if (tem)
3833 	return tem;
3834     }
3835 
3836   /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3837      Need to adjust the result after the multiplication.  */
3838   if (size - 1 < BITS_PER_WORD
3839       && (mul_highpart_cost (speed, mode)
3840 	  + 2 * shift_cost (speed, mode, size-1)
3841 	  + 4 * add_cost (speed, mode) < max_cost))
3842     {
3843       moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3844       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3845 			  unsignedp, OPTAB_DIRECT);
3846       if (tem)
3847 	/* We used the wrong signedness.  Adjust the result.  */
3848 	return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3849 					    tem, unsignedp);
3850     }
3851 
3852   /* Try widening multiplication.  */
3853   moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3854   if (convert_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3855       && mul_widen_cost (speed, wider_mode) < max_cost)
3856     {
3857       tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3858 			  unsignedp, OPTAB_WIDEN);
3859       if (tem)
3860 	return extract_high_half (mode, tem);
3861     }
3862 
3863   /* Try widening the mode and perform a non-widening multiplication.  */
3864   if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3865       && size - 1 < BITS_PER_WORD
3866       && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3867 	  < max_cost))
3868     {
3869       rtx_insn *insns;
3870       rtx wop0, wop1;
3871 
3872       /* We need to widen the operands, for example to ensure the
3873 	 constant multiplier is correctly sign or zero extended.
3874 	 Use a sequence to clean up any instructions emitted by
3875 	 the conversions if things don't work out.  */
3876       start_sequence ();
3877       wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3878       wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3879       tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3880 			  unsignedp, OPTAB_WIDEN);
3881       insns = get_insns ();
3882       end_sequence ();
3883 
3884       if (tem)
3885 	{
3886 	  emit_insn (insns);
3887 	  return extract_high_half (mode, tem);
3888 	}
3889     }
3890 
3891   /* Try widening multiplication of opposite signedness, and adjust.  */
3892   moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3893   if (convert_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3894       && size - 1 < BITS_PER_WORD
3895       && (mul_widen_cost (speed, wider_mode)
3896 	  + 2 * shift_cost (speed, mode, size-1)
3897 	  + 4 * add_cost (speed, mode) < max_cost))
3898     {
3899       tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3900 			  NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3901       if (tem != 0)
3902 	{
3903 	  tem = extract_high_half (mode, tem);
3904 	  /* We used the wrong signedness.  Adjust the result.  */
3905 	  return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3906 					      target, unsignedp);
3907 	}
3908     }
3909 
3910   return 0;
3911 }
3912 
3913 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3914    putting the high half of the result in TARGET if that is convenient,
3915    and return where the result is.  If the operation cannot be performed,
3916    0 is returned.
3917 
3918    MODE is the mode of operation and result.
3919 
3920    UNSIGNEDP nonzero means unsigned multiply.
3921 
3922    MAX_COST is the total allowed cost for the expanded RTL.  */
3923 
3924 static rtx
3925 expmed_mult_highpart (scalar_int_mode mode, rtx op0, rtx op1,
3926 		      rtx target, int unsignedp, int max_cost)
3927 {
3928   unsigned HOST_WIDE_INT cnst1;
3929   int extra_cost;
3930   bool sign_adjust = false;
3931   enum mult_variant variant;
3932   struct algorithm alg;
3933   rtx tem;
3934   bool speed = optimize_insn_for_speed_p ();
3935 
3936   /* We can't support modes wider than HOST_BITS_PER_WIDE_INT.  */
3937   gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3938 
3939   cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3940 
3941   /* We can't optimize modes wider than BITS_PER_WORD.
3942      ??? We might be able to perform double-word arithmetic if
3943      mode == word_mode, however all the cost calculations in
3944      synth_mult etc. assume single-word operations.  */
3945   scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3946   if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3947     return expmed_mult_highpart_optab (mode, op0, op1, target,
3948 				       unsignedp, max_cost);
3949 
3950   extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3951 
3952   /* Check whether we try to multiply by a negative constant.  */
3953   if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3954     {
3955       sign_adjust = true;
3956       extra_cost += add_cost (speed, mode);
3957     }
3958 
3959   /* See whether shift/add multiplication is cheap enough.  */
3960   if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3961 			   max_cost - extra_cost))
3962     {
3963       /* See whether the specialized multiplication optabs are
3964 	 cheaper than the shift/add version.  */
3965       tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3966 					alg.cost.cost + extra_cost);
3967       if (tem)
3968 	return tem;
3969 
3970       tem = convert_to_mode (wider_mode, op0, unsignedp);
3971       tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3972       tem = extract_high_half (mode, tem);
3973 
3974       /* Adjust result for signedness.  */
3975       if (sign_adjust)
3976 	tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3977 
3978       return tem;
3979     }
3980   return expmed_mult_highpart_optab (mode, op0, op1, target,
3981 				     unsignedp, max_cost);
3982 }
3983 
3984 
3985 /* Expand signed modulus of OP0 by a power of two D in mode MODE.  */
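/* One of the branch-free sequences below computes
   ((OP0 ^ signmask) - signmask) & (D - 1) and then undoes the
   transformation with another XOR and subtract.  E.g. for OP0 == -7 and
   D == 4: signmask == -1, ((-7 ^ -1) - -1) & 3 == 3, and
   (3 ^ -1) - -1 == -3, matching C's (-7) % 4 == -3.  */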
3986 
3987 static rtx
3988 expand_smod_pow2 (scalar_int_mode mode, rtx op0, HOST_WIDE_INT d)
3989 {
3990   rtx result, temp, shift;
3991   rtx_code_label *label;
3992   int logd;
3993   int prec = GET_MODE_PRECISION (mode);
3994 
3995   logd = floor_log2 (d);
3996   result = gen_reg_rtx (mode);
3997 
3998   /* Avoid conditional branches when they're expensive.  */
3999   if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
4000       && optimize_insn_for_speed_p ())
4001     {
4002       rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
4003 				      mode, 0, -1);
4004       if (signmask)
4005 	{
4006 	  HOST_WIDE_INT masklow = (HOST_WIDE_INT_1 << logd) - 1;
4007 	  signmask = force_reg (mode, signmask);
4008 	  shift = gen_int_shift_amount (mode, GET_MODE_BITSIZE (mode) - logd);
4009 
4010 	  /* Use the rtx_cost of a LSHIFTRT instruction to determine
4011 	     which instruction sequence to use.  If logical right shifts
4012 	     are expensive then use 2 XORs, 2 SUBs and an AND, otherwise
4013 	     use a LSHIFTRT, 1 ADD, 1 SUB and an AND.  */
4014 
4015 	  temp = gen_rtx_LSHIFTRT (mode, result, shift);
4016 	  if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
4017 	      || (set_src_cost (temp, mode, optimize_insn_for_speed_p ())
4018 		  > COSTS_N_INSNS (2)))
4019 	    {
4020 	      temp = expand_binop (mode, xor_optab, op0, signmask,
4021 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
4022 	      temp = expand_binop (mode, sub_optab, temp, signmask,
4023 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
4024 	      temp = expand_binop (mode, and_optab, temp,
4025 				   gen_int_mode (masklow, mode),
4026 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
4027 	      temp = expand_binop (mode, xor_optab, temp, signmask,
4028 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
4029 	      temp = expand_binop (mode, sub_optab, temp, signmask,
4030 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
4031 	    }
4032 	  else
4033 	    {
4034 	      signmask = expand_binop (mode, lshr_optab, signmask, shift,
4035 				       NULL_RTX, 1, OPTAB_LIB_WIDEN);
4036 	      signmask = force_reg (mode, signmask);
4037 
4038 	      temp = expand_binop (mode, add_optab, op0, signmask,
4039 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
4040 	      temp = expand_binop (mode, and_optab, temp,
4041 				   gen_int_mode (masklow, mode),
4042 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
4043 	      temp = expand_binop (mode, sub_optab, temp, signmask,
4044 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
4045 	    }
4046 	  return temp;
4047 	}
4048     }
4049 
4050   /* Mask contains the mode's signbit and the significant bits of the
4051      modulus.  By including the signbit in the operation, many targets
4052      can avoid an explicit compare operation in the following comparison
4053      against zero.  */
4054   wide_int mask = wi::mask (logd, false, prec);
4055   mask = wi::set_bit (mask, prec - 1);
4056 
4057   temp = expand_binop (mode, and_optab, op0,
4058 		       immed_wide_int_const (mask, mode),
4059 		       result, 1, OPTAB_LIB_WIDEN);
4060   if (temp != result)
4061     emit_move_insn (result, temp);
4062 
4063   label = gen_label_rtx ();
4064   do_cmp_and_jump (result, const0_rtx, GE, mode, label);
4065 
4066   temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
4067 		       0, OPTAB_LIB_WIDEN);
4068 
4069   mask = wi::mask (logd, true, prec);
4070   temp = expand_binop (mode, ior_optab, temp,
4071 		       immed_wide_int_const (mask, mode),
4072 		       result, 1, OPTAB_LIB_WIDEN);
4073   temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
4074 		       0, OPTAB_LIB_WIDEN);
4075   if (temp != result)
4076     emit_move_insn (result, temp);
4077   emit_label (label);
4078   return result;
4079 }
4080 
4081 /* Expand signed division of OP0 by a power of two D in mode MODE.
4082    This routine is only called for positive values of D.  */
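/* Every sequence below implements the usual round-toward-zero rule: add
   D - 1 to OP0 when it is negative, then shift right arithmetically by
   floor_log2 (D).  E.g. OP0 == -7, D == 4: (-7 + 3) >> 2 == -1, matching
   C's -7 / 4 == -1.  */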
4083 
4084 static rtx
4085 expand_sdiv_pow2 (scalar_int_mode mode, rtx op0, HOST_WIDE_INT d)
4086 {
4087   rtx temp;
4088   rtx_code_label *label;
4089   int logd;
4090 
4091   logd = floor_log2 (d);
4092 
4093   if (d == 2
4094       && BRANCH_COST (optimize_insn_for_speed_p (),
4095 		      false) >= 1)
4096     {
4097       temp = gen_reg_rtx (mode);
4098       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
4099       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
4100 			   0, OPTAB_LIB_WIDEN);
4101       return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
4102     }
4103 
4104   if (HAVE_conditional_move
4105       && BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2)
4106     {
4107       rtx temp2;
4108 
4109       start_sequence ();
4110       temp2 = copy_to_mode_reg (mode, op0);
4111       temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode),
4112 			   NULL_RTX, 0, OPTAB_LIB_WIDEN);
4113       temp = force_reg (mode, temp);
4114 
4115       /* Construct "temp2 = (temp2 < 0) ? temp : temp2".  */
4116       temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
4117 				     mode, temp, temp2, mode, 0);
4118       if (temp2)
4119 	{
4120 	  rtx_insn *seq = get_insns ();
4121 	  end_sequence ();
4122 	  emit_insn (seq);
4123 	  return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
4124 	}
4125       end_sequence ();
4126     }
4127 
4128   if (BRANCH_COST (optimize_insn_for_speed_p (),
4129 		   false) >= 2)
4130     {
4131       int ushift = GET_MODE_BITSIZE (mode) - logd;
4132 
4133       temp = gen_reg_rtx (mode);
4134       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
4135       if (GET_MODE_BITSIZE (mode) >= BITS_PER_WORD
4136 	  || shift_cost (optimize_insn_for_speed_p (), mode, ushift)
4137 	     > COSTS_N_INSNS (1))
4138 	temp = expand_binop (mode, and_optab, temp, gen_int_mode (d - 1, mode),
4139 			     NULL_RTX, 0, OPTAB_LIB_WIDEN);
4140       else
4141 	temp = expand_shift (RSHIFT_EXPR, mode, temp,
4142 			     ushift, NULL_RTX, 1);
4143       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
4144 			   0, OPTAB_LIB_WIDEN);
4145       return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
4146     }
4147 
4148   label = gen_label_rtx ();
4149   temp = copy_to_mode_reg (mode, op0);
4150   do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
4151   expand_inc (temp, gen_int_mode (d - 1, mode));
4152   emit_label (label);
4153   return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
4154 }
4155 
4156 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
4157    if that is convenient, and returning where the result is.
4158    You may request either the quotient or the remainder as the result;
4159    specify REM_FLAG nonzero to get the remainder.
4160 
4161    CODE is the expression code for which kind of division this is;
4162    it controls how rounding is done.  MODE is the machine mode to use.
4163    UNSIGNEDP nonzero means do unsigned division.  */
4164 
4165 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
4166    and then correct it by or'ing in missing high bits
4167    if result of ANDI is nonzero.
4168    For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
4169    This could optimize to a bfexts instruction.
4170    But C doesn't use these operations, so their optimizations are
4171    left for later.  */
4172 /* ??? For modulo, we don't actually need the highpart of the first product,
4173    the low part will do nicely.  And for small divisors, the second multiply
4174    can also be a low-part only multiply or even be completely left out.
4175    E.g. to calculate the remainder of a division by 3 with a 32 bit
4176    multiply, multiply with 0x55555556 and extract the upper two bits;
4177    the result is exact for inputs up to 0x1fffffff.
4178    The input range can be reduced by using cross-sum rules.
4179    For odd divisors >= 3, the following table gives right shift counts
4180    so that if a number is shifted by an integer multiple of the given
4181    amount, the remainder stays the same:
4182    2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
4183    14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
4184    0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
4185    20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
4186    0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
4187 
4188    Cross-sum rules for even numbers can be derived by leaving as many bits
4189    to the right alone as the divisor has zeros to the right.
4190    E.g. if x is an unsigned 32 bit number:
4191    (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
4192    */
4193 
4194 rtx
4195 expand_divmod (int rem_flag, enum tree_code code, machine_mode mode,
4196 	       rtx op0, rtx op1, rtx target, int unsignedp)
4197 {
4198   machine_mode compute_mode;
4199   rtx tquotient;
4200   rtx quotient = 0, remainder = 0;
4201   rtx_insn *last;
4202   rtx_insn *insn;
4203   optab optab1, optab2;
4204   int op1_is_constant, op1_is_pow2 = 0;
4205   int max_cost, extra_cost;
4206   static HOST_WIDE_INT last_div_const = 0;
4207   bool speed = optimize_insn_for_speed_p ();
4208 
4209   op1_is_constant = CONST_INT_P (op1);
4210   if (op1_is_constant)
4211     {
4212       wide_int ext_op1 = rtx_mode_t (op1, mode);
4213       op1_is_pow2 = (wi::popcount (ext_op1) == 1
4214 		     || (! unsignedp
4215 			 && wi::popcount (wi::neg (ext_op1)) == 1));
4216     }
4217 
4218   /*
4219      This is the structure of expand_divmod:
4220 
4221      First comes code to fix up the operands so we can perform the operations
4222      correctly and efficiently.
4223 
4224      Second comes a switch statement with code specific to each rounding mode.
4225      For some special operands this code emits all RTL for the desired
4226      operation; for other cases it generates only a quotient and stores it in
4227      QUOTIENT.  The case for trunc division/remainder might leave quotient = 0,
4228      to indicate that it has not done anything.
4229 
4230      Last comes code that finishes the operation.  If QUOTIENT is set and
4231      REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1.  If
4232      QUOTIENT is not set, it is computed using trunc rounding.
4233 
4234      We try to generate special code for division and remainder when OP1 is a
4235      constant.  If |OP1| = 2**n we can use shifts and some other fast
4236      operations.  For other values of OP1, we compute a carefully selected
4237      fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
4238      by m.
4239 
4240      In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
4241      half of the product.  Different strategies for generating the product are
4242      implemented in expmed_mult_highpart.
4243 
4244      If what we actually want is the remainder, we generate that by another
4245      by-constant multiplication and a subtraction.  */
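  /* As a concrete example of the by-constant path, dividing a 32-bit
     unsigned X by 10 emits code equivalent to

	 q = (unsigned) (((unsigned long long) X * 0xCCCCCCCD) >> 35);
	 r = X - q * 10;

     (illustrative C rather than the RTL actually generated; the multiplier
     and the shift count come from choose_multiplier).  */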
4246 
4247   /* We shouldn't be called with OP1 == const1_rtx, but some of the
4248      code below will malfunction if we are, so check here and handle
4249      the special case if so.  */
4250   if (op1 == const1_rtx)
4251     return rem_flag ? const0_rtx : op0;
4252 
4253   /* When dividing by -1, we could get an overflow.
4254      negv_optab can handle overflows.  */
4255   if (! unsignedp && op1 == constm1_rtx)
4256     {
4257       if (rem_flag)
4258 	return const0_rtx;
4259       return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT
4260 			  ? negv_optab : neg_optab, op0, target, 0);
4261     }
4262 
4263   if (target
4264       /* Don't use the function value register as a target
4265 	 since we have to read it as well as write it,
4266 	 and function-inlining gets confused by this.  */
4267       && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
4268 	  /* Don't clobber an operand while doing a multi-step calculation.  */
4269 	  || ((rem_flag || op1_is_constant)
4270 	      && (reg_mentioned_p (target, op0)
4271 		  || (MEM_P (op0) && MEM_P (target))))
4272 	  || reg_mentioned_p (target, op1)
4273 	  || (MEM_P (op1) && MEM_P (target))))
4274     target = 0;
4275 
4276   /* Get the mode in which to perform this computation.  Normally it will
4277      be MODE, but sometimes we can't do the desired operation in MODE.
4278      If so, pick a wider mode in which we can do the operation.  Convert
4279      to that mode at the start to avoid repeated conversions.
4280 
4281      First see what operations we need.  These depend on the expression
4282      we are evaluating.  (We assume that divxx3 insns exist under the
4283      same conditions as modxx3 insns, and that these insns don't normally
4284      fail.  If these assumptions are not correct, we may generate less
4285      efficient code in some cases.)
4286 
4287      Then see if we find a mode in which we can open-code that operation
4288      (either a division, modulus, or shift).  Finally, check for the smallest
4289      mode for which we can do the operation with a library call.  */
4290 
4291   /* We might want to refine this now that we have division-by-constant
4292      optimization.  Since expmed_mult_highpart tries so many variants, it is
4293      not straightforward to generalize this.  Maybe we should make an array
4294      of possible modes in init_expmed?  Save this for GCC 2.7.  */
4295 
4296   optab1 = (op1_is_pow2
4297 	    ? (unsignedp ? lshr_optab : ashr_optab)
4298 	    : (unsignedp ? udiv_optab : sdiv_optab));
4299   optab2 = (op1_is_pow2 ? optab1
4300 	    : (unsignedp ? udivmod_optab : sdivmod_optab));
4301 
4302   FOR_EACH_MODE_FROM (compute_mode, mode)
4303     if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
4304 	|| optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
4305       break;
4306 
4307   if (compute_mode == VOIDmode)
4308     FOR_EACH_MODE_FROM (compute_mode, mode)
4309       if (optab_libfunc (optab1, compute_mode)
4310 	  || optab_libfunc (optab2, compute_mode))
4311 	break;
4312 
4313   /* If we still couldn't find a mode, use MODE, but expand_binop will
4314      probably die.  */
4315   if (compute_mode == VOIDmode)
4316     compute_mode = mode;
4317 
4318   if (target && GET_MODE (target) == compute_mode)
4319     tquotient = target;
4320   else
4321     tquotient = gen_reg_rtx (compute_mode);
4322 
4323 #if 0
4324   /* It should be possible to restrict the precision to GET_MODE_BITSIZE
4325      (mode), and thereby get better code when OP1 is a constant.  Do that
4326      later.  It will require going over all usages of SIZE below.  */
4327   size = GET_MODE_BITSIZE (mode);
4328 #endif
4329 
4330   /* Only deduct something for a REM if the last divide done was
4331      for a different constant.   Then set the constant of the last
4332      divide.  */
4333   max_cost = (unsignedp
4334 	      ? udiv_cost (speed, compute_mode)
4335 	      : sdiv_cost (speed, compute_mode));
4336   if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4337 		     && INTVAL (op1) == last_div_const))
4338     max_cost -= (mul_cost (speed, compute_mode)
4339 		 + add_cost (speed, compute_mode));
4340 
4341   last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
4342 
4343   /* Now convert to the best mode to use.  */
4344   if (compute_mode != mode)
4345     {
4346       op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4347       op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4348 
4349       /* convert_modes may have placed op1 into a register, so we
4350 	 must recompute the following.  */
4351       op1_is_constant = CONST_INT_P (op1);
4352       if (op1_is_constant)
4353 	{
4354 	  wide_int ext_op1 = rtx_mode_t (op1, compute_mode);
4355 	  op1_is_pow2 = (wi::popcount (ext_op1) == 1
4356 			 || (! unsignedp
4357 			     && wi::popcount (wi::neg (ext_op1)) == 1));
4358 	}
4359       else
4360 	op1_is_pow2 = 0;
4361     }
4362 
4363   /* If one of the operands is a volatile MEM, copy it into a register.  */
4364 
4365   if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4366     op0 = force_reg (compute_mode, op0);
4367   if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4368     op1 = force_reg (compute_mode, op1);
4369 
4370   /* If we need the remainder or if OP1 is constant, we need to
4371      put OP0 in a register in case it has any queued subexpressions.  */
4372   if (rem_flag || op1_is_constant)
4373     op0 = force_reg (compute_mode, op0);
4374 
4375   last = get_last_insn ();
4376 
4377   /* Promote floor rounding to trunc rounding for unsigned operations.  */
4378   if (unsignedp)
4379     {
4380       if (code == FLOOR_DIV_EXPR)
4381 	code = TRUNC_DIV_EXPR;
4382       if (code == FLOOR_MOD_EXPR)
4383 	code = TRUNC_MOD_EXPR;
4384       if (code == EXACT_DIV_EXPR && op1_is_pow2)
4385 	code = TRUNC_DIV_EXPR;
4386     }
4387 
4388   if (op1 != const0_rtx)
4389     switch (code)
4390       {
4391       case TRUNC_MOD_EXPR:
4392       case TRUNC_DIV_EXPR:
4393 	if (op1_is_constant)
4394 	  {
4395 	    scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
4396 	    int size = GET_MODE_BITSIZE (int_mode);
4397 	    if (unsignedp)
4398 	      {
4399 		unsigned HOST_WIDE_INT mh, ml;
4400 		int pre_shift, post_shift;
4401 		int dummy;
4402 		wide_int wd = rtx_mode_t (op1, int_mode);
4403 		unsigned HOST_WIDE_INT d = wd.to_uhwi ();
4404 
4405 		if (wi::popcount (wd) == 1)
4406 		  {
4407 		    pre_shift = floor_log2 (d);
4408 		    if (rem_flag)
4409 		      {
4410 			unsigned HOST_WIDE_INT mask
4411 			  = (HOST_WIDE_INT_1U << pre_shift) - 1;
4412 			remainder
4413 			  = expand_binop (int_mode, and_optab, op0,
4414 					  gen_int_mode (mask, int_mode),
4415 					  remainder, 1,
4416 					  OPTAB_LIB_WIDEN);
4417 			if (remainder)
4418 			  return gen_lowpart (mode, remainder);
4419 		      }
4420 		    quotient = expand_shift (RSHIFT_EXPR, int_mode, op0,
4421 					     pre_shift, tquotient, 1);
4422 		  }
4423 		else if (size <= HOST_BITS_PER_WIDE_INT)
4424 		  {
4425 		    if (d >= (HOST_WIDE_INT_1U << (size - 1)))
4426 		      {
4427 			/* Most significant bit of divisor is set; emit an scc
4428 			   insn.  */
4429 			quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4430 							  int_mode, 1, 1);
4431 		      }
4432 		    else
4433 		      {
4434 			/* Find a suitable multiplier and right shift count
4435 			   instead of multiplying with D.  */
4436 
4437 			mh = choose_multiplier (d, size, size,
4438 						&ml, &post_shift, &dummy);
4439 
4440 			/* If the suggested multiplier is more than SIZE bits,
4441 			   we can do better for even divisors, using an
4442 			   initial right shift.  */
4443 			if (mh != 0 && (d & 1) == 0)
4444 			  {
4445 			    pre_shift = ctz_or_zero (d);
4446 			    mh = choose_multiplier (d >> pre_shift, size,
4447 						    size - pre_shift,
4448 						    &ml, &post_shift, &dummy);
4449 			    gcc_assert (!mh);
4450 			  }
4451 			else
4452 			  pre_shift = 0;
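			/* Illustration (sketch, 32-bit mode): for D = 14 the
			   straight multiplier needs SIZE + 1 bits, but D is
			   even, so PRE_SHIFT becomes 1 and the multiplier is
			   rechosen for 14 >> 1 = 7 with only 31 significant
			   dividend bits; that multiplier (0x92492493, with
			   POST_SHIFT 2) fits in SIZE bits, as the assertion
			   above checks.  */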
4453 
4454 			if (mh != 0)
4455 			  {
4456 			    rtx t1, t2, t3, t4;
4457 
4458 			    if (post_shift - 1 >= BITS_PER_WORD)
4459 			      goto fail1;
4460 
4461 			    extra_cost
4462 			      = (shift_cost (speed, int_mode, post_shift - 1)
4463 				 + shift_cost (speed, int_mode, 1)
4464 				 + 2 * add_cost (speed, int_mode));
4465 			    t1 = expmed_mult_highpart
4466 			      (int_mode, op0, gen_int_mode (ml, int_mode),
4467 			       NULL_RTX, 1, max_cost - extra_cost);
4468 			    if (t1 == 0)
4469 			      goto fail1;
4470 			    t2 = force_operand (gen_rtx_MINUS (int_mode,
4471 							       op0, t1),
4472 						NULL_RTX);
4473 			    t3 = expand_shift (RSHIFT_EXPR, int_mode,
4474 					       t2, 1, NULL_RTX, 1);
4475 			    t4 = force_operand (gen_rtx_PLUS (int_mode,
4476 							      t1, t3),
4477 						NULL_RTX);
4478 			    quotient = expand_shift
4479 			      (RSHIFT_EXPR, int_mode, t4,
4480 			       post_shift - 1, tquotient, 1);
4481 			  }
4482 			else
4483 			  {
4484 			    rtx t1, t2;
4485 
4486 			    if (pre_shift >= BITS_PER_WORD
4487 				|| post_shift >= BITS_PER_WORD)
4488 			      goto fail1;
4489 
4490 			    t1 = expand_shift
4491 			      (RSHIFT_EXPR, int_mode, op0,
4492 			       pre_shift, NULL_RTX, 1);
4493 			    extra_cost
4494 			      = (shift_cost (speed, int_mode, pre_shift)
4495 				 + shift_cost (speed, int_mode, post_shift));
4496 			    t2 = expmed_mult_highpart
4497 			      (int_mode, t1,
4498 			       gen_int_mode (ml, int_mode),
4499 			       NULL_RTX, 1, max_cost - extra_cost);
4500 			    if (t2 == 0)
4501 			      goto fail1;
4502 			    quotient = expand_shift
4503 			      (RSHIFT_EXPR, int_mode, t2,
4504 			       post_shift, tquotient, 1);
4505 			  }
4506 		      }
4507 		  }
4508 		else		/* Too wide mode to use tricky code */
4509 		  break;
4510 
4511 		insn = get_last_insn ();
4512 		if (insn != last)
4513 		  set_dst_reg_note (insn, REG_EQUAL,
4514 				    gen_rtx_UDIV (int_mode, op0, op1),
4515 				    quotient);
4516 	      }
4517 	    else		/* TRUNC_DIV, signed */
4518 	      {
4519 		unsigned HOST_WIDE_INT ml;
4520 		int lgup, post_shift;
4521 		rtx mlr;
4522 		HOST_WIDE_INT d = INTVAL (op1);
4523 		unsigned HOST_WIDE_INT abs_d;
4524 
4525 		/* Not prepared to handle division/remainder by
4526 		   0xffffffffffffffff8000000000000000 etc.  */
4527 		if (d == HOST_WIDE_INT_MIN && size > HOST_BITS_PER_WIDE_INT)
4528 		  break;
4529 
4530 		/* Since d might be INT_MIN, we have to cast to
4531 		   unsigned HOST_WIDE_INT before negating to avoid
4532 		   undefined signed overflow.  */
4533 		abs_d = (d >= 0
4534 			 ? (unsigned HOST_WIDE_INT) d
4535 			 : - (unsigned HOST_WIDE_INT) d);
4536 
4537 		/* n rem d = n rem -d */
4538 		if (rem_flag && d < 0)
4539 		  {
4540 		    d = abs_d;
4541 		    op1 = gen_int_mode (abs_d, int_mode);
4542 		  }
4543 
4544 		if (d == 1)
4545 		  quotient = op0;
4546 		else if (d == -1)
4547 		  quotient = expand_unop (int_mode, neg_optab, op0,
4548 					  tquotient, 0);
4549 		else if (size <= HOST_BITS_PER_WIDE_INT
4550 			 && abs_d == HOST_WIDE_INT_1U << (size - 1))
4551 		  {
4552 		    /* This case is not handled correctly below.  */
4553 		    quotient = emit_store_flag (tquotient, EQ, op0, op1,
4554 						int_mode, 1, 1);
4555 		    if (quotient == 0)
4556 		      goto fail1;
4557 		  }
4558 		else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4559 			 && (size <= HOST_BITS_PER_WIDE_INT || d >= 0)
4560 			 && (rem_flag
4561 			     ? smod_pow2_cheap (speed, int_mode)
4562 			     : sdiv_pow2_cheap (speed, int_mode))
4563 			 /* We assume that cheap metric is true if the
4564 			    optab has an expander for this mode.  */
4565 			 && ((optab_handler ((rem_flag ? smod_optab
4566 					      : sdiv_optab),
4567 					     int_mode)
4568 			      != CODE_FOR_nothing)
4569 			     || (optab_handler (sdivmod_optab, int_mode)
4570 				 != CODE_FOR_nothing)))
4571 		  ;
4572 		else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4573 		  {
4574 		    if (rem_flag)
4575 		      {
4576 			remainder = expand_smod_pow2 (int_mode, op0, d);
4577 			if (remainder)
4578 			  return gen_lowpart (mode, remainder);
4579 		      }
4580 
4581 		    if (sdiv_pow2_cheap (speed, int_mode)
4582 			&& ((optab_handler (sdiv_optab, int_mode)
4583 			     != CODE_FOR_nothing)
4584 			    || (optab_handler (sdivmod_optab, int_mode)
4585 				!= CODE_FOR_nothing)))
4586 		      quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4587 						int_mode, op0,
4588 						gen_int_mode (abs_d,
4589 							      int_mode),
4590 						NULL_RTX, 0);
4591 		    else
4592 		      quotient = expand_sdiv_pow2 (int_mode, op0, abs_d);
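		    /* A sketch of the kind of branch-free sequence
		       expand_sdiv_pow2 can produce, here for abs_d = 4 in a
		       32-bit mode (the exact choice depends on costs):
			 t = op0 >> 31;			arithmetic, 0 or -1
			 t = (unsigned) t >> 30;	0 or 3, i.e. abs_d - 1
			 quotient = (op0 + t) >> 2;	arithmetic
		       which rounds the quotient toward zero for negative
		       op0.  */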
4593 
4594 		    /* We have computed OP0 / abs(OP1).  If OP1 is negative,
4595 		       negate the quotient.  */
4596 		    if (d < 0)
4597 		      {
4598 			insn = get_last_insn ();
4599 			if (insn != last
4600 			    && abs_d < (HOST_WIDE_INT_1U
4601 					<< (HOST_BITS_PER_WIDE_INT - 1)))
4602 			  set_dst_reg_note (insn, REG_EQUAL,
4603 					    gen_rtx_DIV (int_mode, op0,
4604 							 gen_int_mode
4605 							   (abs_d,
4606 							    int_mode)),
4607 					    quotient);
4608 
4609 			quotient = expand_unop (int_mode, neg_optab,
4610 						quotient, quotient, 0);
4611 		      }
4612 		  }
4613 		else if (size <= HOST_BITS_PER_WIDE_INT)
4614 		  {
4615 		    choose_multiplier (abs_d, size, size - 1,
4616 				       &ml, &post_shift, &lgup);
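		    /* Illustration (sketch, 32-bit mode): for ABS_D = 3 the
		       chosen multiplier is 0x55555556 with POST_SHIFT 0, so
		       the first branch below computes
			 quotient = highpart (op0 * 0x55555556) - (op0 >> 31);
		       for ABS_D = 7 the multiplier is 0x92492493, whose top
		       bit is set, so the second branch computes
			 quotient = ((highpart (op0 * 0x92492493) + op0) >> 2)
				    - (op0 >> 31).  */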
4617 		    if (ml < HOST_WIDE_INT_1U << (size - 1))
4618 		      {
4619 			rtx t1, t2, t3;
4620 
4621 			if (post_shift >= BITS_PER_WORD
4622 			    || size - 1 >= BITS_PER_WORD)
4623 			  goto fail1;
4624 
4625 			extra_cost = (shift_cost (speed, int_mode, post_shift)
4626 				      + shift_cost (speed, int_mode, size - 1)
4627 				      + add_cost (speed, int_mode));
4628 			t1 = expmed_mult_highpart
4629 			  (int_mode, op0, gen_int_mode (ml, int_mode),
4630 			   NULL_RTX, 0, max_cost - extra_cost);
4631 			if (t1 == 0)
4632 			  goto fail1;
4633 			t2 = expand_shift
4634 			  (RSHIFT_EXPR, int_mode, t1,
4635 			   post_shift, NULL_RTX, 0);
4636 			t3 = expand_shift
4637 			  (RSHIFT_EXPR, int_mode, op0,
4638 			   size - 1, NULL_RTX, 0);
4639 			if (d < 0)
4640 			  quotient
4641 			    = force_operand (gen_rtx_MINUS (int_mode, t3, t2),
4642 					     tquotient);
4643 			else
4644 			  quotient
4645 			    = force_operand (gen_rtx_MINUS (int_mode, t2, t3),
4646 					     tquotient);
4647 		      }
4648 		    else
4649 		      {
4650 			rtx t1, t2, t3, t4;
4651 
4652 			if (post_shift >= BITS_PER_WORD
4653 			    || size - 1 >= BITS_PER_WORD)
4654 			  goto fail1;
4655 
4656 			ml |= HOST_WIDE_INT_M1U << (size - 1);
4657 			mlr = gen_int_mode (ml, int_mode);
4658 			extra_cost = (shift_cost (speed, int_mode, post_shift)
4659 				      + shift_cost (speed, int_mode, size - 1)
4660 				      + 2 * add_cost (speed, int_mode));
4661 			t1 = expmed_mult_highpart (int_mode, op0, mlr,
4662 						   NULL_RTX, 0,
4663 						   max_cost - extra_cost);
4664 			if (t1 == 0)
4665 			  goto fail1;
4666 			t2 = force_operand (gen_rtx_PLUS (int_mode, t1, op0),
4667 					    NULL_RTX);
4668 			t3 = expand_shift
4669 			  (RSHIFT_EXPR, int_mode, t2,
4670 			   post_shift, NULL_RTX, 0);
4671 			t4 = expand_shift
4672 			  (RSHIFT_EXPR, int_mode, op0,
4673 			   size - 1, NULL_RTX, 0);
4674 			if (d < 0)
4675 			  quotient
4676 			    = force_operand (gen_rtx_MINUS (int_mode, t4, t3),
4677 					     tquotient);
4678 			else
4679 			  quotient
4680 			    = force_operand (gen_rtx_MINUS (int_mode, t3, t4),
4681 					     tquotient);
4682 		      }
4683 		  }
4684 		else		/* Too wide mode to use tricky code */
4685 		  break;
4686 
4687 		insn = get_last_insn ();
4688 		if (insn != last)
4689 		  set_dst_reg_note (insn, REG_EQUAL,
4690 				    gen_rtx_DIV (int_mode, op0, op1),
4691 				    quotient);
4692 	      }
4693 	    break;
4694 	  }
4695       fail1:
4696 	delete_insns_since (last);
4697 	break;
4698 
4699       case FLOOR_DIV_EXPR:
4700       case FLOOR_MOD_EXPR:
4701       /* We will come here only for signed operations.  */
4702 	if (op1_is_constant && HWI_COMPUTABLE_MODE_P (compute_mode))
4703 	  {
4704 	    scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
4705 	    int size = GET_MODE_BITSIZE (int_mode);
4706 	    unsigned HOST_WIDE_INT mh, ml;
4707 	    int pre_shift, lgup, post_shift;
4708 	    HOST_WIDE_INT d = INTVAL (op1);
4709 
4710 	    if (d > 0)
4711 	      {
4712 		/* We could just as easily deal with negative constants here,
4713 		   but it does not seem worth the trouble for GCC 2.6.  */
4714 		if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4715 		  {
4716 		    pre_shift = floor_log2 (d);
4717 		    if (rem_flag)
4718 		      {
4719 			unsigned HOST_WIDE_INT mask
4720 			  = (HOST_WIDE_INT_1U << pre_shift) - 1;
4721 			remainder = expand_binop
4722 			  (int_mode, and_optab, op0,
4723 			   gen_int_mode (mask, int_mode),
4724 			   remainder, 0, OPTAB_LIB_WIDEN);
4725 			if (remainder)
4726 			  return gen_lowpart (mode, remainder);
4727 		      }
4728 		    quotient = expand_shift
4729 		      (RSHIFT_EXPR, int_mode, op0,
4730 		       pre_shift, tquotient, 0);
4731 		  }
4732 		else
4733 		  {
4734 		    rtx t1, t2, t3, t4;
4735 
4736 		    mh = choose_multiplier (d, size, size - 1,
4737 					    &ml, &post_shift, &lgup);
4738 		    gcc_assert (!mh);
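		    /* The sign-mask trick below gives floor rounding: with
		       t1 = op0 >> (size - 1), i.e. 0 or -1, op0 ^ t1 is op0
		       for op0 >= 0 and -op0 - 1 otherwise; dividing that
		       unsigned by the reciprocal and xor-ing the result with
		       t1 again yields floor (op0 / d), since for op0 < 0
		       floor (op0 / d) == -floor ((-op0 - 1) / d) - 1.  */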
4739 
4740 		    if (post_shift < BITS_PER_WORD
4741 			&& size - 1 < BITS_PER_WORD)
4742 		      {
4743 			t1 = expand_shift
4744 			  (RSHIFT_EXPR, int_mode, op0,
4745 			   size - 1, NULL_RTX, 0);
4746 			t2 = expand_binop (int_mode, xor_optab, op0, t1,
4747 					   NULL_RTX, 0, OPTAB_WIDEN);
4748 			extra_cost = (shift_cost (speed, int_mode, post_shift)
4749 				      + shift_cost (speed, int_mode, size - 1)
4750 				      + 2 * add_cost (speed, int_mode));
4751 			t3 = expmed_mult_highpart
4752 			  (int_mode, t2, gen_int_mode (ml, int_mode),
4753 			   NULL_RTX, 1, max_cost - extra_cost);
4754 			if (t3 != 0)
4755 			  {
4756 			    t4 = expand_shift
4757 			      (RSHIFT_EXPR, int_mode, t3,
4758 			       post_shift, NULL_RTX, 1);
4759 			    quotient = expand_binop (int_mode, xor_optab,
4760 						     t4, t1, tquotient, 0,
4761 						     OPTAB_WIDEN);
4762 			  }
4763 		      }
4764 		  }
4765 	      }
4766 	    else
4767 	      {
4768 		rtx nsign, t1, t2, t3, t4;
4769 		t1 = force_operand (gen_rtx_PLUS (int_mode,
4770 						  op0, constm1_rtx), NULL_RTX);
4771 		t2 = expand_binop (int_mode, ior_optab, op0, t1, NULL_RTX,
4772 				   0, OPTAB_WIDEN);
4773 		nsign = expand_shift (RSHIFT_EXPR, int_mode, t2,
4774 				      size - 1, NULL_RTX, 0);
4775 		t3 = force_operand (gen_rtx_MINUS (int_mode, t1, nsign),
4776 				    NULL_RTX);
4777 		t4 = expand_divmod (0, TRUNC_DIV_EXPR, int_mode, t3, op1,
4778 				    NULL_RTX, 0);
4779 		if (t4)
4780 		  {
4781 		    rtx t5;
4782 		    t5 = expand_unop (int_mode, one_cmpl_optab, nsign,
4783 				      NULL_RTX, 0);
4784 		    quotient = force_operand (gen_rtx_PLUS (int_mode, t4, t5),
4785 					      tquotient);
4786 		  }
4787 	      }
4788 	  }
4789 
4790 	if (quotient != 0)
4791 	  break;
4792 	delete_insns_since (last);
4793 
4794 	/* Try using an instruction that produces both the quotient and
4795 	   remainder, using truncation.  We can easily compensate the quotient
4796 	   or remainder to get floor rounding, once we have the remainder.
4797 	   Notice that we also compute the final remainder value here,
4798 	   and return the result right away.  */
4799 	if (target == 0 || GET_MODE (target) != compute_mode)
4800 	  target = gen_reg_rtx (compute_mode);
4801 
4802 	if (rem_flag)
4803 	  {
4804 	    remainder
4805 	      = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4806 	    quotient = gen_reg_rtx (compute_mode);
4807 	  }
4808 	else
4809 	  {
4810 	    quotient
4811 	      = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4812 	    remainder = gen_reg_rtx (compute_mode);
4813 	  }
4814 
4815 	if (expand_twoval_binop (sdivmod_optab, op0, op1,
4816 				 quotient, remainder, 0))
4817 	  {
4818 	    /* This could be computed with a branch-less sequence.
4819 	       Save that for later.  */
4820 	    rtx tem;
4821 	    rtx_code_label *label = gen_label_rtx ();
4822 	    do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4823 	    tem = expand_binop (compute_mode, xor_optab, op0, op1,
4824 				NULL_RTX, 0, OPTAB_WIDEN);
4825 	    do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4826 	    expand_dec (quotient, const1_rtx);
4827 	    expand_inc (remainder, op1);
4828 	    emit_label (label);
4829 	    return gen_lowpart (mode, rem_flag ? remainder : quotient);
4830 	  }
4831 
4832 	/* No luck with division elimination or divmod.  Have to do it
4833 	   by conditionally adjusting op0 *and* the result.  */
4834 	{
4835 	  rtx_code_label *label1, *label2, *label3, *label4, *label5;
4836 	  rtx adjusted_op0;
4837 	  rtx tem;
4838 
4839 	  quotient = gen_reg_rtx (compute_mode);
4840 	  adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4841 	  label1 = gen_label_rtx ();
4842 	  label2 = gen_label_rtx ();
4843 	  label3 = gen_label_rtx ();
4844 	  label4 = gen_label_rtx ();
4845 	  label5 = gen_label_rtx ();
4846 	  do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4847 	  do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4848 	  tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4849 			      quotient, 0, OPTAB_LIB_WIDEN);
4850 	  if (tem != quotient)
4851 	    emit_move_insn (quotient, tem);
4852 	  emit_jump_insn (targetm.gen_jump (label5));
4853 	  emit_barrier ();
4854 	  emit_label (label1);
4855 	  expand_inc (adjusted_op0, const1_rtx);
4856 	  emit_jump_insn (targetm.gen_jump (label4));
4857 	  emit_barrier ();
4858 	  emit_label (label2);
4859 	  do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4860 	  tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4861 			      quotient, 0, OPTAB_LIB_WIDEN);
4862 	  if (tem != quotient)
4863 	    emit_move_insn (quotient, tem);
4864 	  emit_jump_insn (targetm.gen_jump (label5));
4865 	  emit_barrier ();
4866 	  emit_label (label3);
4867 	  expand_dec (adjusted_op0, const1_rtx);
4868 	  emit_label (label4);
4869 	  tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4870 			      quotient, 0, OPTAB_LIB_WIDEN);
4871 	  if (tem != quotient)
4872 	    emit_move_insn (quotient, tem);
4873 	  expand_dec (quotient, const1_rtx);
4874 	  emit_label (label5);
4875 	}
4876 	break;
4877 
4878       case CEIL_DIV_EXPR:
4879       case CEIL_MOD_EXPR:
4880 	if (unsignedp)
4881 	  {
4882 	    if (op1_is_constant
4883 		&& EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4884 		&& (HWI_COMPUTABLE_MODE_P (compute_mode)
4885 		    || INTVAL (op1) >= 0))
4886 	      {
4887 		scalar_int_mode int_mode
4888 		  = as_a <scalar_int_mode> (compute_mode);
4889 		rtx t1, t2, t3;
4890 		unsigned HOST_WIDE_INT d = INTVAL (op1);
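		/* For a power of two, CEIL (op0 / d) is
		   (op0 >> floor_log2 (d)) plus one if any low bits are set;
		   e.g. with d = 8, 20 / 8 rounds up to (20 >> 3) + 1 = 3.
		   T3 below is that "any remainder" flag.  */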
4891 		t1 = expand_shift (RSHIFT_EXPR, int_mode, op0,
4892 				   floor_log2 (d), tquotient, 1);
4893 		t2 = expand_binop (int_mode, and_optab, op0,
4894 				   gen_int_mode (d - 1, int_mode),
4895 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
4896 		t3 = gen_reg_rtx (int_mode);
4897 		t3 = emit_store_flag (t3, NE, t2, const0_rtx, int_mode, 1, 1);
4898 		if (t3 == 0)
4899 		  {
4900 		    rtx_code_label *lab;
4901 		    lab = gen_label_rtx ();
4902 		    do_cmp_and_jump (t2, const0_rtx, EQ, int_mode, lab);
4903 		    expand_inc (t1, const1_rtx);
4904 		    emit_label (lab);
4905 		    quotient = t1;
4906 		  }
4907 		else
4908 		  quotient = force_operand (gen_rtx_PLUS (int_mode, t1, t3),
4909 					    tquotient);
4910 		break;
4911 	      }
4912 
4913 	    /* Try using an instruction that produces both the quotient and
4914 	       remainder, using truncation.  We can easily compensate the
4915 	       quotient or remainder to get ceiling rounding, once we have the
4916 	       remainder.  Notice that we also compute the final remainder
4917 	       value here, and return the result right away.  */
4918 	    if (target == 0 || GET_MODE (target) != compute_mode)
4919 	      target = gen_reg_rtx (compute_mode);
4920 
4921 	    if (rem_flag)
4922 	      {
4923 		remainder = (REG_P (target)
4924 			     ? target : gen_reg_rtx (compute_mode));
4925 		quotient = gen_reg_rtx (compute_mode);
4926 	      }
4927 	    else
4928 	      {
4929 		quotient = (REG_P (target)
4930 			    ? target : gen_reg_rtx (compute_mode));
4931 		remainder = gen_reg_rtx (compute_mode);
4932 	      }
4933 
4934 	    if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4935 				     remainder, 1))
4936 	      {
4937 		/* This could be computed with a branch-less sequence.
4938 		   Save that for later.  */
4939 		rtx_code_label *label = gen_label_rtx ();
4940 		do_cmp_and_jump (remainder, const0_rtx, EQ,
4941 				 compute_mode, label);
4942 		expand_inc (quotient, const1_rtx);
4943 		expand_dec (remainder, op1);
4944 		emit_label (label);
4945 		return gen_lowpart (mode, rem_flag ? remainder : quotient);
4946 	      }
4947 
4948 	    /* No luck with division elimination or divmod.  Have to do it
4949 	       by conditionally adjusting op0 *and* the result.  */
4950 	    {
4951 	      rtx_code_label *label1, *label2;
4952 	      rtx adjusted_op0, tem;
4953 
4954 	      quotient = gen_reg_rtx (compute_mode);
4955 	      adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4956 	      label1 = gen_label_rtx ();
4957 	      label2 = gen_label_rtx ();
4958 	      do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4959 			       compute_mode, label1);
4960 	      emit_move_insn (quotient, const0_rtx);
4961 	      emit_jump_insn (targetm.gen_jump (label2));
4962 	      emit_barrier ();
4963 	      emit_label (label1);
4964 	      expand_dec (adjusted_op0, const1_rtx);
4965 	      tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4966 				  quotient, 1, OPTAB_LIB_WIDEN);
4967 	      if (tem != quotient)
4968 		emit_move_insn (quotient, tem);
4969 	      expand_inc (quotient, const1_rtx);
4970 	      emit_label (label2);
4971 	    }
4972 	  }
4973 	else /* signed */
4974 	  {
4975 	    if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4976 		&& INTVAL (op1) >= 0)
4977 	      {
4978 		/* This is extremely similar to the code for the unsigned case
4979 		   above.  For 2.7 we should merge these variants, but for
4980 		   2.6.1 I don't want to touch the code for unsigned since that
4981 		   gets used in C.  The signed case will only be used by other
4982 		   languages (Ada).  */
4983 
4984 		rtx t1, t2, t3;
4985 		unsigned HOST_WIDE_INT d = INTVAL (op1);
4986 		t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4987 				   floor_log2 (d), tquotient, 0);
4988 		t2 = expand_binop (compute_mode, and_optab, op0,
4989 				   gen_int_mode (d - 1, compute_mode),
4990 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
4991 		t3 = gen_reg_rtx (compute_mode);
4992 		t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4993 				      compute_mode, 1, 1);
4994 		if (t3 == 0)
4995 		  {
4996 		    rtx_code_label *lab;
4997 		    lab = gen_label_rtx ();
4998 		    do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4999 		    expand_inc (t1, const1_rtx);
5000 		    emit_label (lab);
5001 		    quotient = t1;
5002 		  }
5003 		else
5004 		  quotient = force_operand (gen_rtx_PLUS (compute_mode,
5005 							  t1, t3),
5006 					    tquotient);
5007 		break;
5008 	      }
5009 
5010 	    /* Try using an instruction that produces both the quotient and
5011 	       remainder, using truncation.  We can easily compensate the
5012 	       quotient or remainder to get ceiling rounding, once we have the
5013 	       remainder.  Notice that we also compute the final remainder
5014 	       value here, and return the result right away.  */
5015 	    if (target == 0 || GET_MODE (target) != compute_mode)
5016 	      target = gen_reg_rtx (compute_mode);
5017 	    if (rem_flag)
5018 	      {
5019 		remainder = (REG_P (target)
5020 			    ? target : gen_reg_rtx (compute_mode));
5021 		quotient = gen_reg_rtx (compute_mode);
5022 	      }
5023 	    else
5024 	      {
5025 		quotient = (REG_P (target)
5026 			    ? target : gen_reg_rtx (compute_mode));
5027 		remainder = gen_reg_rtx (compute_mode);
5028 	      }
5029 
5030 	    if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
5031 				     remainder, 0))
5032 	      {
5033 		/* This could be computed with a branch-less sequence.
5034 		   Save that for later.  */
5035 		rtx tem;
5036 		rtx_code_label *label = gen_label_rtx ();
5037 		do_cmp_and_jump (remainder, const0_rtx, EQ,
5038 				 compute_mode, label);
5039 		tem = expand_binop (compute_mode, xor_optab, op0, op1,
5040 				    NULL_RTX, 0, OPTAB_WIDEN);
5041 		do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
5042 		expand_inc (quotient, const1_rtx);
5043 		expand_dec (remainder, op1);
5044 		emit_label (label);
5045 		return gen_lowpart (mode, rem_flag ? remainder : quotient);
5046 	      }
5047 
5048 	    /* No luck with division elimination or divmod.  Have to do it
5049 	       by conditionally adjusting op0 *and* the result.  */
5050 	    {
5051 	      rtx_code_label *label1, *label2, *label3, *label4, *label5;
5052 	      rtx adjusted_op0;
5053 	      rtx tem;
5054 
5055 	      quotient = gen_reg_rtx (compute_mode);
5056 	      adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
5057 	      label1 = gen_label_rtx ();
5058 	      label2 = gen_label_rtx ();
5059 	      label3 = gen_label_rtx ();
5060 	      label4 = gen_label_rtx ();
5061 	      label5 = gen_label_rtx ();
5062 	      do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
5063 	      do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
5064 			       compute_mode, label1);
5065 	      tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
5066 				  quotient, 0, OPTAB_LIB_WIDEN);
5067 	      if (tem != quotient)
5068 		emit_move_insn (quotient, tem);
5069 	      emit_jump_insn (targetm.gen_jump (label5));
5070 	      emit_barrier ();
5071 	      emit_label (label1);
5072 	      expand_dec (adjusted_op0, const1_rtx);
5073 	      emit_jump_insn (targetm.gen_jump (label4));
5074 	      emit_barrier ();
5075 	      emit_label (label2);
5076 	      do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
5077 			       compute_mode, label3);
5078 	      tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
5079 				  quotient, 0, OPTAB_LIB_WIDEN);
5080 	      if (tem != quotient)
5081 		emit_move_insn (quotient, tem);
5082 	      emit_jump_insn (targetm.gen_jump (label5));
5083 	      emit_barrier ();
5084 	      emit_label (label3);
5085 	      expand_inc (adjusted_op0, const1_rtx);
5086 	      emit_label (label4);
5087 	      tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
5088 				  quotient, 0, OPTAB_LIB_WIDEN);
5089 	      if (tem != quotient)
5090 		emit_move_insn (quotient, tem);
5091 	      expand_inc (quotient, const1_rtx);
5092 	      emit_label (label5);
5093 	    }
5094 	  }
5095 	break;
5096 
5097       case EXACT_DIV_EXPR:
5098 	if (op1_is_constant && HWI_COMPUTABLE_MODE_P (compute_mode))
5099 	  {
5100 	    scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
5101 	    int size = GET_MODE_BITSIZE (int_mode);
5102 	    HOST_WIDE_INT d = INTVAL (op1);
5103 	    unsigned HOST_WIDE_INT ml;
5104 	    int pre_shift;
5105 	    rtx t1;
5106 
5107 	    pre_shift = ctz_or_zero (d);
5108 	    ml = invert_mod2n (d >> pre_shift, size);
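	    /* Illustration (sketch, 32-bit mode): for an exact division by
	       24, PRE_SHIFT is 3 and ML is the inverse of 3 modulo 2**32,
	       0xAAAAAAAB, since 3 * 0xAAAAAAAB == 2**33 + 1 == 1 (mod 2**32);
	       multiplying the shifted dividend by ML recovers the exact
	       quotient with no correction steps.  */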
5109 	    t1 = expand_shift (RSHIFT_EXPR, int_mode, op0,
5110 			       pre_shift, NULL_RTX, unsignedp);
5111 	    quotient = expand_mult (int_mode, t1, gen_int_mode (ml, int_mode),
5112 				    NULL_RTX, 1);
5113 
5114 	    insn = get_last_insn ();
5115 	    set_dst_reg_note (insn, REG_EQUAL,
5116 			      gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
5117 					      int_mode, op0, op1),
5118 			      quotient);
5119 	  }
5120 	break;
5121 
5122       case ROUND_DIV_EXPR:
5123       case ROUND_MOD_EXPR:
5124 	if (unsignedp)
5125 	  {
5126 	    scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
5127 	    rtx tem;
5128 	    rtx_code_label *label;
5129 	    label = gen_label_rtx ();
5130 	    quotient = gen_reg_rtx (int_mode);
5131 	    remainder = gen_reg_rtx (int_mode);
5132 	    if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
5133 	      {
5134 		rtx tem;
5135 		quotient = expand_binop (int_mode, udiv_optab, op0, op1,
5136 					 quotient, 1, OPTAB_LIB_WIDEN);
5137 		tem = expand_mult (int_mode, quotient, op1, NULL_RTX, 1);
5138 		remainder = expand_binop (int_mode, sub_optab, op0, tem,
5139 					  remainder, 1, OPTAB_LIB_WIDEN);
5140 	      }
5141 	    tem = plus_constant (int_mode, op1, -1);
5142 	    tem = expand_shift (RSHIFT_EXPR, int_mode, tem, 1, NULL_RTX, 1);
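	    /* TEM is now (op1 - 1) / 2, the largest remainder that still
	       rounds down; e.g. for op0 = 10, op1 = 4 the truncating divmod
	       gives 2 rem 2, and since 2 > 1 the quotient is bumped to 3 and
	       the remainder becomes -2, i.e. 10 = 3*4 - 2.  */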
5143 	    do_cmp_and_jump (remainder, tem, LEU, int_mode, label);
5144 	    expand_inc (quotient, const1_rtx);
5145 	    expand_dec (remainder, op1);
5146 	    emit_label (label);
5147 	  }
5148 	else
5149 	  {
5150 	    scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
5151 	    int size = GET_MODE_BITSIZE (int_mode);
5152 	    rtx abs_rem, abs_op1, tem, mask;
5153 	    rtx_code_label *label;
5154 	    label = gen_label_rtx ();
5155 	    quotient = gen_reg_rtx (int_mode);
5156 	    remainder = gen_reg_rtx (int_mode);
5157 	    if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
5158 	      {
5159 		rtx tem;
5160 		quotient = expand_binop (int_mode, sdiv_optab, op0, op1,
5161 					 quotient, 0, OPTAB_LIB_WIDEN);
5162 		tem = expand_mult (int_mode, quotient, op1, NULL_RTX, 0);
5163 		remainder = expand_binop (int_mode, sub_optab, op0, tem,
5164 					  remainder, 0, OPTAB_LIB_WIDEN);
5165 	      }
5166 	    abs_rem = expand_abs (int_mode, remainder, NULL_RTX, 1, 0);
5167 	    abs_op1 = expand_abs (int_mode, op1, NULL_RTX, 1, 0);
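	    /* Round to nearest: adjust only when 2 * |remainder| >= |op1|.
	       MASK below is the sign of op0 ^ op1 (0 or -1), so
	       (mask ^ 1) - mask is +1 or -1 and (mask ^ op1) - mask is
	       op1 or -op1; the quotient and remainder are thus moved one
	       step away from zero without branches.  */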
5168 	    tem = expand_shift (LSHIFT_EXPR, int_mode, abs_rem,
5169 				1, NULL_RTX, 1);
5170 	    do_cmp_and_jump (tem, abs_op1, LTU, int_mode, label);
5171 	    tem = expand_binop (int_mode, xor_optab, op0, op1,
5172 				NULL_RTX, 0, OPTAB_WIDEN);
5173 	    mask = expand_shift (RSHIFT_EXPR, int_mode, tem,
5174 				 size - 1, NULL_RTX, 0);
5175 	    tem = expand_binop (int_mode, xor_optab, mask, const1_rtx,
5176 				NULL_RTX, 0, OPTAB_WIDEN);
5177 	    tem = expand_binop (int_mode, sub_optab, tem, mask,
5178 				NULL_RTX, 0, OPTAB_WIDEN);
5179 	    expand_inc (quotient, tem);
5180 	    tem = expand_binop (int_mode, xor_optab, mask, op1,
5181 				NULL_RTX, 0, OPTAB_WIDEN);
5182 	    tem = expand_binop (int_mode, sub_optab, tem, mask,
5183 				NULL_RTX, 0, OPTAB_WIDEN);
5184 	    expand_dec (remainder, tem);
5185 	    emit_label (label);
5186 	  }
5187 	return gen_lowpart (mode, rem_flag ? remainder : quotient);
5188 
5189       default:
5190 	gcc_unreachable ();
5191       }
5192 
5193   if (quotient == 0)
5194     {
5195       if (target && GET_MODE (target) != compute_mode)
5196 	target = 0;
5197 
5198       if (rem_flag)
5199 	{
5200 	  /* Try to produce the remainder without producing the quotient.
5201 	     If we seem to have a divmod pattern that does not require widening,
5202 	     don't try widening here.  We should really have a WIDEN argument
5203 	     to expand_twoval_binop, since what we'd really like to do here is
5204 	     1) try a mod insn in compute_mode
5205 	     2) try a divmod insn in compute_mode
5206 	     3) try a div insn in compute_mode and multiply-subtract to get
5207 	        remainder
5208 	     4) try the same things with widening allowed.  */
5209 	  remainder
5210 	    = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5211 				 op0, op1, target,
5212 				 unsignedp,
5213 				 ((optab_handler (optab2, compute_mode)
5214 				   != CODE_FOR_nothing)
5215 				  ? OPTAB_DIRECT : OPTAB_WIDEN));
5216 	  if (remainder == 0)
5217 	    {
5218 	      /* No luck there.  Can we do remainder and divide at once
5219 		 without a library call?  */
5220 	      remainder = gen_reg_rtx (compute_mode);
5221 	      if (! expand_twoval_binop ((unsignedp
5222 					  ? udivmod_optab
5223 					  : sdivmod_optab),
5224 					 op0, op1,
5225 					 NULL_RTX, remainder, unsignedp))
5226 		remainder = 0;
5227 	    }
5228 
5229 	  if (remainder)
5230 	    return gen_lowpart (mode, remainder);
5231 	}
5232 
5233       /* Produce the quotient.  Try a quotient insn, but not a library call.
5234 	 If we have a divmod in this mode, use it in preference to widening
5235 	 the div (for this test we assume it will not fail). Note that optab2
5236 	 is set to the one of the two optabs that the call below will use.  */
5237       quotient
5238 	= sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
5239 			     op0, op1, rem_flag ? NULL_RTX : target,
5240 			     unsignedp,
5241 			     ((optab_handler (optab2, compute_mode)
5242 			       != CODE_FOR_nothing)
5243 			      ? OPTAB_DIRECT : OPTAB_WIDEN));
5244 
5245       if (quotient == 0)
5246 	{
5247 	  /* No luck there.  Try a quotient-and-remainder insn,
5248 	     keeping the quotient alone.  */
5249 	  quotient = gen_reg_rtx (compute_mode);
5250 	  if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
5251 				     op0, op1,
5252 				     quotient, NULL_RTX, unsignedp))
5253 	    {
5254 	      quotient = 0;
5255 	      if (! rem_flag)
5256 		/* Still no luck.  If we are not computing the remainder,
5257 		   use a library call for the quotient.  */
5258 		quotient = sign_expand_binop (compute_mode,
5259 					      udiv_optab, sdiv_optab,
5260 					      op0, op1, target,
5261 					      unsignedp, OPTAB_LIB_WIDEN);
5262 	    }
5263 	}
5264     }
5265 
5266   if (rem_flag)
5267     {
5268       if (target && GET_MODE (target) != compute_mode)
5269 	target = 0;
5270 
5271       if (quotient == 0)
5272 	{
5273 	  /* No divide instruction either.  Use library for remainder.  */
5274 	  remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5275 					 op0, op1, target,
5276 					 unsignedp, OPTAB_LIB_WIDEN);
5277 	  /* No remainder function.  Try a quotient-and-remainder
5278 	     function, keeping the remainder.  */
5279 	  if (!remainder)
5280 	    {
5281 	      remainder = gen_reg_rtx (compute_mode);
5282 	      if (!expand_twoval_binop_libfunc
5283 		  (unsignedp ? udivmod_optab : sdivmod_optab,
5284 		   op0, op1,
5285 		   NULL_RTX, remainder,
5286 		   unsignedp ? UMOD : MOD))
5287 		remainder = NULL_RTX;
5288 	    }
5289 	}
5290       else
5291 	{
5292 	  /* We divided.  Now finish doing X - Y * (X / Y).  */
5293 	  remainder = expand_mult (compute_mode, quotient, op1,
5294 				   NULL_RTX, unsignedp);
5295 	  remainder = expand_binop (compute_mode, sub_optab, op0,
5296 				    remainder, target, unsignedp,
5297 				    OPTAB_LIB_WIDEN);
5298 	}
5299     }
5300 
5301   return gen_lowpart (mode, rem_flag ? remainder : quotient);
5302 }
5303 
5304 /* Return a tree node with data type TYPE, describing the value of X.
5305    Usually this is a VAR_DECL, if there is no obvious better choice.
5306    X may be an expression; however, we only support those expressions
5307    generated by loop.c.  */
5308 
5309 tree
5310 make_tree (tree type, rtx x)
5311 {
5312   tree t;
5313 
5314   switch (GET_CODE (x))
5315     {
5316     case CONST_INT:
5317     case CONST_WIDE_INT:
5318       t = wide_int_to_tree (type, rtx_mode_t (x, TYPE_MODE (type)));
5319       return t;
5320 
5321     case CONST_DOUBLE:
5322       STATIC_ASSERT (HOST_BITS_PER_WIDE_INT * 2 <= MAX_BITSIZE_MODE_ANY_INT);
5323       if (TARGET_SUPPORTS_WIDE_INT == 0 && GET_MODE (x) == VOIDmode)
5324 	t = wide_int_to_tree (type,
5325 			      wide_int::from_array (&CONST_DOUBLE_LOW (x), 2,
5326 						    HOST_BITS_PER_WIDE_INT * 2));
5327       else
5328 	t = build_real (type, *CONST_DOUBLE_REAL_VALUE (x));
5329 
5330       return t;
5331 
5332     case CONST_VECTOR:
5333       {
5334 	unsigned int npatterns = CONST_VECTOR_NPATTERNS (x);
5335 	unsigned int nelts_per_pattern = CONST_VECTOR_NELTS_PER_PATTERN (x);
5336 	tree itype = TREE_TYPE (type);
5337 
5338 	/* Build a tree with vector elements.  */
5339 	tree_vector_builder elts (type, npatterns, nelts_per_pattern);
5340 	unsigned int count = elts.encoded_nelts ();
5341 	for (unsigned int i = 0; i < count; ++i)
5342 	  {
5343 	    rtx elt = CONST_VECTOR_ELT (x, i);
5344 	    elts.quick_push (make_tree (itype, elt));
5345 	  }
5346 
5347 	return elts.build ();
5348       }
5349 
5350     case PLUS:
5351       return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5352 			  make_tree (type, XEXP (x, 1)));
5353 
5354     case MINUS:
5355       return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5356 			  make_tree (type, XEXP (x, 1)));
5357 
5358     case NEG:
5359       return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5360 
5361     case MULT:
5362       return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5363 			  make_tree (type, XEXP (x, 1)));
5364 
5365     case ASHIFT:
5366       return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5367 			  make_tree (type, XEXP (x, 1)));
5368 
5369     case LSHIFTRT:
5370       t = unsigned_type_for (type);
5371       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5372 			    		 make_tree (t, XEXP (x, 0)),
5373 				    	 make_tree (type, XEXP (x, 1))));
5374 
5375     case ASHIFTRT:
5376       t = signed_type_for (type);
5377       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5378 					 make_tree (t, XEXP (x, 0)),
5379 				    	 make_tree (type, XEXP (x, 1))));
5380 
5381     case DIV:
5382       if (TREE_CODE (type) != REAL_TYPE)
5383 	t = signed_type_for (type);
5384       else
5385 	t = type;
5386 
5387       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5388 				    	 make_tree (t, XEXP (x, 0)),
5389 				    	 make_tree (t, XEXP (x, 1))));
5390     case UDIV:
5391       t = unsigned_type_for (type);
5392       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5393 				    	 make_tree (t, XEXP (x, 0)),
5394 				    	 make_tree (t, XEXP (x, 1))));
5395 
5396     case SIGN_EXTEND:
5397     case ZERO_EXTEND:
5398       t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5399 					  GET_CODE (x) == ZERO_EXTEND);
5400       return fold_convert (type, make_tree (t, XEXP (x, 0)));
5401 
5402     case CONST:
5403       return make_tree (type, XEXP (x, 0));
5404 
5405     case SYMBOL_REF:
5406       t = SYMBOL_REF_DECL (x);
5407       if (t)
5408 	return fold_convert (type, build_fold_addr_expr (t));
5409       /* fall through.  */
5410 
5411     default:
5412       if (CONST_POLY_INT_P (x))
5413 	return wide_int_to_tree (t, const_poly_int_value (x));
5414 
5415       t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5416 
5417       /* If TYPE is a POINTER_TYPE, we might need to convert X from
5418 	 address mode to pointer mode.  */
5419       if (POINTER_TYPE_P (type))
5420 	x = convert_memory_address_addr_space
5421 	  (SCALAR_INT_TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5422 
5423       /* Note that we do *not* use SET_DECL_RTL here, because we do not
5424 	 want set_decl_rtl to go adjusting REG_ATTRS for this temporary.  */
5425       t->decl_with_rtl.rtl = x;
5426 
5427       return t;
5428     }
5429 }
5430 
5431 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5432    and returning TARGET.
5433 
5434    If TARGET is 0, a pseudo-register or constant is returned.  */
5435 
5436 rtx
5437 expand_and (machine_mode mode, rtx op0, rtx op1, rtx target)
5438 {
5439   rtx tem = 0;
5440 
5441   if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5442     tem = simplify_binary_operation (AND, mode, op0, op1);
5443   if (tem == 0)
5444     tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5445 
5446   if (target == 0)
5447     target = tem;
5448   else if (tem != target)
5449     emit_move_insn (target, tem);
5450   return target;
5451 }
5452 
5453 /* Helper function for emit_store_flag.  */
5454 rtx
5455 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5456 	     machine_mode mode, machine_mode compare_mode,
5457 	     int unsignedp, rtx x, rtx y, int normalizep,
5458 	     machine_mode target_mode)
5459 {
5460   class expand_operand ops[4];
5461   rtx op0, comparison, subtarget;
5462   rtx_insn *last;
5463   scalar_int_mode result_mode = targetm.cstore_mode (icode);
5464   scalar_int_mode int_target_mode;
5465 
5466   last = get_last_insn ();
5467   x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5468   y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5469   if (!x || !y)
5470     {
5471       delete_insns_since (last);
5472       return NULL_RTX;
5473     }
5474 
5475   if (target_mode == VOIDmode)
5476     int_target_mode = result_mode;
5477   else
5478     int_target_mode = as_a <scalar_int_mode> (target_mode);
5479   if (!target)
5480     target = gen_reg_rtx (int_target_mode);
5481 
5482   comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5483 
5484   create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5485   create_fixed_operand (&ops[1], comparison);
5486   create_fixed_operand (&ops[2], x);
5487   create_fixed_operand (&ops[3], y);
5488   if (!maybe_expand_insn (icode, 4, ops))
5489     {
5490       delete_insns_since (last);
5491       return NULL_RTX;
5492     }
5493   subtarget = ops[0].value;
5494 
5495   /* If we are converting to a wider mode, first convert to
5496      INT_TARGET_MODE, then normalize.  This produces better combining
5497      opportunities on machines that have a SIGN_EXTRACT when we are
5498      testing a single bit.  This mostly benefits the 68k.
5499 
5500      If STORE_FLAG_VALUE does not have the sign bit set when
5501      interpreted in MODE, we can do this conversion as unsigned, which
5502      is usually more efficient.  */
5503   if (GET_MODE_PRECISION (int_target_mode) > GET_MODE_PRECISION (result_mode))
5504     {
5505       gcc_assert (GET_MODE_PRECISION (result_mode) != 1
5506 		  || STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1);
5507 
5508       bool unsignedp = (STORE_FLAG_VALUE >= 0);
5509       convert_move (target, subtarget, unsignedp);
5510 
5511       op0 = target;
5512       result_mode = int_target_mode;
5513     }
5514   else
5515     op0 = subtarget;
5516 
5517   /* If we want to keep subexpressions around, don't reuse our last
5518      target.  */
5519   if (optimize)
5520     subtarget = 0;
5521 
5522   /* Now normalize to the proper value in MODE.  Sometimes we don't
5523      have to do anything.  */
5524   if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5525     ;
5526   /* STORE_FLAG_VALUE might be the most negative number, so write
5527      the comparison this way to avoid a compile-time warning.  */
5528   else if (- normalizep == STORE_FLAG_VALUE)
5529     op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5530 
5531   /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5532      it hard to use a value of just the sign bit due to ANSI integer
5533      constant typing rules.  */
5534   else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5535     op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5536 			GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5537 			normalizep == 1);
5538   else
5539     {
5540       gcc_assert (STORE_FLAG_VALUE & 1);
5541 
5542       op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5543       if (normalizep == -1)
5544 	op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5545     }
5546 
5547   /* If we were converting to a smaller mode, do the conversion now.  */
5548   if (int_target_mode != result_mode)
5549     {
5550       convert_move (target, op0, 0);
5551       return target;
5552     }
5553   else
5554     return op0;
5555 }
5556 
5557 
5558 /* A subroutine of emit_store_flag only including "tricks" that do not
5559    need a recursive call.  These are kept separate to avoid infinite
5560    loops.  */
5561 
5562 static rtx
5563 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5564 		   machine_mode mode, int unsignedp, int normalizep,
5565 		   machine_mode target_mode)
5566 {
5567   rtx subtarget;
5568   enum insn_code icode;
5569   machine_mode compare_mode;
5570   enum mode_class mclass;
5571   enum rtx_code scode;
5572 
5573   if (unsignedp)
5574     code = unsigned_condition (code);
5575   scode = swap_condition (code);
5576 
5577   /* If one operand is constant, make it the second one.  Only do this
5578      if the other operand is not constant as well.  */
5579 
5580   if (swap_commutative_operands_p (op0, op1))
5581     {
5582       std::swap (op0, op1);
5583       code = swap_condition (code);
5584     }
5585 
5586   if (mode == VOIDmode)
5587     mode = GET_MODE (op0);
5588 
5589   if (CONST_SCALAR_INT_P (op1))
5590     canonicalize_comparison (mode, &code, &op1);
5591 
5592   /* For some comparisons with 1 and -1, we can convert this to
5593      comparisons with zero.  This will often produce more opportunities for
5594      store-flag insns.  */
5595 
5596   switch (code)
5597     {
5598     case LT:
5599       if (op1 == const1_rtx)
5600 	op1 = const0_rtx, code = LE;
5601       break;
5602     case LE:
5603       if (op1 == constm1_rtx)
5604 	op1 = const0_rtx, code = LT;
5605       break;
5606     case GE:
5607       if (op1 == const1_rtx)
5608 	op1 = const0_rtx, code = GT;
5609       break;
5610     case GT:
5611       if (op1 == constm1_rtx)
5612 	op1 = const0_rtx, code = GE;
5613       break;
5614     case GEU:
5615       if (op1 == const1_rtx)
5616 	op1 = const0_rtx, code = NE;
5617       break;
5618     case LTU:
5619       if (op1 == const1_rtx)
5620 	op1 = const0_rtx, code = EQ;
5621       break;
5622     default:
5623       break;
5624     }
5625 
5626   /* If we are comparing a double-word integer with zero or -1, we can
5627      convert the comparison into one involving a single word.  */
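  /* For example, with 32-bit words a DImode value X is zero exactly when
     (low | high) == 0 and is -1 exactly when (low & high) == -1, which is
     what the IOR/AND below compute; for LT/GE against zero only the sign
     bit of the high word matters.  */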
5628   scalar_int_mode int_mode;
5629   if (is_int_mode (mode, &int_mode)
5630       && GET_MODE_BITSIZE (int_mode) == BITS_PER_WORD * 2
5631       && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5632     {
5633       rtx tem;
5634       if ((code == EQ || code == NE)
5635 	  && (op1 == const0_rtx || op1 == constm1_rtx))
5636 	{
5637 	  rtx op00, op01;
5638 
5639 	  /* Do a logical OR or AND of the two words and compare the
5640 	     result.  */
5641 	  op00 = simplify_gen_subreg (word_mode, op0, int_mode, 0);
5642 	  op01 = simplify_gen_subreg (word_mode, op0, int_mode, UNITS_PER_WORD);
5643 	  tem = expand_binop (word_mode,
5644 			      op1 == const0_rtx ? ior_optab : and_optab,
5645 			      op00, op01, NULL_RTX, unsignedp,
5646 			      OPTAB_DIRECT);
5647 
5648 	  if (tem != 0)
5649 	    tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5650 				   unsignedp, normalizep);
5651 	}
5652       else if ((code == LT || code == GE) && op1 == const0_rtx)
5653 	{
5654 	  rtx op0h;
5655 
5656 	  /* If testing the sign bit, can just test on high word.  */
5657 	  op0h = simplify_gen_subreg (word_mode, op0, int_mode,
5658 				      subreg_highpart_offset (word_mode,
5659 							      int_mode));
5660 	  tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5661 				 unsignedp, normalizep);
5662 	}
5663       else
5664 	tem = NULL_RTX;
5665 
5666       if (tem)
5667 	{
5668 	  if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5669 	    return tem;
5670 	  if (!target)
5671 	    target = gen_reg_rtx (target_mode);
5672 
5673 	  convert_move (target, tem,
5674 			!val_signbit_known_set_p (word_mode,
5675 						  (normalizep ? normalizep
5676 						   : STORE_FLAG_VALUE)));
5677 	  return target;
5678 	}
5679     }
5680 
5681   /* If this is A < 0 or A >= 0, we can do this by taking the ones
5682      complement of A (for GE) and shifting the sign bit to the low bit.  */
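  /* Concretely, for a 32-bit A and a 0/1 result this amounts to
     A < 0   ->  (unsigned) A >> 31
     A >= 0  ->  (unsigned) ~A >> 31
     with an arithmetic shift used instead when a -1/0 result is wanted.  */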
5683   if (op1 == const0_rtx && (code == LT || code == GE)
5684       && is_int_mode (mode, &int_mode)
5685       && (normalizep || STORE_FLAG_VALUE == 1
5686 	  || val_signbit_p (int_mode, STORE_FLAG_VALUE)))
5687     {
5688       scalar_int_mode int_target_mode;
5689       subtarget = target;
5690 
5691       if (!target)
5692 	int_target_mode = int_mode;
5693       else
5694 	{
5695 	  /* If the result is to be wider than OP0, it is best to convert it
5696 	     first.  If it is to be narrower, it is *incorrect* to convert it
5697 	     first.  */
5698 	  int_target_mode = as_a <scalar_int_mode> (target_mode);
5699 	  if (GET_MODE_SIZE (int_target_mode) > GET_MODE_SIZE (int_mode))
5700 	    {
5701 	      op0 = convert_modes (int_target_mode, int_mode, op0, 0);
5702 	      int_mode = int_target_mode;
5703 	    }
5704 	}
5705 
5706       if (int_target_mode != int_mode)
5707 	subtarget = 0;
5708 
5709       if (code == GE)
5710 	op0 = expand_unop (int_mode, one_cmpl_optab, op0,
5711 			   ((STORE_FLAG_VALUE == 1 || normalizep)
5712 			    ? 0 : subtarget), 0);
5713 
5714       if (STORE_FLAG_VALUE == 1 || normalizep)
5715 	/* If we are supposed to produce a 0/1 value, we want to do
5716 	   a logical shift from the sign bit to the low-order bit; for
5717 	   a -1/0 value, we do an arithmetic shift.  */
5718 	op0 = expand_shift (RSHIFT_EXPR, int_mode, op0,
5719 			    GET_MODE_BITSIZE (int_mode) - 1,
5720 			    subtarget, normalizep != -1);
5721 
5722       if (int_mode != int_target_mode)
5723 	op0 = convert_modes (int_target_mode, int_mode, op0, 0);
5724 
5725       return op0;
5726     }
5727 
5728   mclass = GET_MODE_CLASS (mode);
5729   FOR_EACH_MODE_FROM (compare_mode, mode)
5730     {
5731      machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5732      icode = optab_handler (cstore_optab, optab_mode);
5733      if (icode != CODE_FOR_nothing)
5734 	{
5735 	  do_pending_stack_adjust ();
5736 	  rtx tem = emit_cstore (target, icode, code, mode, compare_mode,
5737 				 unsignedp, op0, op1, normalizep, target_mode);
5738 	  if (tem)
5739 	    return tem;
5740 
5741 	  if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5742 	    {
5743 	      tem = emit_cstore (target, icode, scode, mode, compare_mode,
5744 				 unsignedp, op1, op0, normalizep, target_mode);
5745 	      if (tem)
5746 	        return tem;
5747 	    }
5748 	  break;
5749 	}
5750     }
5751 
5752   return 0;
5753 }
5754 
5755 /* Subroutine of emit_store_flag that handles cases in which the operands
5756    are scalar integers.  SUBTARGET is the target to use for temporary
5757    operations and TRUEVAL is the value to store when the condition is
5758    true.  All other arguments are as for emit_store_flag.  */
5759 
5760 rtx
5761 emit_store_flag_int (rtx target, rtx subtarget, enum rtx_code code, rtx op0,
5762 		     rtx op1, scalar_int_mode mode, int unsignedp,
5763 		     int normalizep, rtx trueval)
5764 {
5765   machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5766   rtx_insn *last = get_last_insn ();
5767 
5768   /* If this is an equality comparison of integers, we can try to exclusive-or
5769      (or subtract) the two operands and use a recursive call to try the
5770      comparison with zero.  Don't do any of these cases if branches are
5771      very cheap.  */
5772 
5773   if ((code == EQ || code == NE) && op1 != const0_rtx)
5774     {
5775       rtx tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5776 			      OPTAB_WIDEN);
5777 
5778       if (tem == 0)
5779 	tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5780 			    OPTAB_WIDEN);
5781       if (tem != 0)
5782 	tem = emit_store_flag (target, code, tem, const0_rtx,
5783 			       mode, unsignedp, normalizep);
5784       if (tem != 0)
5785 	return tem;
5786 
5787       delete_insns_since (last);
5788     }
5789 
5790   /* For integer comparisons, try the reverse comparison.  However, for
5791      small X and if we'd have to extend anyway, implementing "X != 0"
5792      as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0".  */
5793   rtx_code rcode = reverse_condition (code);
5794   if (can_compare_p (rcode, mode, ccp_store_flag)
5795       && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5796 	    && code == NE
5797 	    && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5798 	    && op1 == const0_rtx))
5799     {
5800       int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5801 		      || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5802 
5803       /* Again, for the reverse comparison, use either an addition or a XOR.  */
5804       if (want_add
5805 	  && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
5806 		       optimize_insn_for_speed_p ()) == 0)
5807 	{
5808 	  rtx tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5809 				       STORE_FLAG_VALUE, target_mode);
5810 	  if (tem != 0)
5811 	    tem = expand_binop (target_mode, add_optab, tem,
5812 				gen_int_mode (normalizep, target_mode),
5813 				target, 0, OPTAB_WIDEN);
5814 	  if (tem != 0)
5815 	    return tem;
5816 	}
5817       else if (!want_add
5818 	       && rtx_cost (trueval, mode, XOR, 1,
5819 			    optimize_insn_for_speed_p ()) == 0)
5820 	{
5821 	  rtx tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5822 				       normalizep, target_mode);
5823 	  if (tem != 0)
5824 	    tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5825 				INTVAL (trueval) >= 0, OPTAB_WIDEN);
5826 	  if (tem != 0)
5827 	    return tem;
5828 	}
5829 
5830       delete_insns_since (last);
5831     }
5832 
5833   /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5834      the constant zero.  Reject all other comparisons at this point.  Only
5835      do LE and GT if branches are expensive since they are expensive on
5836      2-operand machines.  */
5837 
5838   if (op1 != const0_rtx
5839       || (code != EQ && code != NE
5840 	  && (BRANCH_COST (optimize_insn_for_speed_p (),
5841 			   false) <= 1 || (code != LE && code != GT))))
5842     return 0;
5843 
5844   /* Try to put the result of the comparison in the sign bit.  Assume we can't
5845      do the necessary operation below.  */
5846 
5847   rtx tem = 0;
5848 
5849   /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
5850      the sign bit set.  */
5851 
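  /* Worked example (sketch, 32-bit int): A = 0 gives 0 | -1 = -1 (sign bit
     set, so A <= 0 is true); A = 5 gives 5 | 4 = 5 (sign bit clear, false);
     A = -3 gives -3 | -4 = -3 (sign bit set, true).  */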
5852   if (code == LE)
5853     {
5854       /* This is destructive, so SUBTARGET can't be OP0.  */
5855       if (rtx_equal_p (subtarget, op0))
5856 	subtarget = 0;
5857 
5858       tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5859 			  OPTAB_WIDEN);
5860       if (tem)
5861 	tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5862 			    OPTAB_WIDEN);
5863     }
5864 
5865   /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5866      number of bits in the mode of OP0, minus one.  */
5867 
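  /* Worked example (sketch, 32-bit int, arithmetic shift): A = 5 gives
     (5 >> 31) - 5 = -5 (sign bit set, so A > 0 is true); A = 0 gives 0
     (clear, false); A = -7 gives (-1) - (-7) = 6 (clear, false).  */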
5868   if (code == GT)
5869     {
5870       if (rtx_equal_p (subtarget, op0))
5871 	subtarget = 0;
5872 
5873       tem = maybe_expand_shift (RSHIFT_EXPR, mode, op0,
5874 				GET_MODE_BITSIZE (mode) - 1,
5875 				subtarget, 0);
5876       if (tem)
5877 	tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5878 			    OPTAB_WIDEN);
5879     }
5880 
5881   if (code == EQ || code == NE)
5882     {
5883       /* For EQ or NE, one way to do the comparison is to apply an operation
5884 	 that converts the operand into a positive number if it is nonzero
5885 	 or zero if it was originally zero.  Then, for EQ, we subtract 1 and
5886 	 for NE we negate.  This puts the result in the sign bit.  Then we
5887 	 normalize with a shift, if needed.
5888 
5889 	 Two operations that can do the above actions are ABS and FFS, so try
5890 	 them.  If that doesn't work, and MODE is smaller than a full word,
5891 	 we can use zero-extension to the wider mode (an unsigned conversion)
5892 	 as the operation.  */
5893 
5894       /* Note that ABS doesn't yield a positive number for INT_MIN, but
5895 	 that is compensated by the subsequent overflow when subtracting
5896 	 one / negating.  */
5897 
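      /* Worked example (sketch) using ABS: for A = 0, abs (A) = 0, so EQ
	 computes 0 - 1 = -1 (sign bit set, true) and NE computes -0 = 0
	 (clear, false); for A = 9, abs (A) = 9, so EQ computes 8 (clear,
	 false) and NE computes -9 (set, true).  */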
5898       if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5899 	tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5900       else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5901 	tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5902       else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5903 	{
5904 	  tem = convert_modes (word_mode, mode, op0, 1);
5905 	  mode = word_mode;
5906 	}
5907 
5908       if (tem != 0)
5909 	{
5910 	  if (code == EQ)
5911 	    tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5912 				0, OPTAB_WIDEN);
5913 	  else
5914 	    tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5915 	}
5916 
5917       /* If we couldn't do it that way, for NE we can "or" the two's complement
5918 	 of the value with itself.  For EQ, we take the one's complement of
5919 	 that "or", which is an extra insn, so we only handle EQ if branches
5920 	 are expensive.  */
5921 
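      /* Worked example (sketch) for this fallback: A = 0 gives -0 | 0 = 0
	 (sign bit clear, so NE is false); A = 6 gives -6 | 6, whose sign bit
	 is set (NE is true); the one's complement flips the result for EQ.  */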
5922       if (tem == 0
5923 	  && (code == NE
5924 	      || BRANCH_COST (optimize_insn_for_speed_p (),
5925 			      false) > 1))
5926 	{
5927 	  if (rtx_equal_p (subtarget, op0))
5928 	    subtarget = 0;
5929 
5930 	  tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5931 	  tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5932 			      OPTAB_WIDEN);
5933 
5934 	  if (tem && code == EQ)
5935 	    tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5936 	}
5937     }
5938 
5939   if (tem && normalizep)
5940     tem = maybe_expand_shift (RSHIFT_EXPR, mode, tem,
5941 			      GET_MODE_BITSIZE (mode) - 1,
5942 			      subtarget, normalizep == 1);
5943 
5944   if (tem)
5945     {
5946       if (!target)
5947 	;
5948       else if (GET_MODE (tem) != target_mode)
5949 	{
5950 	  convert_move (target, tem, 0);
5951 	  tem = target;
5952 	}
5953       else if (!subtarget)
5954 	{
5955 	  emit_move_insn (target, tem);
5956 	  tem = target;
5957 	}
5958     }
5959   else
5960     delete_insns_since (last);
5961 
5962   return tem;
5963 }
5964 
5965 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5966    and store the result in TARGET.  Normally return TARGET.
5967    Return 0 if that cannot be done.
5968 
5969    MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
5970    it is VOIDmode, they cannot both be CONST_INT.
5971 
5972    UNSIGNEDP is for the case where we have to widen the operands
5973    to perform the operation.  It says to use zero-extension.
5974 
5975    NORMALIZEP is 1 if we should convert the result to be either zero
5976    or one.  Normalize is -1 if we should convert the result to be
5977    either zero or -1.  If NORMALIZEP is zero, the result will be left
5978    "raw" out of the scc insn.  */
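/* Illustrative usage (a sketch, not part of the original sources): a call
   like emit_store_flag (target, EQ, x, const0_rtx, SImode, 0, 1) requests
   code equivalent to "target = (x == 0) ? 1 : 0", and yields 0 instead if
   no such sequence can be emitted.  */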
5979 
5980 rtx
5981 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5982 		 machine_mode mode, int unsignedp, int normalizep)
5983 {
5984   machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5985   enum rtx_code rcode;
5986   rtx subtarget;
5987   rtx tem, trueval;
5988   rtx_insn *last;
5989 
5990   /* If we compare constants, we shouldn't use a store-flag operation,
5991      but a constant load.  We can get there via the vanilla route that
5992      usually generates a compare-branch sequence, but will in this case
5993      fold the comparison to a constant, and thus elide the branch.  */
5994   if (CONSTANT_P (op0) && CONSTANT_P (op1))
5995     return NULL_RTX;
5996 
5997   tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5998 			   target_mode);
5999   if (tem)
6000     return tem;
6001 
6002   /* If we reached here, we can't do this with a scc insn, however there
6003      are some comparisons that can be done in other ways.  Don't do any
6004      of these cases if branches are very cheap.  */
6005   if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
6006     return 0;
6007 
6008   /* See what we need to return.  We can only return a 1, -1, or the
6009      sign bit.  */
6010 
6011   if (normalizep == 0)
6012     {
6013       if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
6014 	normalizep = STORE_FLAG_VALUE;
6015 
6016       else if (val_signbit_p (mode, STORE_FLAG_VALUE))
6017 	;
6018       else
6019 	return 0;
6020     }
6021 
6022   last = get_last_insn ();
6023 
6024   /* If optimizing, use different pseudo registers for each insn, instead
6025      of reusing the same pseudo.  This leads to better CSE, but slows
6026      down the compiler, since there are more pseudos.  */
6027   subtarget = (!optimize
6028 	       && (target_mode == mode)) ? target : NULL_RTX;
6029   trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
6030 
6031   /* For floating-point comparisons, try the reverse comparison or try
6032      changing the "orderedness" of the comparison.  */
6033   if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6034     {
6035       enum rtx_code first_code;
6036       bool and_them;
6037 
6038       rcode = reverse_condition_maybe_unordered (code);
6039       if (can_compare_p (rcode, mode, ccp_store_flag)
6040 	  && (code == ORDERED || code == UNORDERED
6041 	      || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
6042 	      || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
6043 	{
6044 	  int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
6045 			  || (STORE_FLAG_VALUE == -1 && normalizep == 1));
6046 
6047 	  /* For the reverse comparison, use either an addition or a XOR.  */
6048 	  if (want_add
6049 	      && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
6050 			   optimize_insn_for_speed_p ()) == 0)
6051 	    {
6052 	      tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
6053 				       STORE_FLAG_VALUE, target_mode);
6054 	      if (tem)
6055 		return expand_binop (target_mode, add_optab, tem,
6056 				     gen_int_mode (normalizep, target_mode),
6057 				     target, 0, OPTAB_WIDEN);
6058 	    }
6059 	  else if (!want_add
6060 		   && rtx_cost (trueval, mode, XOR, 1,
6061 				optimize_insn_for_speed_p ()) == 0)
6062 	    {
6063 	      tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
6064 				       normalizep, target_mode);
6065 	      if (tem)
6066 		return expand_binop (target_mode, xor_optab, tem, trueval,
6067 				     target, INTVAL (trueval) >= 0,
6068 				     OPTAB_WIDEN);
6069 	    }
6070 	}
6071 
6072       delete_insns_since (last);
6073 
6074       /* Cannot split ORDERED and UNORDERED, only try the above trick.  */
6075       if (code == ORDERED || code == UNORDERED)
6076 	return 0;
6077 
6078       and_them = split_comparison (code, mode, &first_code, &code);
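      /* For example (sketch): an ordered comparison such as LT is typically
	 decomposed into an ORDERED test ANDed with the less-than test, while
	 its unordered counterpart UNLT becomes an UNORDERED test ORed with
	 the less-than test; AND_THEM records which combination applies.  */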
6079 
6080       /* If there are no NaNs, the first comparison should always fall through.
6081 	 Effectively change the comparison to the other one.  */
6082       if (!HONOR_NANS (mode))
6083 	{
6084 	  gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
6085 	  return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
6086 				    target_mode);
6087 	}
6088 
6089       if (!HAVE_conditional_move)
6090 	return 0;
6091 
6092       /* Do not turn a trapping comparison into a non-trapping one.  */
6093       if ((code != EQ && code != NE && code != UNEQ && code != LTGT)
6094 	  && flag_trapping_math)
6095 	return 0;
6096 
6097       /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
6098 	 conditional move.  */
6099       tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
6100 			       normalizep, target_mode);
6101       if (tem == 0)
6102 	return 0;
6103 
6104       if (and_them)
6105 	tem = emit_conditional_move (target, code, op0, op1, mode,
6106 				     tem, const0_rtx, GET_MODE (tem), 0);
6107       else
6108 	tem = emit_conditional_move (target, code, op0, op1, mode,
6109 				     trueval, tem, GET_MODE (tem), 0);
6110 
6111       if (tem == 0)
6112 	delete_insns_since (last);
6113       return tem;
6114     }
6115 
6116   /* The remaining tricks only apply to integer comparisons.  */
6117 
6118   scalar_int_mode int_mode;
6119   if (is_int_mode (mode, &int_mode))
6120     return emit_store_flag_int (target, subtarget, code, op0, op1, int_mode,
6121 				unsignedp, normalizep, trueval);
6122 
6123   return 0;
6124 }
6125 
6126 /* Like emit_store_flag, but always succeeds.  */
6127 
6128 rtx
6129 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
6130 		       machine_mode mode, int unsignedp, int normalizep)
6131 {
6132   rtx tem;
6133   rtx_code_label *label;
6134   rtx trueval, falseval;
6135 
6136   /* First see if emit_store_flag can do the job.  */
6137   tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
6138   if (tem != 0)
6139     return tem;
6140 
6141   /* If one operand is constant, make it the second one.  Only do this
6142      if the other operand is not constant as well.  */
6143   if (swap_commutative_operands_p (op0, op1))
6144     {
6145       std::swap (op0, op1);
6146       code = swap_condition (code);
6147     }
6148 
6149   if (mode == VOIDmode)
6150     mode = GET_MODE (op0);
6151 
6152   if (!target)
6153     target = gen_reg_rtx (word_mode);
6154 
6155   /* If this failed, we have to do this with set/compare/jump/set code.
6156      For foo != 0, if foo is in OP0, just replace it with 1 if nonzero.  */
6157   trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
6158   if (code == NE
6159       && GET_MODE_CLASS (mode) == MODE_INT
6160       && REG_P (target)
6161       && op0 == target
6162       && op1 == const0_rtx)
6163     {
6164       label = gen_label_rtx ();
6165       do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp, mode,
6166 			       NULL_RTX, NULL, label,
6167 			       profile_probability::uninitialized ());
6168       emit_move_insn (target, trueval);
6169       emit_label (label);
6170       return target;
6171     }
6172 
6173   if (!REG_P (target)
6174       || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
6175     target = gen_reg_rtx (GET_MODE (target));
6176 
6177   /* Jump in the right direction if the target cannot implement CODE
6178      but can jump on its reverse condition.  */
6179   falseval = const0_rtx;
6180   if (! can_compare_p (code, mode, ccp_jump)
6181       && (! FLOAT_MODE_P (mode)
6182           || code == ORDERED || code == UNORDERED
6183           || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
6184           || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
6185     {
6186       enum rtx_code rcode;
6187       if (FLOAT_MODE_P (mode))
6188         rcode = reverse_condition_maybe_unordered (code);
6189       else
6190         rcode = reverse_condition (code);
6191 
6192       /* Canonicalize to UNORDERED for the libcall.  */
6193       if (can_compare_p (rcode, mode, ccp_jump)
6194           || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
6195 	{
6196 	  falseval = trueval;
6197 	  trueval = const0_rtx;
6198 	  code = rcode;
6199 	}
6200     }
6201 
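  /* The generated sequence is, in effect (sketch):
       target = TRUEVAL;
       if (OP0 CODE OP1) goto label;
       target = FALSEVAL;
     label:  */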
6202   emit_move_insn (target, trueval);
6203   label = gen_label_rtx ();
6204   do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX, NULL,
6205 			   label, profile_probability::uninitialized ());
6206 
6207   emit_move_insn (target, falseval);
6208   emit_label (label);
6209 
6210   return target;
6211 }
6212 
6213 /* Helper function for canonicalize_cmp_for_target.  Swap between inclusive
6214    and exclusive ranges in order to create an equivalent comparison.  See
6215    canonicalize_cmp_for_target for the possible cases.  */
6216 
6217 static enum rtx_code
6218 equivalent_cmp_code (enum rtx_code code)
6219 {
6220   switch (code)
6221     {
6222     case GT:
6223       return GE;
6224     case GE:
6225       return GT;
6226     case LT:
6227       return LE;
6228     case LE:
6229       return LT;
6230     case GTU:
6231       return GEU;
6232     case GEU:
6233       return GTU;
6234     case LTU:
6235       return LEU;
6236     case LEU:
6237       return LTU;
6238 
6239     default:
6240       return code;
6241     }
6242 }
6243 
6244 /* Choose the more appropriate immediate in scalar integer comparisons.  The
6245    purpose of this is to end up with an immediate which can be loaded into a
6246    register in fewer moves, if possible.
6247 
6248    For each integer comparison there exists an equivalent choice:
6249      i)   a >  b or a >= b + 1
6250      ii)  a <= b or a <  b + 1
6251      iii) a >= b or a >  b - 1
6252      iv)  a <  b or a <= b - 1
6253 
6254    MODE is the mode of the first operand.
6255    CODE points to the comparison code.
6256    IMM points to the rtx containing the immediate.  *IMM must satisfy
6257    CONST_SCALAR_INT_P on entry and continues to satisfy CONST_SCALAR_INT_P
6258    on exit.  */
6259 
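/* For example (a sketch; the actual costs are target-dependent): on a target
   where 0x1000 is cheaper to materialize than 0xfff, "a > 0xfff" can be
   rewritten as "a >= 0x1000" (case i above) without changing its value.  */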
6260 void
6261 canonicalize_comparison (machine_mode mode, enum rtx_code *code, rtx *imm)
6262 {
6263   if (!SCALAR_INT_MODE_P (mode))
6264     return;
6265 
6266   int to_add = 0;
6267   enum signop sgn = unsigned_condition_p (*code) ? UNSIGNED : SIGNED;
6268 
6269   /* Extract the immediate value from the rtx.  */
6270   wide_int imm_val = rtx_mode_t (*imm, mode);
6271 
6272   if (*code == GT || *code == GTU || *code == LE || *code == LEU)
6273     to_add = 1;
6274   else if (*code == GE || *code == GEU || *code == LT || *code == LTU)
6275     to_add = -1;
6276   else
6277     return;
6278 
6279   /* Check for overflow/underflow in the case of signed values and
6280      wrapping around in the case of unsigned values.  If either occurs,
6281      cancel the optimization.  */
6282   wi::overflow_type overflow = wi::OVF_NONE;
6283   wide_int imm_modif;
6284 
6285   if (to_add == 1)
6286     imm_modif = wi::add (imm_val, 1, sgn, &overflow);
6287   else
6288     imm_modif = wi::sub (imm_val, 1, sgn, &overflow);
6289 
6290   if (overflow)
6291     return;
6292 
6293   /* The following creates a pseudo; if we cannot do that, bail out.  */
6294   if (!can_create_pseudo_p ())
6295     return;
6296 
6297   rtx reg = gen_rtx_REG (mode, LAST_VIRTUAL_REGISTER + 1);
6298   rtx new_imm = immed_wide_int_const (imm_modif, mode);
6299 
6300   rtx_insn *old_rtx = gen_move_insn (reg, *imm);
6301   rtx_insn *new_rtx = gen_move_insn (reg, new_imm);
6302 
6303   /* Update the immediate and the code.  */
6304   if (insn_cost (old_rtx, true) > insn_cost (new_rtx, true))
6305     {
6306       *code = equivalent_cmp_code (*code);
6307       *imm = new_imm;
6308     }
6309 }
6310 
6311 
6312 
6313 /* Perform possibly multi-word comparison and conditional jump to LABEL
6314    if ARG1 OP ARG2 is true, where ARG1 and ARG2 are of mode MODE.  This is
6315    now a thin wrapper around do_compare_rtx_and_jump.  */
6316 
6317 static void
6318 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, machine_mode mode,
6319 		 rtx_code_label *label)
6320 {
6321   int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
6322   do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode, NULL_RTX,
6323 			   NULL, label, profile_probability::uninitialized ());
6324 }
6325