xref: /netbsd-src/external/gpl3/gcc/dist/gcc/expmed.cc (revision b1e838363e3c6fc78a55519254d99869742dd33c)
1 /* Medium-level subroutines: convert bit-field store and extract
2    and shifts, multiplies and divides to rtl instructions.
3    Copyright (C) 1987-2022 Free Software Foundation, Inc.
4 
5 This file is part of GCC.
6 
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11 
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3.  If not see
19 <http://www.gnu.org/licenses/>.  */
20 
21 /* Work around tree-optimization/91825.  */
22 #pragma GCC diagnostic warning "-Wmaybe-uninitialized"
23 
24 #include "config.h"
25 #include "system.h"
26 #include "coretypes.h"
27 #include "backend.h"
28 #include "target.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "predict.h"
32 #include "memmodel.h"
33 #include "tm_p.h"
34 #include "optabs.h"
35 #include "expmed.h"
36 #include "regs.h"
37 #include "emit-rtl.h"
38 #include "diagnostic-core.h"
39 #include "fold-const.h"
40 #include "stor-layout.h"
41 #include "dojump.h"
42 #include "explow.h"
43 #include "expr.h"
44 #include "langhooks.h"
45 #include "tree-vector-builder.h"
46 
47 struct target_expmed default_target_expmed;
48 #if SWITCHABLE_TARGET
49 struct target_expmed *this_target_expmed = &default_target_expmed;
50 #endif
51 
52 static bool store_integral_bit_field (rtx, opt_scalar_int_mode,
53 				      unsigned HOST_WIDE_INT,
54 				      unsigned HOST_WIDE_INT,
55 				      poly_uint64, poly_uint64,
56 				      machine_mode, rtx, bool, bool);
57 static void store_fixed_bit_field (rtx, opt_scalar_int_mode,
58 				   unsigned HOST_WIDE_INT,
59 				   unsigned HOST_WIDE_INT,
60 				   poly_uint64, poly_uint64,
61 				   rtx, scalar_int_mode, bool);
62 static void store_fixed_bit_field_1 (rtx, scalar_int_mode,
63 				     unsigned HOST_WIDE_INT,
64 				     unsigned HOST_WIDE_INT,
65 				     rtx, scalar_int_mode, bool);
66 static void store_split_bit_field (rtx, opt_scalar_int_mode,
67 				   unsigned HOST_WIDE_INT,
68 				   unsigned HOST_WIDE_INT,
69 				   poly_uint64, poly_uint64,
70 				   rtx, scalar_int_mode, bool);
71 static rtx extract_integral_bit_field (rtx, opt_scalar_int_mode,
72 				       unsigned HOST_WIDE_INT,
73 				       unsigned HOST_WIDE_INT, int, rtx,
74 				       machine_mode, machine_mode, bool, bool);
75 static rtx extract_fixed_bit_field (machine_mode, rtx, opt_scalar_int_mode,
76 				    unsigned HOST_WIDE_INT,
77 				    unsigned HOST_WIDE_INT, rtx, int, bool);
78 static rtx extract_fixed_bit_field_1 (machine_mode, rtx, scalar_int_mode,
79 				      unsigned HOST_WIDE_INT,
80 				      unsigned HOST_WIDE_INT, rtx, int, bool);
81 static rtx lshift_value (machine_mode, unsigned HOST_WIDE_INT, int);
82 static rtx extract_split_bit_field (rtx, opt_scalar_int_mode,
83 				    unsigned HOST_WIDE_INT,
84 				    unsigned HOST_WIDE_INT, int, bool);
85 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, machine_mode, rtx_code_label *);
86 static rtx expand_smod_pow2 (scalar_int_mode, rtx, HOST_WIDE_INT);
87 static rtx expand_sdiv_pow2 (scalar_int_mode, rtx, HOST_WIDE_INT);
88 
89 /* Return a constant integer mask value of mode MODE with BITSIZE ones
90    followed by BITPOS zeros, or the complement of that if COMPLEMENT.
91    The mask is truncated if necessary to the width of mode MODE.  The
92    mask is zero-extended if BITSIZE+BITPOS is too small for MODE.  */
93 
94 static inline rtx
95 mask_rtx (scalar_int_mode mode, int bitpos, int bitsize, bool complement)
96 {
97   return immed_wide_int_const
98     (wi::shifted_mask (bitpos, bitsize, complement,
99 		       GET_MODE_PRECISION (mode)), mode);
100 }
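
/* For illustration (an informal example, not used by the code): with a
   32-bit MODE, BITPOS == 4 and BITSIZE == 8, the mask returned above is
   0x00000ff0 (bits 4..11 set), and 0xfffff00f when COMPLEMENT is true.  */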
101 
102 /* Test whether a value is zero or a power of two.  */
103 #define EXACT_POWER_OF_2_OR_ZERO_P(x) \
104   (((x) & ((x) - HOST_WIDE_INT_1U)) == 0)
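
/* Informally: clearing the lowest set bit of X leaves zero exactly when X
   has at most one bit set, e.g. 8 & 7 == 0 and 0 & -1 == 0, whereas
   12 & 11 == 8, so 12 is rejected.  */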
105 
106 struct init_expmed_rtl
107 {
108   rtx reg;
109   rtx plus;
110   rtx neg;
111   rtx mult;
112   rtx sdiv;
113   rtx udiv;
114   rtx sdiv_32;
115   rtx smod_32;
116   rtx wide_mult;
117   rtx wide_lshr;
118   rtx wide_trunc;
119   rtx shift;
120   rtx shift_mult;
121   rtx shift_add;
122   rtx shift_sub0;
123   rtx shift_sub1;
124   rtx zext;
125   rtx trunc;
126 
127   rtx pow2[MAX_BITS_PER_WORD];
128   rtx cint[MAX_BITS_PER_WORD];
129 };
130 
131 static void
132 init_expmed_one_conv (struct init_expmed_rtl *all, scalar_int_mode to_mode,
133 		      scalar_int_mode from_mode, bool speed)
134 {
135   int to_size, from_size;
136   rtx which;
137 
138   to_size = GET_MODE_PRECISION (to_mode);
139   from_size = GET_MODE_PRECISION (from_mode);
140 
141   /* Most partial integers have a precision less than that of the
142      "full" integer mode required to store them.  If one doesn't,
143      reduce its bit size by one here, for comparison purposes
144      only.  */
145   if (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT
146       && pow2p_hwi (to_size))
147     to_size --;
148   if (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT
149       && pow2p_hwi (from_size))
150     from_size --;
151 
152   /* Assume cost of zero-extend and sign-extend is the same.  */
153   which = (to_size < from_size ? all->trunc : all->zext);
154 
155   PUT_MODE (all->reg, from_mode);
156   set_convert_cost (to_mode, from_mode, speed,
157 		    set_src_cost (which, to_mode, speed));
158   /* Restore all->reg's mode.  */
159   PUT_MODE (all->reg, to_mode);
160 }
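
/* For example, if a partial mode such as PSImode were to report the same
   power-of-two precision as the corresponding full mode, the adjustment
   above makes it compare as strictly narrower, so a conversion from the
   full mode is still costed as a truncation rather than an extension.  */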
161 
162 static void
163 init_expmed_one_mode (struct init_expmed_rtl *all,
164 		      machine_mode mode, int speed)
165 {
166   int m, n, mode_bitsize;
167   machine_mode mode_from;
168 
169   mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
170 
171   PUT_MODE (all->reg, mode);
172   PUT_MODE (all->plus, mode);
173   PUT_MODE (all->neg, mode);
174   PUT_MODE (all->mult, mode);
175   PUT_MODE (all->sdiv, mode);
176   PUT_MODE (all->udiv, mode);
177   PUT_MODE (all->sdiv_32, mode);
178   PUT_MODE (all->smod_32, mode);
179   PUT_MODE (all->wide_trunc, mode);
180   PUT_MODE (all->shift, mode);
181   PUT_MODE (all->shift_mult, mode);
182   PUT_MODE (all->shift_add, mode);
183   PUT_MODE (all->shift_sub0, mode);
184   PUT_MODE (all->shift_sub1, mode);
185   PUT_MODE (all->zext, mode);
186   PUT_MODE (all->trunc, mode);
187 
188   set_add_cost (speed, mode, set_src_cost (all->plus, mode, speed));
189   set_neg_cost (speed, mode, set_src_cost (all->neg, mode, speed));
190   set_mul_cost (speed, mode, set_src_cost (all->mult, mode, speed));
191   set_sdiv_cost (speed, mode, set_src_cost (all->sdiv, mode, speed));
192   set_udiv_cost (speed, mode, set_src_cost (all->udiv, mode, speed));
193 
194   set_sdiv_pow2_cheap (speed, mode, (set_src_cost (all->sdiv_32, mode, speed)
195 				     <= 2 * add_cost (speed, mode)));
196   set_smod_pow2_cheap (speed, mode, (set_src_cost (all->smod_32, mode, speed)
197 				     <= 4 * add_cost (speed, mode)));
198 
199   set_shift_cost (speed, mode, 0, 0);
200   {
201     int cost = add_cost (speed, mode);
202     set_shiftadd_cost (speed, mode, 0, cost);
203     set_shiftsub0_cost (speed, mode, 0, cost);
204     set_shiftsub1_cost (speed, mode, 0, cost);
205   }
206 
207   n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
208   for (m = 1; m < n; m++)
209     {
210       XEXP (all->shift, 1) = all->cint[m];
211       XEXP (all->shift_mult, 1) = all->pow2[m];
212 
213       set_shift_cost (speed, mode, m, set_src_cost (all->shift, mode, speed));
214       set_shiftadd_cost (speed, mode, m, set_src_cost (all->shift_add, mode,
215 						       speed));
216       set_shiftsub0_cost (speed, mode, m, set_src_cost (all->shift_sub0, mode,
217 							speed));
218       set_shiftsub1_cost (speed, mode, m, set_src_cost (all->shift_sub1, mode,
219 							speed));
220     }
221 
222   scalar_int_mode int_mode_to;
223   if (is_a <scalar_int_mode> (mode, &int_mode_to))
224     {
225       for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
226 	   mode_from = (machine_mode)(mode_from + 1))
227 	init_expmed_one_conv (all, int_mode_to,
228 			      as_a <scalar_int_mode> (mode_from), speed);
229 
230       scalar_int_mode wider_mode;
231       if (GET_MODE_CLASS (int_mode_to) == MODE_INT
232 	  && GET_MODE_WIDER_MODE (int_mode_to).exists (&wider_mode))
233 	{
234 	  PUT_MODE (all->reg, mode);
235 	  PUT_MODE (all->zext, wider_mode);
236 	  PUT_MODE (all->wide_mult, wider_mode);
237 	  PUT_MODE (all->wide_lshr, wider_mode);
238 	  XEXP (all->wide_lshr, 1)
239 	    = gen_int_shift_amount (wider_mode, mode_bitsize);
240 
241 	  set_mul_widen_cost (speed, wider_mode,
242 			      set_src_cost (all->wide_mult, wider_mode, speed));
243 	  set_mul_highpart_cost (speed, int_mode_to,
244 				 set_src_cost (all->wide_trunc,
245 					       int_mode_to, speed));
246 	}
247     }
248 }
249 
250 void
251 init_expmed (void)
252 {
253   struct init_expmed_rtl all;
254   machine_mode mode = QImode;
255   int m, speed;
256 
257   memset (&all, 0, sizeof all);
258   for (m = 1; m < MAX_BITS_PER_WORD; m++)
259     {
260       all.pow2[m] = GEN_INT (HOST_WIDE_INT_1 << m);
261       all.cint[m] = GEN_INT (m);
262     }
263 
264   /* Avoid using hard regs in ways which may be unsupported.  */
265   all.reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
266   all.plus = gen_rtx_PLUS (mode, all.reg, all.reg);
267   all.neg = gen_rtx_NEG (mode, all.reg);
268   all.mult = gen_rtx_MULT (mode, all.reg, all.reg);
269   all.sdiv = gen_rtx_DIV (mode, all.reg, all.reg);
270   all.udiv = gen_rtx_UDIV (mode, all.reg, all.reg);
271   all.sdiv_32 = gen_rtx_DIV (mode, all.reg, all.pow2[5]);
272   all.smod_32 = gen_rtx_MOD (mode, all.reg, all.pow2[5]);
273   all.zext = gen_rtx_ZERO_EXTEND (mode, all.reg);
274   all.wide_mult = gen_rtx_MULT (mode, all.zext, all.zext);
275   all.wide_lshr = gen_rtx_LSHIFTRT (mode, all.wide_mult, all.reg);
276   all.wide_trunc = gen_rtx_TRUNCATE (mode, all.wide_lshr);
277   all.shift = gen_rtx_ASHIFT (mode, all.reg, all.reg);
278   all.shift_mult = gen_rtx_MULT (mode, all.reg, all.reg);
279   all.shift_add = gen_rtx_PLUS (mode, all.shift_mult, all.reg);
280   all.shift_sub0 = gen_rtx_MINUS (mode, all.shift_mult, all.reg);
281   all.shift_sub1 = gen_rtx_MINUS (mode, all.reg, all.shift_mult);
282   all.trunc = gen_rtx_TRUNCATE (mode, all.reg);
283 
284   for (speed = 0; speed < 2; speed++)
285     {
286       crtl->maybe_hot_insn_p = speed;
287       set_zero_cost (speed, set_src_cost (const0_rtx, mode, speed));
288 
289       for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
290 	   mode = (machine_mode)(mode + 1))
291 	init_expmed_one_mode (&all, mode, speed);
292 
293       if (MIN_MODE_PARTIAL_INT != VOIDmode)
294 	for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
295 	     mode = (machine_mode)(mode + 1))
296 	  init_expmed_one_mode (&all, mode, speed);
297 
298       if (MIN_MODE_VECTOR_INT != VOIDmode)
299 	for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
300 	     mode = (machine_mode)(mode + 1))
301 	  init_expmed_one_mode (&all, mode, speed);
302     }
303 
304   if (alg_hash_used_p ())
305     {
306       struct alg_hash_entry *p = alg_hash_entry_ptr (0);
307       memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
308     }
309   else
310     set_alg_hash_used_p (true);
311   default_rtl_profile ();
312 
313   ggc_free (all.trunc);
314   ggc_free (all.shift_sub1);
315   ggc_free (all.shift_sub0);
316   ggc_free (all.shift_add);
317   ggc_free (all.shift_mult);
318   ggc_free (all.shift);
319   ggc_free (all.wide_trunc);
320   ggc_free (all.wide_lshr);
321   ggc_free (all.wide_mult);
322   ggc_free (all.zext);
323   ggc_free (all.smod_32);
324   ggc_free (all.sdiv_32);
325   ggc_free (all.udiv);
326   ggc_free (all.sdiv);
327   ggc_free (all.mult);
328   ggc_free (all.neg);
329   ggc_free (all.plus);
330   ggc_free (all.reg);
331 }
332 
333 /* Return an rtx representing minus the value of X.
334    MODE is the intended mode of the result,
335    useful if X is a CONST_INT.  */
336 
337 rtx
338 negate_rtx (machine_mode mode, rtx x)
339 {
340   rtx result = simplify_unary_operation (NEG, mode, x, mode);
341 
342   if (result == 0)
343     result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
344 
345   return result;
346 }
347 
348 /* Whether reverse storage order is supported on the target.  */
349 static int reverse_storage_order_supported = -1;
350 
351 /* Check whether reverse storage order is supported on the target.  */
352 
353 static void
354 check_reverse_storage_order_support (void)
355 {
356   if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
357     {
358       reverse_storage_order_supported = 0;
359       sorry ("reverse scalar storage order");
360     }
361   else
362     reverse_storage_order_supported = 1;
363 }
364 
365 /* Whether reverse FP storage order is supported on the target.  */
366 static int reverse_float_storage_order_supported = -1;
367 
368 /* Check whether reverse FP storage order is supported on the target.  */
369 
370 static void
371 check_reverse_float_storage_order_support (void)
372 {
373   if (FLOAT_WORDS_BIG_ENDIAN != WORDS_BIG_ENDIAN)
374     {
375       reverse_float_storage_order_supported = 0;
376       sorry ("reverse floating-point scalar storage order");
377     }
378   else
379     reverse_float_storage_order_supported = 1;
380 }
381 
382 /* Return an rtx representing value of X with reverse storage order.
383    MODE is the intended mode of the result,
384    useful if X is a CONST_INT.  */
385 
386 rtx
387 flip_storage_order (machine_mode mode, rtx x)
388 {
389   scalar_int_mode int_mode;
390   rtx result;
391 
392   if (mode == QImode)
393     return x;
394 
395   if (COMPLEX_MODE_P (mode))
396     {
397       rtx real = read_complex_part (x, false);
398       rtx imag = read_complex_part (x, true);
399 
400       real = flip_storage_order (GET_MODE_INNER (mode), real);
401       imag = flip_storage_order (GET_MODE_INNER (mode), imag);
402 
403       return gen_rtx_CONCAT (mode, real, imag);
404     }
405 
406   if (__builtin_expect (reverse_storage_order_supported < 0, 0))
407     check_reverse_storage_order_support ();
408 
409   if (!is_a <scalar_int_mode> (mode, &int_mode))
410     {
411       if (FLOAT_MODE_P (mode)
412 	  && __builtin_expect (reverse_float_storage_order_supported < 0, 0))
413 	check_reverse_float_storage_order_support ();
414 
415       if (!int_mode_for_size (GET_MODE_PRECISION (mode), 0).exists (&int_mode)
416 	  || !targetm.scalar_mode_supported_p (int_mode))
417 	{
418 	  sorry ("reverse storage order for %smode", GET_MODE_NAME (mode));
419 	  return x;
420 	}
421       x = gen_lowpart (int_mode, x);
422     }
423 
424   result = simplify_unary_operation (BSWAP, int_mode, x, int_mode);
425   if (result == 0)
426     result = expand_unop (int_mode, bswap_optab, x, NULL_RTX, 1);
427 
428   if (int_mode != mode)
429     result = gen_lowpart (mode, result);
430 
431   return result;
432 }
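
/* Informal example: for a 32-bit integer value 0x11223344, the BSWAP above
   yields 0x44332211; non-integral modes are first punned to an integer mode
   of the same precision and converted back afterwards.  */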
433 
434 /* If MODE is set, adjust bitfield memory MEM so that it points to the
435    first unit of mode MODE that contains a bitfield of size BITSIZE at
436    bit position BITNUM.  If MODE is not set, return a BLKmode reference
437    to every byte in the bitfield.  Set *NEW_BITNUM to the bit position
438    of the field within the new memory.  */
439 
440 static rtx
441 narrow_bit_field_mem (rtx mem, opt_scalar_int_mode mode,
442 		      unsigned HOST_WIDE_INT bitsize,
443 		      unsigned HOST_WIDE_INT bitnum,
444 		      unsigned HOST_WIDE_INT *new_bitnum)
445 {
446   scalar_int_mode imode;
447   if (mode.exists (&imode))
448     {
449       unsigned int unit = GET_MODE_BITSIZE (imode);
450       *new_bitnum = bitnum % unit;
451       HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
452       return adjust_bitfield_address (mem, imode, offset);
453     }
454   else
455     {
456       *new_bitnum = bitnum % BITS_PER_UNIT;
457       HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
458       HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
459 			    / BITS_PER_UNIT);
460       return adjust_bitfield_address_size (mem, BLKmode, offset, size);
461     }
462 }
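
/* Worked example (informal): with a 32-bit IMODE and BITNUM == 70, the code
   above returns MEM advanced by (70 - 70 % 32) / 8 == 8 bytes and sets
   *NEW_BITNUM to 6; in the BLKmode case only whole bytes are skipped, so
   *NEW_BITNUM is always less than BITS_PER_UNIT.  */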
463 
464 /* The caller wants to perform insertion or extraction PATTERN on a
465    bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
466    BITREGION_START and BITREGION_END are as for store_bit_field
467    and FIELDMODE is the natural mode of the field.
468 
469    Search for a mode that is compatible with the memory access
470    restrictions and (where applicable) with a register insertion or
471    extraction.  Return the new memory on success, storing the adjusted
472    bit position in *NEW_BITNUM.  Return null otherwise.  */
473 
474 static rtx
475 adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
476 			      rtx op0, HOST_WIDE_INT bitsize,
477 			      HOST_WIDE_INT bitnum,
478 			      poly_uint64 bitregion_start,
479 			      poly_uint64 bitregion_end,
480 			      machine_mode fieldmode,
481 			      unsigned HOST_WIDE_INT *new_bitnum)
482 {
483   bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
484 				bitregion_end, MEM_ALIGN (op0),
485 				MEM_VOLATILE_P (op0));
486   scalar_int_mode best_mode;
487   if (iter.next_mode (&best_mode))
488     {
489       /* We can use a memory in BEST_MODE.  See whether this is true for
490 	 any wider modes.  All other things being equal, we prefer to
491 	 use the widest mode possible because it tends to expose more
492 	 CSE opportunities.  */
493       if (!iter.prefer_smaller_modes ())
494 	{
495 	  /* Limit the search to the mode required by the corresponding
496 	     register insertion or extraction instruction, if any.  */
497 	  scalar_int_mode limit_mode = word_mode;
498 	  extraction_insn insn;
499 	  if (get_best_reg_extraction_insn (&insn, pattern,
500 					    GET_MODE_BITSIZE (best_mode),
501 					    fieldmode))
502 	    limit_mode = insn.field_mode;
503 
504 	  scalar_int_mode wider_mode;
505 	  while (iter.next_mode (&wider_mode)
506 		 && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
507 	    best_mode = wider_mode;
508 	}
509       return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
510 				   new_bitnum);
511     }
512   return NULL_RTX;
513 }
514 
515 /* Return true if a bitfield of size BITSIZE at bit number BITNUM within
516    a structure of mode STRUCT_MODE represents a lowpart subreg.   The subreg
517    offset is then BITNUM / BITS_PER_UNIT.  */
518 
519 static bool
520 lowpart_bit_field_p (poly_uint64 bitnum, poly_uint64 bitsize,
521 		     machine_mode struct_mode)
522 {
523   poly_uint64 regsize = REGMODE_NATURAL_SIZE (struct_mode);
524   if (BYTES_BIG_ENDIAN)
525     return (multiple_p (bitnum, BITS_PER_UNIT)
526 	    && (known_eq (bitnum + bitsize, GET_MODE_BITSIZE (struct_mode))
527 		|| multiple_p (bitnum + bitsize,
528 			       regsize * BITS_PER_UNIT)));
529   else
530     return multiple_p (bitnum, regsize * BITS_PER_UNIT);
531 }
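
/* Informally: on a little-endian target with 8-byte registers, bit 64 of a
   TImode value starts a new register-sized chunk and is therefore a lowpart
   subreg (byte offset 8), whereas bit 32 is not.  */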
532 
533 /* Return true if -fstrict-volatile-bitfields applies to an access of OP0
534    containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE.
535    Return false if the access would touch memory outside the range
536    BITREGION_START to BITREGION_END for conformance to the C++ memory
537    model.  */
538 
539 static bool
540 strict_volatile_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
541 			    unsigned HOST_WIDE_INT bitnum,
542 			    scalar_int_mode fieldmode,
543 			    poly_uint64 bitregion_start,
544 			    poly_uint64 bitregion_end)
545 {
546   unsigned HOST_WIDE_INT modesize = GET_MODE_BITSIZE (fieldmode);
547 
548   /* -fstrict-volatile-bitfields must be enabled and we must have a
549      volatile MEM.  */
550   if (!MEM_P (op0)
551       || !MEM_VOLATILE_P (op0)
552       || flag_strict_volatile_bitfields <= 0)
553     return false;
554 
555   /* The bit size must not be larger than the field mode, and
556      the field mode must not be larger than a word.  */
557   if (bitsize > modesize || modesize > BITS_PER_WORD)
558     return false;
559 
560   /* Check for cases of unaligned fields that must be split.  */
561   if (bitnum % modesize + bitsize > modesize)
562     return false;
563 
564   /* The memory must be sufficiently aligned for a MODESIZE access.
565      This condition guarantees, that the memory access will not
566      touch anything after the end of the structure.  */
567   if (MEM_ALIGN (op0) < modesize)
568     return false;
569 
570   /* Check for cases where the C++ memory model applies.  */
571   if (maybe_ne (bitregion_end, 0U)
572       && (maybe_lt (bitnum - bitnum % modesize, bitregion_start)
573 	  || maybe_gt (bitnum - bitnum % modesize + modesize - 1,
574 		       bitregion_end)))
575     return false;
576 
577   return true;
578 }
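
/* Example (informal, assuming no bit-region restriction): with a volatile
   SImode FIELDMODE on a 32-bit aligned MEM, an 8-bit access at bit 8 passes
   the checks above, while one at bit 28 is rejected because
   28 % 32 + 8 > 32, i.e. the field would have to be split.  */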
579 
580 /* Return true if OP is a memory and if a bitfield of size BITSIZE at
581    bit number BITNUM can be treated as a simple value of mode MODE.
582    Store the byte offset in *BYTENUM if so.  */
583 
584 static bool
585 simple_mem_bitfield_p (rtx op0, poly_uint64 bitsize, poly_uint64 bitnum,
586 		       machine_mode mode, poly_uint64 *bytenum)
587 {
588   return (MEM_P (op0)
589 	  && multiple_p (bitnum, BITS_PER_UNIT, bytenum)
590 	  && known_eq (bitsize, GET_MODE_BITSIZE (mode))
591 	  && (!targetm.slow_unaligned_access (mode, MEM_ALIGN (op0))
592 	      || (multiple_p (bitnum, GET_MODE_ALIGNMENT (mode))
593 		  && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode))));
594 }
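
/* Informally: a 32-bit field starting at bit 64 of a MEM can be accessed as
   a plain SImode value at byte offset 8, provided either unaligned accesses
   are not slow on the target or the reference is suitably aligned for
   SImode.  */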
595 
596 /* Try to use instruction INSV to store VALUE into a field of OP0.
597    If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is a
598    BLKmode MEM.  VALUE_MODE is the mode of VALUE.  BITSIZE and BITNUM
599    are as for store_bit_field.  */
600 
601 static bool
602 store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
603 			    opt_scalar_int_mode op0_mode,
604 			    unsigned HOST_WIDE_INT bitsize,
605 			    unsigned HOST_WIDE_INT bitnum,
606 			    rtx value, scalar_int_mode value_mode)
607 {
608   class expand_operand ops[4];
609   rtx value1;
610   rtx xop0 = op0;
611   rtx_insn *last = get_last_insn ();
612   bool copy_back = false;
613 
614   scalar_int_mode op_mode = insv->field_mode;
615   unsigned int unit = GET_MODE_BITSIZE (op_mode);
616   if (bitsize == 0 || bitsize > unit)
617     return false;
618 
619   if (MEM_P (xop0))
620     /* Get a reference to the first byte of the field.  */
621     xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
622 				 &bitnum);
623   else
624     {
625       /* Convert from counting within OP0 to counting in OP_MODE.  */
626       if (BYTES_BIG_ENDIAN)
627 	bitnum += unit - GET_MODE_BITSIZE (op0_mode.require ());
628 
629       /* If xop0 is a register, we need it in OP_MODE
630 	 to make it acceptable to the format of insv.  */
631       if (GET_CODE (xop0) == SUBREG)
632 	{
633 	  /* If such a SUBREG can't be created, give up.  */
634 	  if (!validate_subreg (op_mode, GET_MODE (SUBREG_REG (xop0)),
635 				SUBREG_REG (xop0), SUBREG_BYTE (xop0)))
636 	    return false;
637 	  /* We can't just change the mode, because this might clobber op0,
638 	     and we will need the original value of op0 if insv fails.  */
639 	  xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0),
640 				 SUBREG_BYTE (xop0));
641 	}
642       if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
643 	xop0 = gen_lowpart_SUBREG (op_mode, xop0);
644     }
645 
646   /* If the destination is a paradoxical subreg such that we need a
647      truncate to the inner mode, perform the insertion on a temporary and
648      truncate the result to the original destination.  Note that we can't
649      just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
650      X) 0)) is (reg:N X).  */
651   if (GET_CODE (xop0) == SUBREG
652       && REG_P (SUBREG_REG (xop0))
653       && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
654 					 op_mode))
655     {
656       rtx tem = gen_reg_rtx (op_mode);
657       emit_move_insn (tem, xop0);
658       xop0 = tem;
659       copy_back = true;
660     }
661 
662   /* There is a similar overflow check at the start of store_bit_field_1,
663      but it only handles the case where the field lies completely outside
664      the register.  There are also cases where the field lies partially in
665      the register; for those we need to adjust BITSIZE to account for the
666      partial overflow.  Without this fix, pr48335-2.c is broken on
667      big-endian targets that have a bit-insert instruction, such as arm
668      and aarch64.  */
669   if (bitsize + bitnum > unit && bitnum < unit)
670     {
671       warning (OPT_Wextra, "write of %wu-bit data outside the bound of "
672 	       "destination object, data truncated into %wu-bit",
673 	       bitsize, unit - bitnum);
674       bitsize = unit - bitnum;
675     }
676 
677   /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
678      "backwards" from the size of the unit we are inserting into.
679      Otherwise, we count bits from the most significant on a
680      BYTES/BITS_BIG_ENDIAN machine.  */
681 
682   if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
683     bitnum = unit - bitsize - bitnum;
684 
685   /* Convert VALUE to op_mode (which insv insn wants) in VALUE1.  */
686   value1 = value;
687   if (value_mode != op_mode)
688     {
689       if (GET_MODE_BITSIZE (value_mode) >= bitsize)
690 	{
691 	  rtx tmp;
692 	  /* Optimization: Don't bother really extending VALUE
693 	     if it has all the bits we will actually use.  However,
694 	     if we must narrow it, be sure we do it correctly.  */
695 
696 	  if (GET_MODE_SIZE (value_mode) < GET_MODE_SIZE (op_mode))
697 	    {
698 	      tmp = simplify_subreg (op_mode, value1, value_mode, 0);
699 	      if (! tmp)
700 		tmp = simplify_gen_subreg (op_mode,
701 					   force_reg (value_mode, value1),
702 					   value_mode, 0);
703 	    }
704 	  else
705 	    {
706 	      tmp = gen_lowpart_if_possible (op_mode, value1);
707 	      if (! tmp)
708 		tmp = gen_lowpart (op_mode, force_reg (value_mode, value1));
709 	    }
710 	  value1 = tmp;
711 	}
712       else if (CONST_INT_P (value))
713 	value1 = gen_int_mode (INTVAL (value), op_mode);
714       else
715 	/* The parse phase is supposed to make VALUE's data type
716 	   match that of the component reference, which is a type
717 	   at least as wide as the field; so VALUE should have
718 	   a mode that corresponds to that type.  */
719 	gcc_assert (CONSTANT_P (value));
720     }
721 
722   create_fixed_operand (&ops[0], xop0);
723   create_integer_operand (&ops[1], bitsize);
724   create_integer_operand (&ops[2], bitnum);
725   create_input_operand (&ops[3], value1, op_mode);
726   if (maybe_expand_insn (insv->icode, 4, ops))
727     {
728       if (copy_back)
729 	convert_move (op0, xop0, true);
730       return true;
731     }
732   delete_insns_since (last);
733   return false;
734 }
735 
736 /* A subroutine of store_bit_field, with the same arguments.  Return true
737    if the operation could be implemented.
738 
739    If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
740    no other way of implementing the operation.  If FALLBACK_P is false,
741    return false instead.  */
742 
743 static bool
744 store_bit_field_1 (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
745 		   poly_uint64 bitregion_start, poly_uint64 bitregion_end,
746 		   machine_mode fieldmode,
747 		   rtx value, bool reverse, bool fallback_p)
748 {
749   rtx op0 = str_rtx;
750 
751   while (GET_CODE (op0) == SUBREG)
752     {
753       bitnum += subreg_memory_offset (op0) * BITS_PER_UNIT;
754       op0 = SUBREG_REG (op0);
755     }
756 
757   /* No action is needed if the target is a register and if the field
758      lies completely outside that register.  This can occur if the source
759      code contains an out-of-bounds access to a small array.  */
760   if (REG_P (op0) && known_ge (bitnum, GET_MODE_BITSIZE (GET_MODE (op0))))
761     return true;
762 
763   /* Use vec_set patterns for inserting parts of vectors whenever
764      available.  */
765   machine_mode outermode = GET_MODE (op0);
766   scalar_mode innermode = GET_MODE_INNER (outermode);
767   poly_uint64 pos;
768   if (VECTOR_MODE_P (outermode)
769       && !MEM_P (op0)
770       && optab_handler (vec_set_optab, outermode) != CODE_FOR_nothing
771       && fieldmode == innermode
772       && known_eq (bitsize, GET_MODE_BITSIZE (innermode))
773       && multiple_p (bitnum, GET_MODE_BITSIZE (innermode), &pos))
774     {
775       class expand_operand ops[3];
776       enum insn_code icode = optab_handler (vec_set_optab, outermode);
777 
778       create_fixed_operand (&ops[0], op0);
779       create_input_operand (&ops[1], value, innermode);
780       create_integer_operand (&ops[2], pos);
781       if (maybe_expand_insn (icode, 3, ops))
782 	return true;
783     }
784 
785   /* If the target is a register, then overwriting the entire object or
786      storing a full-word or multi-word field can be done with just a SUBREG.  */
787   if (!MEM_P (op0)
788       && known_eq (bitsize, GET_MODE_BITSIZE (fieldmode)))
789     {
790       /* Use the subreg machinery either to narrow OP0 to the required
791 	 words or to cope with mode punning between equal-sized modes.
792 	 In the latter case, use subreg on the rhs side, not lhs.  */
793       rtx sub;
794       HOST_WIDE_INT regnum;
795       poly_uint64 regsize = REGMODE_NATURAL_SIZE (GET_MODE (op0));
796       if (known_eq (bitnum, 0U)
797 	  && known_eq (bitsize, GET_MODE_BITSIZE (GET_MODE (op0))))
798 	{
799 	  sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0);
800 	  if (sub)
801 	    {
802 	      if (reverse)
803 		sub = flip_storage_order (GET_MODE (op0), sub);
804 	      emit_move_insn (op0, sub);
805 	      return true;
806 	    }
807 	}
808       else if (constant_multiple_p (bitnum, regsize * BITS_PER_UNIT, &regnum)
809 	       && multiple_p (bitsize, regsize * BITS_PER_UNIT)
810 	       && known_ge (GET_MODE_BITSIZE (GET_MODE (op0)), bitsize))
811 	{
812 	  sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
813 				     regnum * regsize);
814 	  if (sub)
815 	    {
816 	      if (reverse)
817 		value = flip_storage_order (fieldmode, value);
818 	      emit_move_insn (sub, value);
819 	      return true;
820 	    }
821 	}
822     }
823 
824   /* If the target is memory, storing any naturally aligned field can be
825      done with a simple store.  For targets that support fast unaligned
826      memory, any naturally sized, unit aligned field can be done directly.  */
827   poly_uint64 bytenum;
828   if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode, &bytenum))
829     {
830       op0 = adjust_bitfield_address (op0, fieldmode, bytenum);
831       if (reverse)
832 	value = flip_storage_order (fieldmode, value);
833       emit_move_insn (op0, value);
834       return true;
835     }
836 
837   /* It's possible we'll need to handle other cases here for
838      polynomial bitnum and bitsize.  */
839 
840   /* From here on we need to be looking at a fixed-size insertion.  */
841   unsigned HOST_WIDE_INT ibitsize = bitsize.to_constant ();
842   unsigned HOST_WIDE_INT ibitnum = bitnum.to_constant ();
843 
844   /* Make sure we are playing with integral modes.  Pun with subregs
845      if we aren't.  This must come after the entire register case above,
846      since that case is valid for any mode.  The following cases are only
847      valid for integral modes.  */
848   opt_scalar_int_mode op0_mode = int_mode_for_mode (GET_MODE (op0));
849   scalar_int_mode imode;
850   if (!op0_mode.exists (&imode) || imode != GET_MODE (op0))
851     {
852       if (MEM_P (op0))
853 	op0 = adjust_bitfield_address_size (op0, op0_mode.else_blk (),
854 					    0, MEM_SIZE (op0));
855       else if (!op0_mode.exists ())
856 	{
857 	  if (ibitnum == 0
858 	      && known_eq (ibitsize, GET_MODE_BITSIZE (GET_MODE (op0)))
859 	      && MEM_P (value)
860 	      && !reverse)
861 	    {
862 	      value = adjust_address (value, GET_MODE (op0), 0);
863 	      emit_move_insn (op0, value);
864 	      return true;
865 	    }
866 	  if (!fallback_p)
867 	    return false;
868 	  rtx temp = assign_stack_temp (GET_MODE (op0),
869 					GET_MODE_SIZE (GET_MODE (op0)));
870 	  emit_move_insn (temp, op0);
871 	  store_bit_field_1 (temp, bitsize, bitnum, 0, 0, fieldmode, value,
872 			     reverse, fallback_p);
873 	  emit_move_insn (op0, temp);
874 	  return true;
875 	}
876       else
877 	op0 = gen_lowpart (op0_mode.require (), op0);
878     }
879 
880   return store_integral_bit_field (op0, op0_mode, ibitsize, ibitnum,
881 				   bitregion_start, bitregion_end,
882 				   fieldmode, value, reverse, fallback_p);
883 }
884 
885 /* Subroutine of store_bit_field_1, with the same arguments, except
886    that BITSIZE and BITNUM are constant.  Handle cases specific to
887    integral modes.  If OP0_MODE is defined, it is the mode of OP0,
888    otherwise OP0 is a BLKmode MEM.  */
889 
890 static bool
891 store_integral_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
892 			  unsigned HOST_WIDE_INT bitsize,
893 			  unsigned HOST_WIDE_INT bitnum,
894 			  poly_uint64 bitregion_start,
895 			  poly_uint64 bitregion_end,
896 			  machine_mode fieldmode,
897 			  rtx value, bool reverse, bool fallback_p)
898 {
899   /* Storing an lsb-aligned field in a register
900      can be done with a movstrict instruction.  */
901 
902   if (!MEM_P (op0)
903       && !reverse
904       && lowpart_bit_field_p (bitnum, bitsize, op0_mode.require ())
905       && known_eq (bitsize, GET_MODE_BITSIZE (fieldmode))
906       && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
907     {
908       class expand_operand ops[2];
909       enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
910       rtx arg0 = op0;
911       unsigned HOST_WIDE_INT subreg_off;
912 
913       if (GET_CODE (arg0) == SUBREG)
914 	{
915 	  /* Else we've got some float mode source being extracted into
916 	     a different float mode destination -- this combination of
917 	     subregs results in Severe Tire Damage.  */
918 	  gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
919 		      || GET_MODE_CLASS (fieldmode) == MODE_INT
920 		      || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
921 	  arg0 = SUBREG_REG (arg0);
922 	}
923 
924       subreg_off = bitnum / BITS_PER_UNIT;
925       if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off)
926 	  /* STRICT_LOW_PART must have a non-paradoxical subreg as
927 	     operand.  */
928 	  && !paradoxical_subreg_p (fieldmode, GET_MODE (arg0)))
929 	{
930 	  arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
931 
932 	  create_fixed_operand (&ops[0], arg0);
933 	  /* Shrink the source operand to FIELDMODE.  */
934 	  create_convert_operand_to (&ops[1], value, fieldmode, false);
935 	  if (maybe_expand_insn (icode, 2, ops))
936 	    return true;
937 	}
938     }
939 
940   /* Handle fields bigger than a word.  */
941 
942   if (bitsize > BITS_PER_WORD)
943     {
944       /* Here we transfer the words of the field
945 	 in the order least significant first.
946 	 This is because the most significant word is the one which may
947 	 be less than full.
948 	 However, only do that if the value is not BLKmode.  */
949 
950       const bool backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
951       const int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
952       rtx_insn *last;
953 
954       /* This is the mode we must force value to, so that there will be enough
955 	 subwords to extract.  Note that fieldmode will often (always?) be
956 	 VOIDmode, because that is what store_field uses to indicate that this
957 	 is a bit field, but passing VOIDmode to operand_subword_force
958 	 is not allowed.
959 
960 	 The mode must be fixed-size, since insertions into variable-sized
961 	 objects are meant to be handled before calling this function.  */
962       fixed_size_mode value_mode = as_a <fixed_size_mode> (GET_MODE (value));
963       if (value_mode == VOIDmode)
964 	value_mode = smallest_int_mode_for_size (nwords * BITS_PER_WORD);
965 
966       last = get_last_insn ();
967       for (int i = 0; i < nwords; i++)
968 	{
969 	  /* Number of bits to be stored in this iteration, i.e. BITS_PER_WORD
970 	     except maybe for the last iteration.  */
971 	  const unsigned HOST_WIDE_INT new_bitsize
972 	    = MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
973 	  /* Bit offset from the starting bit number in the target.  */
974 	  const unsigned int bit_offset
975 	    = backwards ^ reverse
976 	      ? MAX ((int) bitsize - (i + 1) * BITS_PER_WORD, 0)
977 	      : i * BITS_PER_WORD;
978 	  /* Starting word number in the value.  */
979 	  const unsigned int wordnum
980 	    = backwards
981 	      ? GET_MODE_SIZE (value_mode) / UNITS_PER_WORD - (i + 1)
982 	      : i;
983 	  /* The chunk of the value in word_mode.  We use bit-field extraction
984 	      in BLKmode to handle unaligned memory references and to shift the
985 	      last chunk right on big-endian machines if need be.  */
986 	  rtx value_word
987 	    = fieldmode == BLKmode
988 	      ? extract_bit_field (value, new_bitsize, wordnum * BITS_PER_WORD,
989 				   1, NULL_RTX, word_mode, word_mode, false,
990 				   NULL)
991 	      : operand_subword_force (value, wordnum, value_mode);
992 
993 	  if (!store_bit_field_1 (op0, new_bitsize,
994 				  bitnum + bit_offset,
995 				  bitregion_start, bitregion_end,
996 				  word_mode,
997 				  value_word, reverse, fallback_p))
998 	    {
999 	      delete_insns_since (last);
1000 	      return false;
1001 	    }
1002 	}
1003       return true;
1004     }
1005 
1006   /* If VALUE has a floating-point or complex mode, access it as an
1007      integer of the corresponding size.  This can occur on a machine
1008      with 64 bit registers that uses SFmode for float.  It can also
1009      occur for unaligned float or complex fields.  */
1010   rtx orig_value = value;
1011   scalar_int_mode value_mode;
1012   if (GET_MODE (value) == VOIDmode)
1013     /* By this point we've dealt with values that are bigger than a word,
1014        so word_mode is a conservatively correct choice.  */
1015     value_mode = word_mode;
1016   else if (!is_a <scalar_int_mode> (GET_MODE (value), &value_mode))
1017     {
1018       value_mode = int_mode_for_mode (GET_MODE (value)).require ();
1019       value = gen_reg_rtx (value_mode);
1020       emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
1021     }
1022 
1023   /* If OP0 is a multi-word register, narrow it to the affected word.
1024      If the region spans two words, defer to store_split_bit_field.
1025      Don't do this if op0 is a single hard register wider than word
1026      such as a float or vector register.  */
1027   if (!MEM_P (op0)
1028       && GET_MODE_SIZE (op0_mode.require ()) > UNITS_PER_WORD
1029       && (!REG_P (op0)
1030 	  || !HARD_REGISTER_P (op0)
1031 	  || hard_regno_nregs (REGNO (op0), op0_mode.require ()) != 1))
1032     {
1033       if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
1034 	{
1035 	  if (!fallback_p)
1036 	    return false;
1037 
1038 	  store_split_bit_field (op0, op0_mode, bitsize, bitnum,
1039 				 bitregion_start, bitregion_end,
1040 				 value, value_mode, reverse);
1041 	  return true;
1042 	}
1043       op0 = simplify_gen_subreg (word_mode, op0, op0_mode.require (),
1044 				 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1045       gcc_assert (op0);
1046       op0_mode = word_mode;
1047       bitnum %= BITS_PER_WORD;
1048     }
1049 
1050   /* From here on we can assume that the field to be stored in fits
1051      within a word.  If the destination is a register, it too fits
1052      in a word.  */
1053 
1054   extraction_insn insv;
1055   if (!MEM_P (op0)
1056       && !reverse
1057       && get_best_reg_extraction_insn (&insv, EP_insv,
1058 				       GET_MODE_BITSIZE (op0_mode.require ()),
1059 				       fieldmode)
1060       && store_bit_field_using_insv (&insv, op0, op0_mode,
1061 				     bitsize, bitnum, value, value_mode))
1062     return true;
1063 
1064   /* If OP0 is a memory, try copying it to a register and seeing if a
1065      cheap register alternative is available.  */
1066   if (MEM_P (op0) && !reverse)
1067     {
1068       if (get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
1069 					fieldmode)
1070 	  && store_bit_field_using_insv (&insv, op0, op0_mode,
1071 					 bitsize, bitnum, value, value_mode))
1072 	return true;
1073 
1074       rtx_insn *last = get_last_insn ();
1075 
1076       /* Try loading part of OP0 into a register, inserting the bitfield
1077 	 into that, and then copying the result back to OP0.  */
1078       unsigned HOST_WIDE_INT bitpos;
1079       rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
1080 					       bitregion_start, bitregion_end,
1081 					       fieldmode, &bitpos);
1082       if (xop0)
1083 	{
1084 	  rtx tempreg = copy_to_reg (xop0);
1085 	  if (store_bit_field_1 (tempreg, bitsize, bitpos,
1086 				 bitregion_start, bitregion_end,
1087 				 fieldmode, orig_value, reverse, false))
1088 	    {
1089 	      emit_move_insn (xop0, tempreg);
1090 	      return true;
1091 	    }
1092 	  delete_insns_since (last);
1093 	}
1094     }
1095 
1096   if (!fallback_p)
1097     return false;
1098 
1099   store_fixed_bit_field (op0, op0_mode, bitsize, bitnum, bitregion_start,
1100 			 bitregion_end, value, value_mode, reverse);
1101   return true;
1102 }
1103 
1104 /* Generate code to store value from rtx VALUE
1105    into a bit-field within structure STR_RTX
1106    containing BITSIZE bits starting at bit BITNUM.
1107 
1108    BITREGION_START is bitpos of the first bitfield in this region.
1109    BITREGION_END is the bitpos of the ending bitfield in this region.
1110    These two fields are 0, if the C++ memory model does not apply,
1111    or we are not interested in keeping track of bitfield regions.
1112 
1113    FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
1114 
1115    If REVERSE is true, the store is to be done in reverse order.  */
1116 
1117 void
1118 store_bit_field (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
1119 		 poly_uint64 bitregion_start, poly_uint64 bitregion_end,
1120 		 machine_mode fieldmode,
1121 		 rtx value, bool reverse)
1122 {
1123   /* Handle -fstrict-volatile-bitfields in the cases where it applies.  */
1124   unsigned HOST_WIDE_INT ibitsize = 0, ibitnum = 0;
1125   scalar_int_mode int_mode;
1126   if (bitsize.is_constant (&ibitsize)
1127       && bitnum.is_constant (&ibitnum)
1128       && is_a <scalar_int_mode> (fieldmode, &int_mode)
1129       && strict_volatile_bitfield_p (str_rtx, ibitsize, ibitnum, int_mode,
1130 				     bitregion_start, bitregion_end))
1131     {
1132       /* Storing of a full word can be done with a simple store.
1133 	 We know here that the field can be accessed with one single
1134 	 instruction.  For targets that support unaligned memory,
1135 	 an unaligned access may be necessary.  */
1136       if (ibitsize == GET_MODE_BITSIZE (int_mode))
1137 	{
1138 	  str_rtx = adjust_bitfield_address (str_rtx, int_mode,
1139 					     ibitnum / BITS_PER_UNIT);
1140 	  if (reverse)
1141 	    value = flip_storage_order (int_mode, value);
1142 	  gcc_assert (ibitnum % BITS_PER_UNIT == 0);
1143 	  emit_move_insn (str_rtx, value);
1144 	}
1145       else
1146 	{
1147 	  rtx temp;
1148 
1149 	  str_rtx = narrow_bit_field_mem (str_rtx, int_mode, ibitsize,
1150 					  ibitnum, &ibitnum);
1151 	  gcc_assert (ibitnum + ibitsize <= GET_MODE_BITSIZE (int_mode));
1152 	  temp = copy_to_reg (str_rtx);
1153 	  if (!store_bit_field_1 (temp, ibitsize, ibitnum, 0, 0,
1154 				  int_mode, value, reverse, true))
1155 	    gcc_unreachable ();
1156 
1157 	  emit_move_insn (str_rtx, temp);
1158 	}
1159 
1160       return;
1161     }
1162 
1163   /* Under the C++0x memory model, we must not touch bits outside the
1164      bit region.  Adjust the address to start at the beginning of the
1165      bit region.  */
1166   if (MEM_P (str_rtx) && maybe_ne (bitregion_start, 0U))
1167     {
1168       scalar_int_mode best_mode;
1169       machine_mode addr_mode = VOIDmode;
1170 
1171       poly_uint64 offset = exact_div (bitregion_start, BITS_PER_UNIT);
1172       bitnum -= bitregion_start;
1173       poly_int64 size = bits_to_bytes_round_up (bitnum + bitsize);
1174       bitregion_end -= bitregion_start;
1175       bitregion_start = 0;
1176       if (bitsize.is_constant (&ibitsize)
1177 	  && bitnum.is_constant (&ibitnum)
1178 	  && get_best_mode (ibitsize, ibitnum,
1179 			    bitregion_start, bitregion_end,
1180 			    MEM_ALIGN (str_rtx), INT_MAX,
1181 			    MEM_VOLATILE_P (str_rtx), &best_mode))
1182 	addr_mode = best_mode;
1183       str_rtx = adjust_bitfield_address_size (str_rtx, addr_mode,
1184 					      offset, size);
1185     }
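
  /* Informally: if BITREGION_START is 32 and BITNUM is 40, the code above
     advances the address by 4 bytes, reduces BITNUM to 8 and rebases the
     bit region so that it starts at bit 0 of the new reference.  */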
1186 
1187   if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
1188 			  bitregion_start, bitregion_end,
1189 			  fieldmode, value, reverse, true))
1190     gcc_unreachable ();
1191 }
1192 
1193 /* Use shifts and boolean operations to store VALUE into a bit field of
1194    width BITSIZE in OP0, starting at bit BITNUM.  If OP0_MODE is defined,
1195    it is the mode of OP0, otherwise OP0 is a BLKmode MEM.  VALUE_MODE is
1196    the mode of VALUE.
1197 
1198    If REVERSE is true, the store is to be done in reverse order.  */
1199 
1200 static void
1201 store_fixed_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
1202 		       unsigned HOST_WIDE_INT bitsize,
1203 		       unsigned HOST_WIDE_INT bitnum,
1204 		       poly_uint64 bitregion_start, poly_uint64 bitregion_end,
1205 		       rtx value, scalar_int_mode value_mode, bool reverse)
1206 {
1207   /* There is a case not handled here:
1208      a structure with a known alignment of just a halfword
1209      and a field split across two aligned halfwords within the structure.
1210      Or likewise a structure with a known alignment of just a byte
1211      and a field split across two bytes.
1212      Such cases are not supposed to be able to occur.  */
1213 
1214   scalar_int_mode best_mode;
1215   if (MEM_P (op0))
1216     {
1217       unsigned int max_bitsize = BITS_PER_WORD;
1218       scalar_int_mode imode;
1219       if (op0_mode.exists (&imode) && GET_MODE_BITSIZE (imode) < max_bitsize)
1220 	max_bitsize = GET_MODE_BITSIZE (imode);
1221 
1222       if (!get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
1223 			  MEM_ALIGN (op0), max_bitsize, MEM_VOLATILE_P (op0),
1224 			  &best_mode))
1225 	{
1226 	  /* The only way this should occur is if the field spans word
1227 	     boundaries.  */
1228 	  store_split_bit_field (op0, op0_mode, bitsize, bitnum,
1229 				 bitregion_start, bitregion_end,
1230 				 value, value_mode, reverse);
1231 	  return;
1232 	}
1233 
1234       op0 = narrow_bit_field_mem (op0, best_mode, bitsize, bitnum, &bitnum);
1235     }
1236   else
1237     best_mode = op0_mode.require ();
1238 
1239   store_fixed_bit_field_1 (op0, best_mode, bitsize, bitnum,
1240 			   value, value_mode, reverse);
1241 }
1242 
1243 /* Helper function for store_fixed_bit_field, stores
1244    the bit field always using MODE, which is the mode of OP0.  The other
1245    arguments are as for store_fixed_bit_field.  */
1246 
1247 static void
1248 store_fixed_bit_field_1 (rtx op0, scalar_int_mode mode,
1249 			 unsigned HOST_WIDE_INT bitsize,
1250 			 unsigned HOST_WIDE_INT bitnum,
1251 			 rtx value, scalar_int_mode value_mode, bool reverse)
1252 {
1253   rtx temp;
1254   int all_zero = 0;
1255   int all_one = 0;
1256 
1257   /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1258      for invalid input, such as f5 from gcc.dg/pr48335-2.c.  */
1259 
1260   if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1261     /* BITNUM is the distance between our msb
1262        and that of the containing datum.
1263        Convert it to the distance from the lsb.  */
1264     bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1265 
1266   /* Now BITNUM is always the distance between our lsb
1267      and that of OP0.  */
1268 
1269   /* Shift VALUE left by BITNUM bits.  If VALUE is not constant,
1270      we must first convert its mode to MODE.  */
1271 
1272   if (CONST_INT_P (value))
1273     {
1274       unsigned HOST_WIDE_INT v = UINTVAL (value);
1275 
1276       if (bitsize < HOST_BITS_PER_WIDE_INT)
1277 	v &= (HOST_WIDE_INT_1U << bitsize) - 1;
1278 
1279       if (v == 0)
1280 	all_zero = 1;
1281       else if ((bitsize < HOST_BITS_PER_WIDE_INT
1282 		&& v == (HOST_WIDE_INT_1U << bitsize) - 1)
1283 	       || (bitsize == HOST_BITS_PER_WIDE_INT
1284 		   && v == HOST_WIDE_INT_M1U))
1285 	all_one = 1;
1286 
1287       value = lshift_value (mode, v, bitnum);
1288     }
1289   else
1290     {
1291       int must_and = (GET_MODE_BITSIZE (value_mode) != bitsize
1292 		      && bitnum + bitsize != GET_MODE_BITSIZE (mode));
1293 
1294       if (value_mode != mode)
1295 	value = convert_to_mode (mode, value, 1);
1296 
1297       if (must_and)
1298 	value = expand_binop (mode, and_optab, value,
1299 			      mask_rtx (mode, 0, bitsize, 0),
1300 			      NULL_RTX, 1, OPTAB_LIB_WIDEN);
1301       if (bitnum > 0)
1302 	value = expand_shift (LSHIFT_EXPR, mode, value,
1303 			      bitnum, NULL_RTX, 1);
1304     }
1305 
1306   if (reverse)
1307     value = flip_storage_order (mode, value);
1308 
1309   /* Now clear the chosen bits in OP0,
1310      except that if VALUE is -1 we need not bother.  */
1311   /* We keep the intermediates in registers to allow CSE to combine
1312      consecutive bitfield assignments.  */
1313 
1314   temp = force_reg (mode, op0);
1315 
1316   if (! all_one)
1317     {
1318       rtx mask = mask_rtx (mode, bitnum, bitsize, 1);
1319       if (reverse)
1320 	mask = flip_storage_order (mode, mask);
1321       temp = expand_binop (mode, and_optab, temp, mask,
1322 			   NULL_RTX, 1, OPTAB_LIB_WIDEN);
1323       temp = force_reg (mode, temp);
1324     }
1325 
1326   /* Now logical-or VALUE into OP0, unless it is zero.  */
1327 
1328   if (! all_zero)
1329     {
1330       temp = expand_binop (mode, ior_optab, temp, value,
1331 			   NULL_RTX, 1, OPTAB_LIB_WIDEN);
1332       temp = force_reg (mode, temp);
1333     }
1334 
1335   if (op0 != temp)
1336     {
1337       op0 = copy_rtx (op0);
1338       emit_move_insn (op0, temp);
1339     }
1340 }
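
/* In effect, the function above computes (informally, ignoring storage
   order and the constant-value shortcuts, with BITNUM counting from the
   least significant bit):

     op0 = (op0 & ~(mask << bitnum)) | ((value & mask) << bitnum)

   where MASK has BITSIZE low-order ones, using one AND, one IOR and shifts
   expanded through the optabs.  */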
1341 
1342 /* Store a bit field that is split across multiple accessible memory objects.
1343 
1344    OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1345    BITSIZE is the field width; BITPOS the position of its first bit
1346    (within the word).
1347    VALUE is the value to store, which has mode VALUE_MODE.
1348    If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is
1349    a BLKmode MEM.
1350 
1351    If REVERSE is true, the store is to be done in reverse order.
1352 
1353    This does not yet handle fields wider than BITS_PER_WORD.  */
1354 
1355 static void
1356 store_split_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
1357 		       unsigned HOST_WIDE_INT bitsize,
1358 		       unsigned HOST_WIDE_INT bitpos,
1359 		       poly_uint64 bitregion_start, poly_uint64 bitregion_end,
1360 		       rtx value, scalar_int_mode value_mode, bool reverse)
1361 {
1362   unsigned int unit, total_bits, bitsdone = 0;
1363 
1364   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1365      much at a time.  */
1366   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1367     unit = BITS_PER_WORD;
1368   else
1369     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1370 
1371   /* If OP0 is a memory with a mode, then UNIT must not be larger than
1372      OP0's mode as well.  Otherwise, store_fixed_bit_field will call us
1373      again, and we will mutually recurse forever.  */
1374   if (MEM_P (op0) && op0_mode.exists ())
1375     unit = MIN (unit, GET_MODE_BITSIZE (op0_mode.require ()));
1376 
1377   /* If VALUE is a constant other than a CONST_INT, get it into a register in
1378      WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
1379      that VALUE might be a floating-point constant.  */
1380   if (CONSTANT_P (value) && !CONST_INT_P (value))
1381     {
1382       rtx word = gen_lowpart_common (word_mode, value);
1383 
1384       if (word && (value != word))
1385 	value = word;
1386       else
1387 	value = gen_lowpart_common (word_mode, force_reg (value_mode, value));
1388       value_mode = word_mode;
1389     }
1390 
1391   total_bits = GET_MODE_BITSIZE (value_mode);
1392 
1393   while (bitsdone < bitsize)
1394     {
1395       unsigned HOST_WIDE_INT thissize;
1396       unsigned HOST_WIDE_INT thispos;
1397       unsigned HOST_WIDE_INT offset;
1398       rtx part;
1399 
1400       offset = (bitpos + bitsdone) / unit;
1401       thispos = (bitpos + bitsdone) % unit;
1402 
1403       /* When the region of bytes we can touch is restricted, decrease
1404 	 UNIT close to the end of the region as needed.  If op0 is a REG
1405 	 or SUBREG of REG, don't do this, as there can't be data races
1406 	 on a register and we can expand shorter code in some cases.  */
1407       if (maybe_ne (bitregion_end, 0U)
1408 	  && unit > BITS_PER_UNIT
1409 	  && maybe_gt (bitpos + bitsdone - thispos + unit, bitregion_end + 1)
1410 	  && !REG_P (op0)
1411 	  && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0))))
1412 	{
1413 	  unit = unit / 2;
1414 	  continue;
1415 	}
1416 
1417       /* THISSIZE must not overrun a word boundary.  Otherwise,
1418 	 store_fixed_bit_field will call us again, and we will mutually
1419 	 recurse forever.  */
1420       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1421       thissize = MIN (thissize, unit - thispos);
1422 
1423       if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1424 	{
1425 	  /* Fetch successively less significant portions.  */
1426 	  if (CONST_INT_P (value))
1427 	    part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1428 			     >> (bitsize - bitsdone - thissize))
1429 			    & ((HOST_WIDE_INT_1 << thissize) - 1));
1430           /* Likewise, but the source is little-endian.  */
1431           else if (reverse)
1432 	    part = extract_fixed_bit_field (word_mode, value, value_mode,
1433 					    thissize,
1434 					    bitsize - bitsdone - thissize,
1435 					    NULL_RTX, 1, false);
1436 	  else
1437 	    /* The args are chosen so that the last part includes the
1438 	       lsb.  Give extract_bit_field the value it needs (with
1439 	       endianness compensation) to fetch the piece we want.  */
1440 	    part = extract_fixed_bit_field (word_mode, value, value_mode,
1441 					    thissize,
1442 					    total_bits - bitsize + bitsdone,
1443 					    NULL_RTX, 1, false);
1444 	}
1445       else
1446 	{
1447 	  /* Fetch successively more significant portions.  */
1448 	  if (CONST_INT_P (value))
1449 	    part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1450 			     >> bitsdone)
1451 			    & ((HOST_WIDE_INT_1 << thissize) - 1));
1452 	  /* Likewise, but the source is big-endian.  */
1453           else if (reverse)
1454 	    part = extract_fixed_bit_field (word_mode, value, value_mode,
1455 					    thissize,
1456 					    total_bits - bitsdone - thissize,
1457 					    NULL_RTX, 1, false);
1458 	  else
1459 	    part = extract_fixed_bit_field (word_mode, value, value_mode,
1460 					    thissize, bitsdone, NULL_RTX,
1461 					    1, false);
1462 	}
1463 
1464       /* If OP0 is a register, then handle OFFSET here.  */
1465       rtx op0_piece = op0;
1466       opt_scalar_int_mode op0_piece_mode = op0_mode;
1467       if (SUBREG_P (op0) || REG_P (op0))
1468 	{
1469 	  scalar_int_mode imode;
1470 	  if (op0_mode.exists (&imode)
1471 	      && GET_MODE_SIZE (imode) < UNITS_PER_WORD)
1472 	    {
1473 	      if (offset)
1474 		op0_piece = const0_rtx;
1475 	    }
1476 	  else
1477 	    {
1478 	      op0_piece = operand_subword_force (op0,
1479 						 offset * unit / BITS_PER_WORD,
1480 						 GET_MODE (op0));
1481 	      op0_piece_mode = word_mode;
1482 	    }
1483 	  offset &= BITS_PER_WORD / unit - 1;
1484 	}
1485 
1486       /* OFFSET is in UNITs, and UNIT is in bits.  If OP0_PIECE is const0_rtx,
1487 	 it is just an out-of-bounds access.  Ignore it.  */
1488       if (op0_piece != const0_rtx)
1489 	store_fixed_bit_field (op0_piece, op0_piece_mode, thissize,
1490 			       offset * unit + thispos, bitregion_start,
1491 			       bitregion_end, part, word_mode, reverse);
1492       bitsdone += thissize;
1493     }
1494 }
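
/* Illustrative trace (editorial sketch, not part of the compiler): with a
   byte-aligned BLKmode MEM (UNIT == 8) on a !BYTES_BIG_ENDIAN target and
   !REVERSE, storing a 13-bit VALUE at BITPOS == 27 takes two iterations of
   the loop above, roughly equivalent to

     mem[3] = (mem[3] & ~(0x1f << 3)) | ((value & 0x1f) << 3);   (thissize 5)
     mem[4] = (value >> 5) & 0xff;                               (thissize 8)

   The first iteration stores the 5 bits that fit below the next byte
   boundary (THISPOS == 3); the second stores the remaining 8 bits into the
   following byte.  */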
1495 
1496 /* A subroutine of extract_bit_field_1 that converts return value X
1497    to either MODE or TMODE.  MODE, TMODE and UNSIGNEDP are arguments
1498    to extract_bit_field.  */
1499 
1500 static rtx
1501 convert_extracted_bit_field (rtx x, machine_mode mode,
1502 			     machine_mode tmode, bool unsignedp)
1503 {
1504   if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1505     return x;
1506 
1507   /* If TMODE is not a scalar integer mode, first convert X to a scalar
1508      integer mode of the same size and then reinterpret the bits in TMODE
1509      via a SUBREG (for example, when TMODE is a floating-point mode).  */
1510   if (!SCALAR_INT_MODE_P (tmode))
1511     {
1512       scalar_int_mode int_mode = int_mode_for_mode (tmode).require ();
1513       x = convert_to_mode (int_mode, x, unsignedp);
1514       x = force_reg (int_mode, x);
1515       return gen_lowpart (tmode, x);
1516     }
1517 
1518   return convert_to_mode (tmode, x, unsignedp);
1519 }
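
/* For example (illustrative only, assuming a target where SFmode is 32 bits
   wide): if TMODE is SFmode and X was extracted in SImode, the code above
   converts X to the scalar integer mode returned by int_mode_for_mode
   (SFmode), forces it into a register and then reinterprets the bits as
   SFmode with gen_lowpart, rather than performing a numeric int-to-float
   conversion.  */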
1520 
1521 /* Try to use an ext(z)v pattern to extract a field from OP0.
1522    Return the extracted value on success, otherwise return null.
1523    EXTV describes the extraction instruction to use.  If OP0_MODE
1524    is defined, it is the mode of OP0, otherwise OP0 is a BLKmode MEM.
1525    The other arguments are as for extract_bit_field.  */
1526 
1527 static rtx
1528 extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
1529 			      opt_scalar_int_mode op0_mode,
1530 			      unsigned HOST_WIDE_INT bitsize,
1531 			      unsigned HOST_WIDE_INT bitnum,
1532 			      int unsignedp, rtx target,
1533 			      machine_mode mode, machine_mode tmode)
1534 {
1535   class expand_operand ops[4];
1536   rtx spec_target = target;
1537   rtx spec_target_subreg = 0;
1538   scalar_int_mode ext_mode = extv->field_mode;
1539   unsigned unit = GET_MODE_BITSIZE (ext_mode);
1540 
1541   if (bitsize == 0 || unit < bitsize)
1542     return NULL_RTX;
1543 
1544   if (MEM_P (op0))
1545     /* Get a reference to the first byte of the field.  */
1546     op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
1547 				&bitnum);
1548   else
1549     {
1550       /* Convert from counting within OP0 to counting in EXT_MODE.  */
1551       if (BYTES_BIG_ENDIAN)
1552 	bitnum += unit - GET_MODE_BITSIZE (op0_mode.require ());
1553 
1554       /* If op0 is a register, we need it in EXT_MODE to make it
1555 	 acceptable to the format of ext(z)v.  */
1556       if (GET_CODE (op0) == SUBREG && op0_mode.require () != ext_mode)
1557 	return NULL_RTX;
1558       if (REG_P (op0) && op0_mode.require () != ext_mode)
1559 	op0 = gen_lowpart_SUBREG (ext_mode, op0);
1560     }
1561 
1562   /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1563      "backwards" from the size of the unit we are extracting from.
1564      Otherwise, we count bits from the most significant on a
1565      BYTES/BITS_BIG_ENDIAN machine.  */
1566 
1567   if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1568     bitnum = unit - bitsize - bitnum;
1569 
1570   if (target == 0)
1571     target = spec_target = gen_reg_rtx (tmode);
1572 
1573   if (GET_MODE (target) != ext_mode)
1574     {
1575       rtx temp;
1576       /* Don't use LHS paradoxical subreg if explicit truncation is needed
1577 	 between the mode of the extraction (word_mode) and the target
1578 	 mode.  Instead, create a temporary and use convert_move to set
1579 	 the target.  */
1580       if (REG_P (target)
1581 	  && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode)
1582 	  && (temp = gen_lowpart_if_possible (ext_mode, target)))
1583 	{
1584 	  target = temp;
1585 	  if (partial_subreg_p (GET_MODE (spec_target), ext_mode))
1586 	    spec_target_subreg = target;
1587 	}
1588       else
1589 	target = gen_reg_rtx (ext_mode);
1590     }
1591 
1592   create_output_operand (&ops[0], target, ext_mode);
1593   create_fixed_operand (&ops[1], op0);
1594   create_integer_operand (&ops[2], bitsize);
1595   create_integer_operand (&ops[3], bitnum);
1596   if (maybe_expand_insn (extv->icode, 4, ops))
1597     {
1598       target = ops[0].value;
1599       if (target == spec_target)
1600 	return target;
1601       if (target == spec_target_subreg)
1602 	return spec_target;
1603       return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1604     }
1605   return NULL_RTX;
1606 }
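
/* Worked example of the bit-numbering adjustment above (editorial sketch):
   extracting an 8-bit field starting at BITNUM == 4 of a 32-bit register
   (UNIT == 32) on a target where BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN
   rewrites the position operand as bitnum = 32 - 8 - 4 == 20, i.e. the same
   field counted from the other end of the unit, which is the convention the
   ext(z)v pattern uses on such targets.  */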
1607 
1608 /* See whether it would be valid to extract the part of OP0 with
1609    mode OP0_MODE described by BITNUM and BITSIZE into a value of
1610    mode MODE using a subreg operation.
1611    Return the subreg if so, otherwise return null.  */
1612 
1613 static rtx
1614 extract_bit_field_as_subreg (machine_mode mode, rtx op0,
1615 			     machine_mode op0_mode,
1616 			     poly_uint64 bitsize, poly_uint64 bitnum)
1617 {
1618   poly_uint64 bytenum;
1619   if (multiple_p (bitnum, BITS_PER_UNIT, &bytenum)
1620       && known_eq (bitsize, GET_MODE_BITSIZE (mode))
1621       && lowpart_bit_field_p (bitnum, bitsize, op0_mode)
1622       && TRULY_NOOP_TRUNCATION_MODES_P (mode, op0_mode))
1623     return simplify_gen_subreg (mode, op0, op0_mode, bytenum);
1624   return NULL_RTX;
1625 }
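
/* For instance (illustrative sketch): asking for the low V2SImode half of a
   V4SImode register (BITNUM == 0, BITSIZE == 64) passes all of the checks
   above and yields (subreg:V2SI (reg:V4SI ...) 0); a request that is not a
   byte-aligned lowpart returns NULL_RTX and the caller falls back to other
   strategies.  */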
1626 
1627 /* A subroutine of extract_bit_field, with the same arguments.
1628    If FALLBACK_P is true, fall back to extract_fixed_bit_field
1629    if we can find no other means of implementing the operation.
1630    If FALLBACK_P is false, return NULL instead.  */
1631 
1632 static rtx
1633 extract_bit_field_1 (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
1634 		     int unsignedp, rtx target, machine_mode mode,
1635 		     machine_mode tmode, bool reverse, bool fallback_p,
1636 		     rtx *alt_rtl)
1637 {
1638   rtx op0 = str_rtx;
1639   machine_mode mode1;
1640 
1641   if (tmode == VOIDmode)
1642     tmode = mode;
1643 
1644   while (GET_CODE (op0) == SUBREG)
1645     {
1646       bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1647       op0 = SUBREG_REG (op0);
1648     }
1649 
1650   /* If we have an out-of-bounds access to a register, just return an
1651      uninitialized register of the required mode.  This can occur if the
1652      source code contains an out-of-bounds access to a small array.  */
1653   if (REG_P (op0) && known_ge (bitnum, GET_MODE_BITSIZE (GET_MODE (op0))))
1654     return gen_reg_rtx (tmode);
1655 
1656   if (REG_P (op0)
1657       && mode == GET_MODE (op0)
1658       && known_eq (bitnum, 0U)
1659       && known_eq (bitsize, GET_MODE_BITSIZE (GET_MODE (op0))))
1660     {
1661       if (reverse)
1662 	op0 = flip_storage_order (mode, op0);
1663       /* We're trying to extract a full register from itself.  */
1664       return op0;
1665     }
1666 
1667   /* First try to check for vector from vector extractions.  */
1668   if (VECTOR_MODE_P (GET_MODE (op0))
1669       && !MEM_P (op0)
1670       && VECTOR_MODE_P (tmode)
1671       && known_eq (bitsize, GET_MODE_BITSIZE (tmode))
1672       && maybe_gt (GET_MODE_SIZE (GET_MODE (op0)), GET_MODE_SIZE (tmode)))
1673     {
1674       machine_mode new_mode = GET_MODE (op0);
1675       if (GET_MODE_INNER (new_mode) != GET_MODE_INNER (tmode))
1676 	{
1677 	  scalar_mode inner_mode = GET_MODE_INNER (tmode);
1678 	  poly_uint64 nunits;
1679 	  if (!multiple_p (GET_MODE_BITSIZE (GET_MODE (op0)),
1680 			   GET_MODE_UNIT_BITSIZE (tmode), &nunits)
1681 	      || !related_vector_mode (tmode, inner_mode,
1682 				       nunits).exists (&new_mode)
1683 	      || maybe_ne (GET_MODE_SIZE (new_mode),
1684 			   GET_MODE_SIZE (GET_MODE (op0))))
1685 	    new_mode = VOIDmode;
1686 	}
1687       poly_uint64 pos;
1688       if (new_mode != VOIDmode
1689 	  && (convert_optab_handler (vec_extract_optab, new_mode, tmode)
1690 	      != CODE_FOR_nothing)
1691 	  && multiple_p (bitnum, GET_MODE_BITSIZE (tmode), &pos))
1692 	{
1693 	  class expand_operand ops[3];
1694 	  machine_mode outermode = new_mode;
1695 	  machine_mode innermode = tmode;
1696 	  enum insn_code icode
1697 	    = convert_optab_handler (vec_extract_optab, outermode, innermode);
1698 
1699 	  if (new_mode != GET_MODE (op0))
1700 	    op0 = gen_lowpart (new_mode, op0);
1701 	  create_output_operand (&ops[0], target, innermode);
1702 	  ops[0].target = 1;
1703 	  create_input_operand (&ops[1], op0, outermode);
1704 	  create_integer_operand (&ops[2], pos);
1705 	  if (maybe_expand_insn (icode, 3, ops))
1706 	    {
1707 	      if (alt_rtl && ops[0].target)
1708 		*alt_rtl = target;
1709 	      target = ops[0].value;
1710 	      if (GET_MODE (target) != mode)
1711 		return gen_lowpart (tmode, target);
1712 	      return target;
1713 	    }
1714 	}
1715     }
1716 
1717   /* See if we can get a better vector mode before extracting.  */
1718   if (VECTOR_MODE_P (GET_MODE (op0))
1719       && !MEM_P (op0)
1720       && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1721     {
1722       machine_mode new_mode;
1723 
1724       if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1725 	new_mode = MIN_MODE_VECTOR_FLOAT;
1726       else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1727 	new_mode = MIN_MODE_VECTOR_FRACT;
1728       else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1729 	new_mode = MIN_MODE_VECTOR_UFRACT;
1730       else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1731 	new_mode = MIN_MODE_VECTOR_ACCUM;
1732       else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1733 	new_mode = MIN_MODE_VECTOR_UACCUM;
1734       else
1735 	new_mode = MIN_MODE_VECTOR_INT;
1736 
1737       FOR_EACH_MODE_FROM (new_mode, new_mode)
1738 	if (known_eq (GET_MODE_SIZE (new_mode), GET_MODE_SIZE (GET_MODE (op0)))
1739 	    && known_eq (GET_MODE_UNIT_SIZE (new_mode), GET_MODE_SIZE (tmode))
1740 	    && targetm.vector_mode_supported_p (new_mode)
1741 	    && targetm.modes_tieable_p (GET_MODE (op0), new_mode))
1742 	  break;
1743       if (new_mode != VOIDmode)
1744 	op0 = gen_lowpart (new_mode, op0);
1745     }
1746 
1747   /* Use vec_extract patterns for extracting parts of vectors whenever
1748      available.  If that fails, see whether the current modes and bitregion
1749      give a natural subreg.  */
1750   machine_mode outermode = GET_MODE (op0);
1751   if (VECTOR_MODE_P (outermode) && !MEM_P (op0))
1752     {
1753       scalar_mode innermode = GET_MODE_INNER (outermode);
1754       enum insn_code icode
1755 	= convert_optab_handler (vec_extract_optab, outermode, innermode);
1756       poly_uint64 pos;
1757       if (icode != CODE_FOR_nothing
1758 	  && known_eq (bitsize, GET_MODE_BITSIZE (innermode))
1759 	  && multiple_p (bitnum, GET_MODE_BITSIZE (innermode), &pos))
1760 	{
1761 	  class expand_operand ops[3];
1762 
1763 	  create_output_operand (&ops[0], target, innermode);
1764 	  ops[0].target = 1;
1765 	  create_input_operand (&ops[1], op0, outermode);
1766 	  create_integer_operand (&ops[2], pos);
1767 	  if (maybe_expand_insn (icode, 3, ops))
1768 	    {
1769 	      if (alt_rtl && ops[0].target)
1770 		*alt_rtl = target;
1771 	      target = ops[0].value;
1772 	      if (GET_MODE (target) != mode)
1773 		return gen_lowpart (tmode, target);
1774 	      return target;
1775 	    }
1776 	}
1777       /* Using subregs is useful if we're extracting one register vector
1778 	 from a multi-register vector.  extract_bit_field_as_subreg checks
1779 	 for valid bitsize and bitnum, so we don't need to do that here.  */
1780       if (VECTOR_MODE_P (mode))
1781 	{
1782 	  rtx sub = extract_bit_field_as_subreg (mode, op0, outermode,
1783 						 bitsize, bitnum);
1784 	  if (sub)
1785 	    return sub;
1786 	}
1787     }
1788 
1789   /* Make sure we are playing with integral modes.  Pun with subregs
1790      if we aren't.  */
1791   opt_scalar_int_mode op0_mode = int_mode_for_mode (GET_MODE (op0));
1792   scalar_int_mode imode;
1793   if (!op0_mode.exists (&imode) || imode != GET_MODE (op0))
1794     {
1795       if (MEM_P (op0))
1796 	op0 = adjust_bitfield_address_size (op0, op0_mode.else_blk (),
1797 					    0, MEM_SIZE (op0));
1798       else if (op0_mode.exists (&imode))
1799 	{
1800 	  op0 = gen_lowpart (imode, op0);
1801 
1802 	  /* If we got a SUBREG, force it into a register since we
1803 	     aren't going to be able to do another SUBREG on it.  */
1804 	  if (GET_CODE (op0) == SUBREG)
1805 	    op0 = force_reg (imode, op0);
1806 	}
1807       else
1808 	{
1809 	  poly_int64 size = GET_MODE_SIZE (GET_MODE (op0));
1810 	  rtx mem = assign_stack_temp (GET_MODE (op0), size);
1811 	  emit_move_insn (mem, op0);
1812 	  op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size);
1813 	}
1814     }
1815 
1816   /* ??? We currently assume TARGET is at least as big as BITSIZE.
1817      If that's wrong, the solution is to test for it and set TARGET to 0
1818      if needed.  */
1819 
1820   /* Get the mode of the field to use for atomic access or subreg
1821      conversion.  */
1822   if (!SCALAR_INT_MODE_P (tmode)
1823       || !mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0).exists (&mode1))
1824     mode1 = mode;
1825   gcc_assert (mode1 != BLKmode);
1826 
1827   /* Extraction of a full MODE1 value can be done with a subreg as long
1828      as the least significant bit of the value is the least significant
1829      bit of either OP0 or a word of OP0.  */
1830   if (!MEM_P (op0) && !reverse && op0_mode.exists (&imode))
1831     {
1832       rtx sub = extract_bit_field_as_subreg (mode1, op0, imode,
1833 					     bitsize, bitnum);
1834       if (sub)
1835 	return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1836     }
1837 
1838   /* Extraction of a full MODE1 value can be done with a load as long as
1839      the field is on a byte boundary and is sufficiently aligned.  */
1840   poly_uint64 bytenum;
1841   if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1, &bytenum))
1842     {
1843       op0 = adjust_bitfield_address (op0, mode1, bytenum);
1844       if (reverse)
1845 	op0 = flip_storage_order (mode1, op0);
1846       return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1847     }
1848 
1849   /* If we have a memory source and a non-constant bit offset, restrict
1850      the memory to the referenced bytes.  This is a worst-case fallback
1851      but is useful for things like vector booleans.  */
1852   if (MEM_P (op0) && !bitnum.is_constant ())
1853     {
1854       bytenum = bits_to_bytes_round_down (bitnum);
1855       bitnum = num_trailing_bits (bitnum);
1856       poly_uint64 bytesize = bits_to_bytes_round_up (bitnum + bitsize);
1857       op0 = adjust_bitfield_address_size (op0, BLKmode, bytenum, bytesize);
1858       op0_mode = opt_scalar_int_mode ();
1859     }
1860 
1861   /* It's possible we'll need to handle other cases here for
1862      polynomial bitnum and bitsize.  */
1863 
1864   /* From here on we need to be looking at a fixed-size extraction.  */
1865   return extract_integral_bit_field (op0, op0_mode, bitsize.to_constant (),
1866 				     bitnum.to_constant (), unsignedp,
1867 				     target, mode, tmode, reverse, fallback_p);
1868 }
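
/* Example of the vec_extract path in the function above (editorial sketch):
   extracting element 2 of a V4SImode register as SImode has BITSIZE == 32
   and BITNUM == 64, so multiple_p computes POS == 2 and the expander emits
   the target's vec_extract pattern with operands (target, vector, 2) instead
   of falling through to the shift-and-mask code.  */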
1869 
1870 /* Subroutine of extract_bit_field_1, with the same arguments, except
1871    that BITSIZE and BITNUM are constant.  Handle cases specific to
1872    integral modes.  If OP0_MODE is defined, it is the mode of OP0,
1873    otherwise OP0 is a BLKmode MEM.  */
1874 
1875 static rtx
1876 extract_integral_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
1877 			    unsigned HOST_WIDE_INT bitsize,
1878 			    unsigned HOST_WIDE_INT bitnum, int unsignedp,
1879 			    rtx target, machine_mode mode, machine_mode tmode,
1880 			    bool reverse, bool fallback_p)
1881 {
1882   /* Handle fields bigger than a word.  */
1883 
1884   if (bitsize > BITS_PER_WORD)
1885     {
1886       /* Here we transfer the words of the field
1887 	 in the order least significant first.
1888 	 This is because the most significant word is the one which may
1889 	 be less than full.  */
1890 
1891       const bool backwards = WORDS_BIG_ENDIAN;
1892       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1893       unsigned int i;
1894       rtx_insn *last;
1895 
1896       if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1897 	target = gen_reg_rtx (mode);
1898 
1899       /* In case we're about to clobber a base register or something
1900 	 (see gcc.c-torture/execute/20040625-1.c).   */
1901       if (reg_mentioned_p (target, op0))
1902 	target = gen_reg_rtx (mode);
1903 
1904       /* Indicate for flow that the entire target reg is being set.  */
1905       emit_clobber (target);
1906 
1907       /* The mode must be fixed-size, since extract_bit_field_1 handles
1908 	 extractions from variable-sized objects before calling this
1909 	 function.  */
1910       unsigned int target_size
1911 	= GET_MODE_SIZE (GET_MODE (target)).to_constant ();
1912       last = get_last_insn ();
1913       for (i = 0; i < nwords; i++)
1914 	{
1915 	  /* If I is 0, use the low-order word in both field and target;
1916 	     if I is 1, use the next to lowest word; and so on.  */
1917 	  /* Word number in TARGET to use.  */
1918 	  unsigned int wordnum
1919 	    = (backwards ? target_size / UNITS_PER_WORD - i - 1 : i);
1920 	  /* Offset from start of field in OP0.  */
1921 	  unsigned int bit_offset = (backwards ^ reverse
1922 				     ? MAX ((int) bitsize - ((int) i + 1)
1923 					    * BITS_PER_WORD,
1924 					    0)
1925 				     : (int) i * BITS_PER_WORD);
1926 	  rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1927 	  rtx result_part
1928 	    = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1929 					     bitsize - i * BITS_PER_WORD),
1930 				   bitnum + bit_offset, 1, target_part,
1931 				   mode, word_mode, reverse, fallback_p, NULL);
1932 
1933 	  gcc_assert (target_part);
1934 	  if (!result_part)
1935 	    {
1936 	      delete_insns_since (last);
1937 	      return NULL;
1938 	    }
1939 
1940 	  if (result_part != target_part)
1941 	    emit_move_insn (target_part, result_part);
1942 	}
1943 
1944       if (unsignedp)
1945 	{
1946 	  /* Unless we've filled TARGET, the upper regs in a multi-reg value
1947 	     need to be zero'd out.  */
1948 	  if (target_size > nwords * UNITS_PER_WORD)
1949 	    {
1950 	      unsigned int i, total_words;
1951 
1952 	      total_words = target_size / UNITS_PER_WORD;
1953 	      for (i = nwords; i < total_words; i++)
1954 		emit_move_insn
1955 		  (operand_subword (target,
1956 				    backwards ? total_words - i - 1 : i,
1957 				    1, VOIDmode),
1958 		   const0_rtx);
1959 	    }
1960 	  return target;
1961 	}
1962 
1963       /* Signed bit field: sign-extend with two arithmetic shifts.  */
1964       target = expand_shift (LSHIFT_EXPR, mode, target,
1965 			     GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1966       return expand_shift (RSHIFT_EXPR, mode, target,
1967 			   GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1968     }
1969 
1970   /* If OP0 is a multi-word register, narrow it to the affected word.
1971      If the region spans two words, defer to extract_split_bit_field.  */
1972   if (!MEM_P (op0) && GET_MODE_SIZE (op0_mode.require ()) > UNITS_PER_WORD)
1973     {
1974       if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
1975 	{
1976 	  if (!fallback_p)
1977 	    return NULL_RTX;
1978 	  target = extract_split_bit_field (op0, op0_mode, bitsize, bitnum,
1979 					    unsignedp, reverse);
1980 	  return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1981 	}
1982       /* If OP0 is a hard register, copy it to a pseudo before calling
1983 	 simplify_gen_subreg.  */
1984       if (REG_P (op0) && HARD_REGISTER_P (op0))
1985 	op0 = copy_to_reg (op0);
1986       op0 = simplify_gen_subreg (word_mode, op0, op0_mode.require (),
1987 				 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1988       op0_mode = word_mode;
1989       bitnum %= BITS_PER_WORD;
1990     }
1991 
1992   /* From here on we know the desired field is smaller than a word.
1993      If OP0 is a register, it too fits within a word.  */
1994   enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv;
1995   extraction_insn extv;
1996   if (!MEM_P (op0)
1997       && !reverse
1998       /* ??? We could limit the structure size to the part of OP0 that
1999 	 contains the field, with appropriate checks for endianness
2000 	 and TARGET_TRULY_NOOP_TRUNCATION.  */
2001       && get_best_reg_extraction_insn (&extv, pattern,
2002 				       GET_MODE_BITSIZE (op0_mode.require ()),
2003 				       tmode))
2004     {
2005       rtx result = extract_bit_field_using_extv (&extv, op0, op0_mode,
2006 						 bitsize, bitnum,
2007 						 unsignedp, target, mode,
2008 						 tmode);
2009       if (result)
2010 	return result;
2011     }
2012 
2013   /* If OP0 is a memory, try copying it to a register and seeing if a
2014      cheap register alternative is available.  */
2015   if (MEM_P (op0) && !reverse)
2016     {
2017       if (get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum,
2018 					tmode))
2019 	{
2020 	  rtx result = extract_bit_field_using_extv (&extv, op0, op0_mode,
2021 						     bitsize, bitnum,
2022 						     unsignedp, target, mode,
2023 						     tmode);
2024 	  if (result)
2025 	    return result;
2026 	}
2027 
2028       rtx_insn *last = get_last_insn ();
2029 
2030       /* Try loading part of OP0 into a register and extracting the
2031 	 bitfield from that.  */
2032       unsigned HOST_WIDE_INT bitpos;
2033       rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum,
2034 					       0, 0, tmode, &bitpos);
2035       if (xop0)
2036 	{
2037 	  xop0 = copy_to_reg (xop0);
2038 	  rtx result = extract_bit_field_1 (xop0, bitsize, bitpos,
2039 					    unsignedp, target,
2040 					    mode, tmode, reverse, false, NULL);
2041 	  if (result)
2042 	    return result;
2043 	  delete_insns_since (last);
2044 	}
2045     }
2046 
2047   if (!fallback_p)
2048     return NULL;
2049 
2050   /* Find a correspondingly-sized integer field, so we can apply
2051      shifts and masks to it.  */
2052   scalar_int_mode int_mode;
2053   if (!int_mode_for_mode (tmode).exists (&int_mode))
2054     /* If this fails, we should probably push op0 out to memory and then
2055        do a load.  */
2056     int_mode = int_mode_for_mode (mode).require ();
2057 
2058   target = extract_fixed_bit_field (int_mode, op0, op0_mode, bitsize,
2059 				    bitnum, target, unsignedp, reverse);
2060 
2061   /* Complex values must be reversed piecewise, so we need to undo the global
2062      reversal, convert to the complex mode and reverse again.  */
2063   if (reverse && COMPLEX_MODE_P (tmode))
2064     {
2065       target = flip_storage_order (int_mode, target);
2066       target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
2067       target = flip_storage_order (tmode, target);
2068     }
2069   else
2070     target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
2071 
2072   return target;
2073 }
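
/* Illustrative trace of the multi-word branch above (assuming
   BITS_PER_WORD == 64 and !WORDS_BIG_ENDIAN): extracting a signed 96-bit
   field into a 128-bit integer mode uses NWORDS == 2 word-sized
   sub-extractions, moves them into the low and high words of TARGET, and
   then sign-extends the result with a left shift by 128 - 96 == 32 followed
   by an arithmetic right shift by the same amount.  */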
2074 
2075 /* Generate code to extract a byte-field from STR_RTX
2076    containing BITSIZE bits, starting at BITNUM,
2077    and put it in TARGET if possible (if TARGET is nonzero).
2078    Regardless of TARGET, we return the rtx for where the value is placed.
2079 
2080    STR_RTX is the structure containing the byte (a REG or MEM).
2081    UNSIGNEDP is nonzero if this is an unsigned bit field.
2082    MODE is the natural mode of the field value once extracted.
2083    TMODE is the mode the caller would like the value to have;
2084    but the value may be returned with type MODE instead.
2085 
2086    If REVERSE is true, the extraction is to be done in reverse order.
2087 
2088    If a TARGET is specified and we can store in it at no extra cost,
2089    we do so, and return TARGET.
2090    Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
2091    if they are equally easy.
2092 
2093    If the result can be stored at TARGET, and ALT_RTL is non-NULL,
2094    then *ALT_RTL is set to TARGET (before legitimization).  */
2095 
2096 rtx
2097 extract_bit_field (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
2098 		   int unsignedp, rtx target, machine_mode mode,
2099 		   machine_mode tmode, bool reverse, rtx *alt_rtl)
2100 {
2101   machine_mode mode1;
2102 
2103   /* Handle -fstrict-volatile-bitfields in the cases where it applies.  */
2104   if (maybe_ne (GET_MODE_BITSIZE (GET_MODE (str_rtx)), 0))
2105     mode1 = GET_MODE (str_rtx);
2106   else if (target && maybe_ne (GET_MODE_BITSIZE (GET_MODE (target)), 0))
2107     mode1 = GET_MODE (target);
2108   else
2109     mode1 = tmode;
2110 
2111   unsigned HOST_WIDE_INT ibitsize, ibitnum;
2112   scalar_int_mode int_mode;
2113   if (bitsize.is_constant (&ibitsize)
2114       && bitnum.is_constant (&ibitnum)
2115       && is_a <scalar_int_mode> (mode1, &int_mode)
2116       && strict_volatile_bitfield_p (str_rtx, ibitsize, ibitnum,
2117 				     int_mode, 0, 0))
2118     {
2119       /* Extraction of a full INT_MODE value can be done with a simple load.
2120 	 We know here that the field can be accessed with one single
2121 	 instruction.  For targets that support unaligned memory,
2122 	 an unaligned access may be necessary.  */
2123       if (ibitsize == GET_MODE_BITSIZE (int_mode))
2124 	{
2125 	  rtx result = adjust_bitfield_address (str_rtx, int_mode,
2126 						ibitnum / BITS_PER_UNIT);
2127 	  if (reverse)
2128 	    result = flip_storage_order (int_mode, result);
2129 	  gcc_assert (ibitnum % BITS_PER_UNIT == 0);
2130 	  return convert_extracted_bit_field (result, mode, tmode, unsignedp);
2131 	}
2132 
2133       str_rtx = narrow_bit_field_mem (str_rtx, int_mode, ibitsize, ibitnum,
2134 				      &ibitnum);
2135       gcc_assert (ibitnum + ibitsize <= GET_MODE_BITSIZE (int_mode));
2136       str_rtx = copy_to_reg (str_rtx);
2137       return extract_bit_field_1 (str_rtx, ibitsize, ibitnum, unsignedp,
2138 				  target, mode, tmode, reverse, true, alt_rtl);
2139     }
2140 
2141   return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
2142 			      target, mode, tmode, reverse, true, alt_rtl);
2143 }
2144 
2145 /* Use shifts and boolean operations to extract a field of BITSIZE bits
2146    from bit BITNUM of OP0.  If OP0_MODE is defined, it is the mode of OP0,
2147    otherwise OP0 is a BLKmode MEM.
2148 
2149    UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
2150    If REVERSE is true, the extraction is to be done in reverse order.
2151 
2152    If TARGET is nonzero, attempts to store the value there
2153    and return TARGET, but this is not guaranteed.
2154    If TARGET is not used, create a pseudo-reg of mode TMODE for the value.  */
2155 
2156 static rtx
2157 extract_fixed_bit_field (machine_mode tmode, rtx op0,
2158 			 opt_scalar_int_mode op0_mode,
2159 			 unsigned HOST_WIDE_INT bitsize,
2160 			 unsigned HOST_WIDE_INT bitnum, rtx target,
2161 			 int unsignedp, bool reverse)
2162 {
2163   scalar_int_mode mode;
2164   if (MEM_P (op0))
2165     {
2166       if (!get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0),
2167 			  BITS_PER_WORD, MEM_VOLATILE_P (op0), &mode))
2168 	/* The only way this should occur is if the field spans word
2169 	   boundaries.  */
2170 	return extract_split_bit_field (op0, op0_mode, bitsize, bitnum,
2171 					unsignedp, reverse);
2172 
2173       op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
2174     }
2175   else
2176     mode = op0_mode.require ();
2177 
2178   return extract_fixed_bit_field_1 (tmode, op0, mode, bitsize, bitnum,
2179 				    target, unsignedp, reverse);
2180 }
2181 
2182 /* Helper function for extract_fixed_bit_field, extracts
2183    the bit field always using MODE, which is the mode of OP0.
2184    The other arguments are as for extract_fixed_bit_field.  */
2185 
2186 static rtx
2187 extract_fixed_bit_field_1 (machine_mode tmode, rtx op0, scalar_int_mode mode,
2188 			   unsigned HOST_WIDE_INT bitsize,
2189 			   unsigned HOST_WIDE_INT bitnum, rtx target,
2190 			   int unsignedp, bool reverse)
2191 {
2192   /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
2193      for invalid input, such as extract equivalent of f5 from
2194      gcc.dg/pr48335-2.c.  */
2195 
2196   if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2197     /* BITNUM is the distance between our msb and that of OP0.
2198        Convert it to the distance from the lsb.  */
2199     bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
2200 
2201   /* Now BITNUM is always the distance between the field's lsb and that of OP0.
2202      We have reduced the big-endian case to the little-endian case.  */
2203   if (reverse)
2204     op0 = flip_storage_order (mode, op0);
2205 
2206   if (unsignedp)
2207     {
2208       if (bitnum)
2209 	{
2210 	  /* If the field does not already start at the lsb,
2211 	     shift it so it does.  */
2212 	  /* Maybe propagate the target for the shift.  */
2213 	  rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2214 	  if (tmode != mode)
2215 	    subtarget = 0;
2216 	  op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
2217 	}
2218       /* Convert the value to the desired mode.  TMODE must also be a
2219 	 scalar integer for this conversion to make sense, since we
2220 	 shouldn't reinterpret the bits.  */
2221       scalar_int_mode new_mode = as_a <scalar_int_mode> (tmode);
2222       if (mode != new_mode)
2223 	op0 = convert_to_mode (new_mode, op0, 1);
2224 
2225       /* Unless the msb of the field used to be the msb when we shifted,
2226 	 mask out the upper bits.  */
2227 
2228       if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
2229 	return expand_binop (new_mode, and_optab, op0,
2230 			     mask_rtx (new_mode, 0, bitsize, 0),
2231 			     target, 1, OPTAB_LIB_WIDEN);
2232       return op0;
2233     }
2234 
2235   /* To extract a signed bit-field, first shift its msb to the msb of the word,
2236      then arithmetic-shift its lsb to the lsb of the word.  */
2237   op0 = force_reg (mode, op0);
2238 
2239   /* Find the narrowest integer mode that contains the field.  */
2240 
2241   opt_scalar_int_mode mode_iter;
2242   FOR_EACH_MODE_IN_CLASS (mode_iter, MODE_INT)
2243     if (GET_MODE_BITSIZE (mode_iter.require ()) >= bitsize + bitnum)
2244       break;
2245 
2246   mode = mode_iter.require ();
2247   op0 = convert_to_mode (mode, op0, 0);
2248 
2249   if (mode != tmode)
2250     target = 0;
2251 
2252   if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
2253     {
2254       int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
2255       /* Maybe propagate the target for the shift.  */
2256       rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2257       op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
2258     }
2259 
2260   return expand_shift (RSHIFT_EXPR, mode, op0,
2261 		       GET_MODE_BITSIZE (mode) - bitsize, target, 0);
2262 }
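
/* The net effect of the function above, written as plain C for a field with
   BITSIZE == 7 at BITNUM == 20 of a 32-bit value (illustrative only,
   ignoring C's caveats about shifting signed values):

     unsigned:  (x >> 20) & 0x7f
     signed:    ((int32_t) (x << 5)) >> 25

   i.e. shift the field down and mask in the unsigned case, or shift its msb
   up to the sign bit and arithmetic-shift back down in the signed case.  */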
2263 
2264 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
2265    VALUE << BITPOS.  */
2266 
2267 static rtx
2268 lshift_value (machine_mode mode, unsigned HOST_WIDE_INT value,
2269 	      int bitpos)
2270 {
2271   return immed_wide_int_const (wi::lshift (value, bitpos), mode);
2272 }
2273 
2274 /* Extract a bit field that is split across two words
2275    and return an RTX for the result.
2276 
2277    OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
2278    BITSIZE is the field width; BITPOS, position of its first bit, in the word.
2279    UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.
2280    If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is
2281    a BLKmode MEM.
2282 
2283    If REVERSE is true, the extraction is to be done in reverse order.  */
2284 
2285 static rtx
2286 extract_split_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
2287 			 unsigned HOST_WIDE_INT bitsize,
2288 			 unsigned HOST_WIDE_INT bitpos, int unsignedp,
2289 			 bool reverse)
2290 {
2291   unsigned int unit;
2292   unsigned int bitsdone = 0;
2293   rtx result = NULL_RTX;
2294   int first = 1;
2295 
2296   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
2297      much at a time.  */
2298   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
2299     unit = BITS_PER_WORD;
2300   else
2301     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
2302 
2303   while (bitsdone < bitsize)
2304     {
2305       unsigned HOST_WIDE_INT thissize;
2306       rtx part;
2307       unsigned HOST_WIDE_INT thispos;
2308       unsigned HOST_WIDE_INT offset;
2309 
2310       offset = (bitpos + bitsdone) / unit;
2311       thispos = (bitpos + bitsdone) % unit;
2312 
2313       /* THISSIZE must not overrun a word boundary.  Otherwise,
2314 	 extract_fixed_bit_field will call us again, and we will mutually
2315 	 recurse forever.  */
2316       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
2317       thissize = MIN (thissize, unit - thispos);
2318 
2319       /* If OP0 is a register, then handle OFFSET here.  */
2320       rtx op0_piece = op0;
2321       opt_scalar_int_mode op0_piece_mode = op0_mode;
2322       if (SUBREG_P (op0) || REG_P (op0))
2323 	{
2324 	  op0_piece = operand_subword_force (op0, offset, op0_mode.require ());
2325 	  op0_piece_mode = word_mode;
2326 	  offset = 0;
2327 	}
2328 
2329       /* Extract the parts in bit-counting order,
2330 	 whose meaning is determined by BYTES_BIG_ENDIAN.
2331 	 OFFSET is in UNITs, and UNIT is in bits.  */
2332       part = extract_fixed_bit_field (word_mode, op0_piece, op0_piece_mode,
2333 				      thissize, offset * unit + thispos,
2334 				      0, 1, reverse);
2335       bitsdone += thissize;
2336 
2337       /* Shift this part into place for the result.  */
2338       if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2339 	{
2340 	  if (bitsize != bitsdone)
2341 	    part = expand_shift (LSHIFT_EXPR, word_mode, part,
2342 				 bitsize - bitsdone, 0, 1);
2343 	}
2344       else
2345 	{
2346 	  if (bitsdone != thissize)
2347 	    part = expand_shift (LSHIFT_EXPR, word_mode, part,
2348 				 bitsdone - thissize, 0, 1);
2349 	}
2350 
2351       if (first)
2352 	result = part;
2353       else
2354 	/* Combine the parts with bitwise or.  This works
2355 	   because we extracted each part as an unsigned bit field.  */
2356 	result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2357 			       OPTAB_LIB_WIDEN);
2358 
2359       first = 0;
2360     }
2361 
2362   /* Unsigned bit field: we are done.  */
2363   if (unsignedp)
2364     return result;
2365   /* Signed bit field: sign-extend with two arithmetic shifts.  */
2366   result = expand_shift (LSHIFT_EXPR, word_mode, result,
2367 			 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2368   return expand_shift (RSHIFT_EXPR, word_mode, result,
2369 		       BITS_PER_WORD - bitsize, NULL_RTX, 0);
2370 }
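
/* Worked example (editorial sketch, !BYTES_BIG_ENDIAN, BITS_PER_WORD == 32):
   extracting a 12-bit field at BITPOS == 26 from a two-word register pulls
   6 bits from word 0 and 6 bits from word 1 and recombines them roughly as

     result = low_part | (high_part << 6);

   where LOW_PART holds bits 26..31 of word 0 and HIGH_PART holds bits 0..5
   of word 1.  The final pair of shifts then sign-extends the 12-bit result
   when UNSIGNEDP is zero.  */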
2371 
2372 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2373    the bit pattern.  SRC_MODE is the mode of SRC; if this is smaller than
2374    MODE, fill the upper bits with zeros.  Fail if the layout of either
2375    mode is unknown (as for CC modes) or if the extraction would involve
2376    unprofitable mode punning.  Return the value on success, otherwise
2377    return null.
2378 
2379    This is different from gen_lowpart* in these respects:
2380 
2381      - the returned value must always be considered an rvalue
2382 
2383      - when MODE is wider than SRC_MODE, the extraction involves
2384        a zero extension
2385 
2386      - when MODE is smaller than SRC_MODE, the extraction involves
2387        a truncation (and is thus subject to TARGET_TRULY_NOOP_TRUNCATION).
2388 
2389    In other words, this routine performs a computation, whereas the
2390    gen_lowpart* routines are conceptually lvalue or rvalue subreg
2391    operations.  */
2392 
2393 rtx
2394 extract_low_bits (machine_mode mode, machine_mode src_mode, rtx src)
2395 {
2396   scalar_int_mode int_mode, src_int_mode;
2397 
2398   if (mode == src_mode)
2399     return src;
2400 
2401   if (CONSTANT_P (src))
2402     {
2403       /* simplify_gen_subreg can't be used here, as if simplify_subreg
2404 	 fails, it will happily create (subreg (symbol_ref)) or similar
2405 	 invalid SUBREGs.  */
2406       poly_uint64 byte = subreg_lowpart_offset (mode, src_mode);
2407       rtx ret = simplify_subreg (mode, src, src_mode, byte);
2408       if (ret)
2409 	return ret;
2410 
2411       if (GET_MODE (src) == VOIDmode
2412 	  || !validate_subreg (mode, src_mode, src, byte))
2413 	return NULL_RTX;
2414 
2415       src = force_reg (GET_MODE (src), src);
2416       return gen_rtx_SUBREG (mode, src, byte);
2417     }
2418 
2419   if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2420     return NULL_RTX;
2421 
2422   if (known_eq (GET_MODE_BITSIZE (mode), GET_MODE_BITSIZE (src_mode))
2423       && targetm.modes_tieable_p (mode, src_mode))
2424     {
2425       rtx x = gen_lowpart_common (mode, src);
2426       if (x)
2427         return x;
2428     }
2429 
2430   if (!int_mode_for_mode (src_mode).exists (&src_int_mode)
2431       || !int_mode_for_mode (mode).exists (&int_mode))
2432     return NULL_RTX;
2433 
2434   if (!targetm.modes_tieable_p (src_int_mode, src_mode))
2435     return NULL_RTX;
2436   if (!targetm.modes_tieable_p (int_mode, mode))
2437     return NULL_RTX;
2438 
2439   src = gen_lowpart (src_int_mode, src);
2440   if (!validate_subreg (int_mode, src_int_mode, src,
2441 			subreg_lowpart_offset (int_mode, src_int_mode)))
2442     return NULL_RTX;
2443 
2444   src = convert_modes (int_mode, src_int_mode, src, true);
2445   src = gen_lowpart (mode, src);
2446   return src;
2447 }
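
/* Example (illustrative, assuming a target where SImode and SFmode are
   tieable): extract_low_bits (SFmode, DImode, x) takes the integer path
   above: X is truncated from DImode to SImode by convert_modes and the
   32-bit result is then reinterpreted as SFmode via gen_lowpart, so the bit
   pattern of the low 32 bits is preserved rather than converted
   numerically.  */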
2448 
2449 /* Add INC into TARGET.  */
2450 
2451 void
2452 expand_inc (rtx target, rtx inc)
2453 {
2454   rtx value = expand_binop (GET_MODE (target), add_optab,
2455 			    target, inc,
2456 			    target, 0, OPTAB_LIB_WIDEN);
2457   if (value != target)
2458     emit_move_insn (target, value);
2459 }
2460 
2461 /* Subtract DEC from TARGET.  */
2462 
2463 void
2464 expand_dec (rtx target, rtx dec)
2465 {
2466   rtx value = expand_binop (GET_MODE (target), sub_optab,
2467 			    target, dec,
2468 			    target, 0, OPTAB_LIB_WIDEN);
2469   if (value != target)
2470     emit_move_insn (target, value);
2471 }
2472 
2473 /* Output a shift instruction for expression code CODE,
2474    with SHIFTED being the rtx for the value to shift,
2475    and AMOUNT the rtx for the amount to shift by.
2476    Store the result in the rtx TARGET, if that is convenient.
2477    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2478    Return the rtx for where the value is.
2479    If that cannot be done, abort the compilation unless MAY_FAIL is true,
2480    in which case 0 is returned.  */
2481 
2482 static rtx
2483 expand_shift_1 (enum tree_code code, machine_mode mode, rtx shifted,
2484 		rtx amount, rtx target, int unsignedp, bool may_fail = false)
2485 {
2486   rtx op1, temp = 0;
2487   int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2488   int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2489   optab lshift_optab = ashl_optab;
2490   optab rshift_arith_optab = ashr_optab;
2491   optab rshift_uns_optab = lshr_optab;
2492   optab lrotate_optab = rotl_optab;
2493   optab rrotate_optab = rotr_optab;
2494   machine_mode op1_mode;
2495   scalar_mode scalar_mode = GET_MODE_INNER (mode);
2496   int attempt;
2497   bool speed = optimize_insn_for_speed_p ();
2498 
2499   op1 = amount;
2500   op1_mode = GET_MODE (op1);
2501 
2502   /* Determine whether the shift/rotate amount is a vector, or scalar.  If the
2503      shift amount is a vector, use the vector/vector shift patterns.  */
2504   if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2505     {
2506       lshift_optab = vashl_optab;
2507       rshift_arith_optab = vashr_optab;
2508       rshift_uns_optab = vlshr_optab;
2509       lrotate_optab = vrotl_optab;
2510       rrotate_optab = vrotr_optab;
2511     }
2512 
2513   /* Previously detected shift-counts computed by NEGATE_EXPR
2514      and shifted in the other direction; but that does not work
2515      on all machines.  */
2516 
2517   if (SHIFT_COUNT_TRUNCATED)
2518     {
2519       if (CONST_INT_P (op1)
2520 	  && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2521 	      (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (scalar_mode)))
2522 	op1 = gen_int_shift_amount (mode,
2523 				    (unsigned HOST_WIDE_INT) INTVAL (op1)
2524 				    % GET_MODE_BITSIZE (scalar_mode));
2525       else if (GET_CODE (op1) == SUBREG
2526 	       && subreg_lowpart_p (op1)
2527 	       && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1)))
2528 	       && SCALAR_INT_MODE_P (GET_MODE (op1)))
2529 	op1 = SUBREG_REG (op1);
2530     }
2531 
2532   /* Canonicalize rotates by constant amount.  If op1 is bitsize / 2,
2533      prefer left rotation, if op1 is from bitsize / 2 + 1 to
2534      bitsize - 1, use other direction of rotate with 1 .. bitsize / 2 - 1
2535      amount instead.  */
2536   if (rotate
2537       && CONST_INT_P (op1)
2538       && IN_RANGE (INTVAL (op1), GET_MODE_BITSIZE (scalar_mode) / 2 + left,
2539 		   GET_MODE_BITSIZE (scalar_mode) - 1))
2540     {
2541       op1 = gen_int_shift_amount (mode, (GET_MODE_BITSIZE (scalar_mode)
2542 					 - INTVAL (op1)));
2543       left = !left;
2544       code = left ? LROTATE_EXPR : RROTATE_EXPR;
2545     }
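
  /* For example (illustrative): with a 32-bit SCALAR_MODE, a left rotate by
     a constant 27 is canonicalized above into a right rotate by 5, and a
     right rotate by exactly 16 becomes the preferred left rotate by 16.  */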
2546 
2547   /* Rotation of 16bit values by 8 bits is effectively equivalent to a bswaphi.
2548      Note that this is not the case for bigger values.  For instance a rotation
2549      of 0x01020304 by 16 bits gives 0x03040102 which is different from
2550      0x04030201 (bswapsi).  */
2551   if (rotate
2552       && CONST_INT_P (op1)
2553       && INTVAL (op1) == BITS_PER_UNIT
2554       && GET_MODE_SIZE (scalar_mode) == 2
2555       && optab_handler (bswap_optab, mode) != CODE_FOR_nothing)
2556     return expand_unop (mode, bswap_optab, shifted, NULL_RTX, unsignedp);
2557 
2558   if (op1 == const0_rtx)
2559     return shifted;
2560 
2561   /* Check whether it's cheaper to implement a left shift by a constant
2562      bit count by a sequence of additions.  */
2563   if (code == LSHIFT_EXPR
2564       && CONST_INT_P (op1)
2565       && INTVAL (op1) > 0
2566       && INTVAL (op1) < GET_MODE_PRECISION (scalar_mode)
2567       && INTVAL (op1) < MAX_BITS_PER_WORD
2568       && (shift_cost (speed, mode, INTVAL (op1))
2569 	  > INTVAL (op1) * add_cost (speed, mode))
2570       && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2571     {
2572       int i;
2573       for (i = 0; i < INTVAL (op1); i++)
2574 	{
2575 	  temp = force_reg (mode, shifted);
2576 	  shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2577 				  unsignedp, OPTAB_LIB_WIDEN);
2578 	}
2579       return shifted;
2580     }
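
  /* Illustrative: when the target's shift by 2 costs more than two
     additions, the loop above expands x << 2 roughly as

       t = x + x;  result = t + t;

     one self-addition per bit of the (small, constant) shift count.  */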
2581 
2582   for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2583     {
2584       enum optab_methods methods;
2585 
2586       if (attempt == 0)
2587 	methods = OPTAB_DIRECT;
2588       else if (attempt == 1)
2589 	methods = OPTAB_WIDEN;
2590       else
2591 	methods = OPTAB_LIB_WIDEN;
2592 
2593       if (rotate)
2594 	{
2595 	  /* Widening does not work for rotation.  */
2596 	  if (methods == OPTAB_WIDEN)
2597 	    continue;
2598 	  else if (methods == OPTAB_LIB_WIDEN)
2599 	    {
2600 	      /* If we have been unable to open-code this by a rotation,
2601 		 do it as the IOR of two shifts.  I.e., to rotate A
2602 		 by N bits, compute
2603 		 (A << N) | ((unsigned) A >> ((-N) & (C - 1)))
2604 		 where C is the bitsize of A.
2605 
2606 		 It is theoretically possible that the target machine might
2607 		 not be able to perform either shift and hence we would
2608 		 be making two libcalls rather than just the one for the
2609 		 shift (similarly if IOR could not be done).  We will allow
2610 		 this extremely unlikely lossage to avoid complicating the
2611 		 code below.  */
2612 
2613 	      rtx subtarget = target == shifted ? 0 : target;
2614 	      rtx new_amount, other_amount;
2615 	      rtx temp1;
2616 
2617 	      new_amount = op1;
2618 	      if (op1 == const0_rtx)
2619 		return shifted;
2620 	      else if (CONST_INT_P (op1))
2621 		other_amount = gen_int_shift_amount
2622 		  (mode, GET_MODE_BITSIZE (scalar_mode) - INTVAL (op1));
2623 	      else
2624 		{
2625 		  other_amount
2626 		    = simplify_gen_unary (NEG, GET_MODE (op1),
2627 					  op1, GET_MODE (op1));
2628 		  HOST_WIDE_INT mask = GET_MODE_PRECISION (scalar_mode) - 1;
2629 		  other_amount
2630 		    = simplify_gen_binary (AND, GET_MODE (op1), other_amount,
2631 					   gen_int_mode (mask, GET_MODE (op1)));
2632 		}
2633 
2634 	      shifted = force_reg (mode, shifted);
2635 
2636 	      temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2637 				     mode, shifted, new_amount, 0, 1);
2638 	      temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2639 				      mode, shifted, other_amount,
2640 				      subtarget, 1);
2641 	      return expand_binop (mode, ior_optab, temp, temp1, target,
2642 				   unsignedp, methods);
2643 	    }
2644 
2645 	  temp = expand_binop (mode,
2646 			       left ? lrotate_optab : rrotate_optab,
2647 			       shifted, op1, target, unsignedp, methods);
2648 	}
2649       else if (unsignedp)
2650 	temp = expand_binop (mode,
2651 			     left ? lshift_optab : rshift_uns_optab,
2652 			     shifted, op1, target, unsignedp, methods);
2653 
2654       /* Do arithmetic shifts.
2655 	 Also, if we are going to widen the operand, we can just as well
2656 	 use an arithmetic right-shift instead of a logical one.  */
2657       if (temp == 0 && ! rotate
2658 	  && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2659 	{
2660 	  enum optab_methods methods1 = methods;
2661 
2662 	  /* If trying to widen a log shift to an arithmetic shift,
2663 	     don't accept an arithmetic shift of the same size.  */
2664 	  if (unsignedp)
2665 	    methods1 = OPTAB_MUST_WIDEN;
2666 
2667 	  /* Arithmetic shift */
2668 
2669 	  temp = expand_binop (mode,
2670 			       left ? lshift_optab : rshift_arith_optab,
2671 			       shifted, op1, target, unsignedp, methods1);
2672 	}
2673 
2674       /* We used to try extzv here for logical right shifts, but that was
2675 	 only useful for one machine, the VAX, and caused poor code
2676 	 generation there for lshrdi3, so the code was deleted and a
2677 	 define_expand for lshrsi3 was added to vax.md.  */
2678     }
2679 
2680   gcc_assert (temp != NULL_RTX || may_fail);
2681   return temp;
2682 }
2683 
2684 /* Output a shift instruction for expression code CODE,
2685    with SHIFTED being the rtx for the value to shift,
2686    and AMOUNT the amount to shift by.
2687    Store the result in the rtx TARGET, if that is convenient.
2688    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2689    Return the rtx for where the value is.  */
2690 
2691 rtx
2692 expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2693 	      poly_int64 amount, rtx target, int unsignedp)
2694 {
2695   return expand_shift_1 (code, mode, shifted,
2696 			 gen_int_shift_amount (mode, amount),
2697 			 target, unsignedp);
2698 }
2699 
2700 /* Likewise, but return 0 if that cannot be done.  */
2701 
2702 static rtx
2703 maybe_expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2704 		    int amount, rtx target, int unsignedp)
2705 {
2706   return expand_shift_1 (code, mode,
2707 			 shifted, GEN_INT (amount), target, unsignedp, true);
2708 }
2709 
2710 /* Output a shift instruction for expression code CODE,
2711    with SHIFTED being the rtx for the value to shift,
2712    and AMOUNT the tree for the amount to shift by.
2713    Store the result in the rtx TARGET, if that is convenient.
2714    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2715    Return the rtx for where the value is.  */
2716 
2717 rtx
2718 expand_variable_shift (enum tree_code code, machine_mode mode, rtx shifted,
2719 		       tree amount, rtx target, int unsignedp)
2720 {
2721   return expand_shift_1 (code, mode,
2722 			 shifted, expand_normal (amount), target, unsignedp);
2723 }
2724 
2725 
2726 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2727 			const struct mult_cost *, machine_mode mode);
2728 static rtx expand_mult_const (machine_mode, rtx, HOST_WIDE_INT, rtx,
2729 			      const struct algorithm *, enum mult_variant);
2730 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2731 static rtx extract_high_half (scalar_int_mode, rtx);
2732 static rtx expmed_mult_highpart (scalar_int_mode, rtx, rtx, rtx, int, int);
2733 static rtx expmed_mult_highpart_optab (scalar_int_mode, rtx, rtx, rtx,
2734 				       int, int);
2735 /* Compute and return the best algorithm for multiplying by T.
2736    The algorithm must cost less than COST_LIMIT.
2737    If retval.cost >= COST_LIMIT, no algorithm was found and all
2738    other fields of the returned struct are undefined.
2739    MODE is the machine mode of the multiplication.  */
2740 
2741 static void
2742 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2743 	    const struct mult_cost *cost_limit, machine_mode mode)
2744 {
2745   int m;
2746   struct algorithm *alg_in, *best_alg;
2747   struct mult_cost best_cost;
2748   struct mult_cost new_limit;
2749   int op_cost, op_latency;
2750   unsigned HOST_WIDE_INT orig_t = t;
2751   unsigned HOST_WIDE_INT q;
2752   int maxm, hash_index;
2753   bool cache_hit = false;
2754   enum alg_code cache_alg = alg_zero;
2755   bool speed = optimize_insn_for_speed_p ();
2756   scalar_int_mode imode;
2757   struct alg_hash_entry *entry_ptr;
2758 
2759   /* Indicate that no algorithm is yet found.  If no algorithm
2760      is found, this value will be returned and indicate failure.  */
2761   alg_out->cost.cost = cost_limit->cost + 1;
2762   alg_out->cost.latency = cost_limit->latency + 1;
2763 
2764   if (cost_limit->cost < 0
2765       || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2766     return;
2767 
2768   /* Be prepared for vector modes.  */
2769   imode = as_a <scalar_int_mode> (GET_MODE_INNER (mode));
2770 
2771   maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2772 
2773   /* Restrict the bits of "t" to the multiplication's mode.  */
2774   t &= GET_MODE_MASK (imode);
2775 
2776   /* t == 1 can be done in zero cost.  */
2777   if (t == 1)
2778     {
2779       alg_out->ops = 1;
2780       alg_out->cost.cost = 0;
2781       alg_out->cost.latency = 0;
2782       alg_out->op[0] = alg_m;
2783       return;
2784     }
2785 
2786   /* t == 0 sometimes has a cost.  If it does and it exceeds our limit,
2787      fail now.  */
2788   if (t == 0)
2789     {
2790       if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2791 	return;
2792       else
2793 	{
2794 	  alg_out->ops = 1;
2795 	  alg_out->cost.cost = zero_cost (speed);
2796 	  alg_out->cost.latency = zero_cost (speed);
2797 	  alg_out->op[0] = alg_zero;
2798 	  return;
2799 	}
2800     }
2801 
2802   /* We'll be needing a couple extra algorithm structures now.  */
2803 
2804   alg_in = XALLOCA (struct algorithm);
2805   best_alg = XALLOCA (struct algorithm);
2806   best_cost = *cost_limit;
2807 
2808   /* Compute the hash index.  */
2809   hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2810 
2811   /* See if we already know what to do for T.  */
2812   entry_ptr = alg_hash_entry_ptr (hash_index);
2813   if (entry_ptr->t == t
2814       && entry_ptr->mode == mode
2815       && entry_ptr->speed == speed
2816       && entry_ptr->alg != alg_unknown)
2817     {
2818       cache_alg = entry_ptr->alg;
2819 
2820       if (cache_alg == alg_impossible)
2821 	{
2822 	  /* The cache tells us that it's impossible to synthesize
2823 	     multiplication by T within entry_ptr->cost.  */
2824 	  if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2825 	    /* COST_LIMIT is at least as restrictive as the one
2826 	       recorded in the hash table, in which case we have no
2827 	       hope of synthesizing a multiplication.  Just
2828 	       return.  */
2829 	    return;
2830 
2831 	  /* If we get here, COST_LIMIT is less restrictive than the
2832 	     one recorded in the hash table, so we may be able to
2833 	     synthesize a multiplication.  Proceed as if we didn't
2834 	     have the cache entry.  */
2835 	}
2836       else
2837 	{
2838 	  if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2839 	    /* The cached algorithm shows that this multiplication
2840 	       requires more cost than COST_LIMIT.  Just return.  This
2841 	       way, we don't clobber this cache entry with
2842 	       alg_impossible but retain useful information.  */
2843 	    return;
2844 
2845 	  cache_hit = true;
2846 
2847 	  switch (cache_alg)
2848 	    {
2849 	    case alg_shift:
2850 	      goto do_alg_shift;
2851 
2852 	    case alg_add_t_m2:
2853 	    case alg_sub_t_m2:
2854 	      goto do_alg_addsub_t_m2;
2855 
2856 	    case alg_add_factor:
2857 	    case alg_sub_factor:
2858 	      goto do_alg_addsub_factor;
2859 
2860 	    case alg_add_t2_m:
2861 	      goto do_alg_add_t2_m;
2862 
2863 	    case alg_sub_t2_m:
2864 	      goto do_alg_sub_t2_m;
2865 
2866 	    default:
2867 	      gcc_unreachable ();
2868 	    }
2869 	}
2870     }
2871 
2872   /* If we have a group of zero bits at the low-order part of T, try
2873      multiplying by the remaining bits and then doing a shift.  */
2874 
2875   if ((t & 1) == 0)
2876     {
2877     do_alg_shift:
2878       m = ctz_or_zero (t); /* m = number of low zero bits */
2879       if (m < maxm)
2880 	{
2881 	  q = t >> m;
2882 	  /* The function expand_shift will choose between a shift and
2883 	     a sequence of additions, so the observed cost is given as
2884 	     MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)).  */
2885 	  op_cost = m * add_cost (speed, mode);
2886 	  if (shift_cost (speed, mode, m) < op_cost)
2887 	    op_cost = shift_cost (speed, mode, m);
2888 	  new_limit.cost = best_cost.cost - op_cost;
2889 	  new_limit.latency = best_cost.latency - op_cost;
2890 	  synth_mult (alg_in, q, &new_limit, mode);
2891 
2892 	  alg_in->cost.cost += op_cost;
2893 	  alg_in->cost.latency += op_cost;
2894 	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2895 	    {
2896 	      best_cost = alg_in->cost;
2897 	      std::swap (alg_in, best_alg);
2898 	      best_alg->log[best_alg->ops] = m;
2899 	      best_alg->op[best_alg->ops] = alg_shift;
2900 	    }
2901 
2902 	  /* See if treating ORIG_T as a signed number yields a better
2903 	     sequence.  Try this sequence only for a negative ORIG_T
2904 	     as it would be useless for a non-negative ORIG_T.  */
2905 	  if ((HOST_WIDE_INT) orig_t < 0)
2906 	    {
2907 	      /* Shift ORIG_T as follows because a right shift of a
2908 		 negative-valued signed type is implementation
2909 		 defined.  */
2910 	      q = ~(~orig_t >> m);
2911 	      /* The function expand_shift will choose between a shift
2912 		 and a sequence of additions, so the observed cost is
2913 		 given as MIN (m * add_cost(speed, mode),
2914 		 shift_cost(speed, mode, m)).  */
2915 	      op_cost = m * add_cost (speed, mode);
2916 	      if (shift_cost (speed, mode, m) < op_cost)
2917 		op_cost = shift_cost (speed, mode, m);
2918 	      new_limit.cost = best_cost.cost - op_cost;
2919 	      new_limit.latency = best_cost.latency - op_cost;
2920 	      synth_mult (alg_in, q, &new_limit, mode);
2921 
2922 	      alg_in->cost.cost += op_cost;
2923 	      alg_in->cost.latency += op_cost;
2924 	      if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2925 		{
2926 		  best_cost = alg_in->cost;
2927 		  std::swap (alg_in, best_alg);
2928 		  best_alg->log[best_alg->ops] = m;
2929 		  best_alg->op[best_alg->ops] = alg_shift;
2930 		}
2931 	    }
2932 	}
2933       if (cache_hit)
2934 	goto done;
2935     }
2936 
2937   /* If we have an odd number, add or subtract one.  */
2938   if ((t & 1) != 0)
2939     {
2940       unsigned HOST_WIDE_INT w;
2941 
2942     do_alg_addsub_t_m2:
2943       for (w = 1; (w & t) != 0; w <<= 1)
2944 	;
2945       /* If T was -1, then W will be zero after the loop.  This is another
2946 	 case where T ends with ...111.  Handling this with (T + 1) and
2947 	 subtract 1 produces slightly better code and results in algorithm
2948 	 selection much faster than treating it like the ...0111 case
2949 	 below.  */
2950       if (w == 0
2951 	  || (w > 2
2952 	      /* Reject the case where t is 3.
2953 		 Thus we prefer addition in that case.  */
2954 	      && t != 3))
2955 	{
2956 	  /* T ends with ...111.  Multiply by (T + 1) and subtract T.  */
2957 
2958 	  op_cost = add_cost (speed, mode);
2959 	  new_limit.cost = best_cost.cost - op_cost;
2960 	  new_limit.latency = best_cost.latency - op_cost;
2961 	  synth_mult (alg_in, t + 1, &new_limit, mode);
2962 
2963 	  alg_in->cost.cost += op_cost;
2964 	  alg_in->cost.latency += op_cost;
2965 	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2966 	    {
2967 	      best_cost = alg_in->cost;
2968 	      std::swap (alg_in, best_alg);
2969 	      best_alg->log[best_alg->ops] = 0;
2970 	      best_alg->op[best_alg->ops] = alg_sub_t_m2;
2971 	    }
2972 	}
2973       else
2974 	{
2975 	  /* T ends with ...01 or ...011.  Multiply by (T - 1) and add T.  */
2976 
2977 	  op_cost = add_cost (speed, mode);
2978 	  new_limit.cost = best_cost.cost - op_cost;
2979 	  new_limit.latency = best_cost.latency - op_cost;
2980 	  synth_mult (alg_in, t - 1, &new_limit, mode);
2981 
2982 	  alg_in->cost.cost += op_cost;
2983 	  alg_in->cost.latency += op_cost;
2984 	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2985 	    {
2986 	      best_cost = alg_in->cost;
2987 	      std::swap (alg_in, best_alg);
2988 	      best_alg->log[best_alg->ops] = 0;
2989 	      best_alg->op[best_alg->ops] = alg_add_t_m2;
2990 	    }
2991 	}
2992 
2993       /* We may be able to calculate a * -7, a * -15, a * -31, etc
2994 	 quickly with a - a * n for some appropriate constant n.  */
2995       m = exact_log2 (-orig_t + 1);
2996       if (m >= 0 && m < maxm)
2997 	{
2998 	  op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2999 	  /* If the target has a cheap shift-and-subtract insn use
3000 	     that in preference to a shift insn followed by a sub insn.
3001 	     Assume that the shift-and-sub is "atomic" with a latency
3002 	     equal to its cost, otherwise assume that on superscalar
3003 	     hardware the shift may be executed concurrently with the
3004 	     earlier steps in the algorithm.  */
3005 	  if (shiftsub1_cost (speed, mode, m) <= op_cost)
3006 	    {
3007 	      op_cost = shiftsub1_cost (speed, mode, m);
3008 	      op_latency = op_cost;
3009 	    }
3010 	  else
3011 	    op_latency = add_cost (speed, mode);
3012 
3013 	  new_limit.cost = best_cost.cost - op_cost;
3014 	  new_limit.latency = best_cost.latency - op_latency;
3015 	  synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
3016 		      &new_limit, mode);
3017 
3018 	  alg_in->cost.cost += op_cost;
3019 	  alg_in->cost.latency += op_latency;
3020 	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3021 	    {
3022 	      best_cost = alg_in->cost;
3023 	      std::swap (alg_in, best_alg);
3024 	      best_alg->log[best_alg->ops] = m;
3025 	      best_alg->op[best_alg->ops] = alg_sub_t_m2;
3026 	    }
3027 	}
3028 
3029       if (cache_hit)
3030 	goto done;
3031     }
3032 
3033   /* Look for factors of t of the form
3034      t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
3035      If we find such a factor, we can multiply by t using an algorithm that
3036      multiplies by q, shift the result by m and add/subtract it to itself.
3037 
3038      We search for large factors first and loop down, even if large factors
3039      are less probable than small; if we find a large factor we will find a
3040      good sequence quickly, and therefore be able to prune (by decreasing
3041      COST_LIMIT) the search.  */
3042 
3043  do_alg_addsub_factor:
3044   for (m = floor_log2 (t - 1); m >= 2; m--)
3045     {
3046       unsigned HOST_WIDE_INT d;
3047 
3048       d = (HOST_WIDE_INT_1U << m) + 1;
3049       if (t % d == 0 && t > d && m < maxm
3050 	  && (!cache_hit || cache_alg == alg_add_factor))
3051 	{
3052 	  op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
3053 	  if (shiftadd_cost (speed, mode, m) <= op_cost)
3054 	    op_cost = shiftadd_cost (speed, mode, m);
3055 
3056 	  op_latency = op_cost;
3057 
3058 
3059 	  new_limit.cost = best_cost.cost - op_cost;
3060 	  new_limit.latency = best_cost.latency - op_latency;
3061 	  synth_mult (alg_in, t / d, &new_limit, mode);
3062 
3063 	  alg_in->cost.cost += op_cost;
3064 	  alg_in->cost.latency += op_latency;
3065 	  if (alg_in->cost.latency < op_cost)
3066 	    alg_in->cost.latency = op_cost;
3067 	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3068 	    {
3069 	      best_cost = alg_in->cost;
3070 	      std::swap (alg_in, best_alg);
3071 	      best_alg->log[best_alg->ops] = m;
3072 	      best_alg->op[best_alg->ops] = alg_add_factor;
3073 	    }
3074 	  /* Other factors will have been taken care of in the recursion.  */
3075 	  break;
3076 	}
3077 
3078       d = (HOST_WIDE_INT_1U << m) - 1;
3079       if (t % d == 0 && t > d && m < maxm
3080 	  && (!cache_hit || cache_alg == alg_sub_factor))
3081 	{
3082 	  op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
3083 	  if (shiftsub0_cost (speed, mode, m) <= op_cost)
3084 	    op_cost = shiftsub0_cost (speed, mode, m);
3085 
3086 	  op_latency = op_cost;
3087 
3088 	  new_limit.cost = best_cost.cost - op_cost;
3089 	  new_limit.latency = best_cost.latency - op_latency;
3090 	  synth_mult (alg_in, t / d, &new_limit, mode);
3091 
3092 	  alg_in->cost.cost += op_cost;
3093 	  alg_in->cost.latency += op_latency;
3094 	  if (alg_in->cost.latency < op_cost)
3095 	    alg_in->cost.latency = op_cost;
3096 	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3097 	    {
3098 	      best_cost = alg_in->cost;
3099 	      std::swap (alg_in, best_alg);
3100 	      best_alg->log[best_alg->ops] = m;
3101 	      best_alg->op[best_alg->ops] = alg_sub_factor;
3102 	    }
3103 	  break;
3104 	}
3105     }
3106   if (cache_hit)
3107     goto done;
3108 
3109   /* Try shift-and-add (load effective address) instructions,
3110      i.e. do a*3, a*5, a*9.  */
3111   if ((t & 1) != 0)
3112     {
3113     do_alg_add_t2_m:
3114       q = t - 1;
3115       m = ctz_hwi (q);
3116       if (q && m < maxm)
3117 	{
3118 	  op_cost = shiftadd_cost (speed, mode, m);
3119 	  new_limit.cost = best_cost.cost - op_cost;
3120 	  new_limit.latency = best_cost.latency - op_cost;
3121 	  synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
3122 
3123 	  alg_in->cost.cost += op_cost;
3124 	  alg_in->cost.latency += op_cost;
3125 	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3126 	    {
3127 	      best_cost = alg_in->cost;
3128 	      std::swap (alg_in, best_alg);
3129 	      best_alg->log[best_alg->ops] = m;
3130 	      best_alg->op[best_alg->ops] = alg_add_t2_m;
3131 	    }
3132 	}
3133       if (cache_hit)
3134 	goto done;
3135 
3136     do_alg_sub_t2_m:
3137       q = t + 1;
3138       m = ctz_hwi (q);
3139       if (q && m < maxm)
3140 	{
3141 	  op_cost = shiftsub0_cost (speed, mode, m);
3142 	  new_limit.cost = best_cost.cost - op_cost;
3143 	  new_limit.latency = best_cost.latency - op_cost;
3144 	  synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
3145 
3146 	  alg_in->cost.cost += op_cost;
3147 	  alg_in->cost.latency += op_cost;
3148 	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3149 	    {
3150 	      best_cost = alg_in->cost;
3151 	      std::swap (alg_in, best_alg);
3152 	      best_alg->log[best_alg->ops] = m;
3153 	      best_alg->op[best_alg->ops] = alg_sub_t2_m;
3154 	    }
3155 	}
3156       if (cache_hit)
3157 	goto done;
3158     }
3159 
3160  done:
3161   /* If best_cost has not decreased, we have not found any algorithm.  */
3162   if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
3163     {
3164       /* We failed to find an algorithm.  Record alg_impossible for
3165 	 this case (that is, <T, MODE, COST_LIMIT>) so that next time
3166 	 we are asked to find an algorithm for T within the same or
3167 	 lower COST_LIMIT, we can immediately return to the
3168 	 caller.  */
3169       entry_ptr->t = t;
3170       entry_ptr->mode = mode;
3171       entry_ptr->speed = speed;
3172       entry_ptr->alg = alg_impossible;
3173       entry_ptr->cost = *cost_limit;
3174       return;
3175     }
3176 
3177   /* Cache the result.  */
3178   if (!cache_hit)
3179     {
3180       entry_ptr->t = t;
3181       entry_ptr->mode = mode;
3182       entry_ptr->speed = speed;
3183       entry_ptr->alg = best_alg->op[best_alg->ops];
3184       entry_ptr->cost.cost = best_cost.cost;
3185       entry_ptr->cost.latency = best_cost.latency;
3186     }
3187 
3188   /* If we are getting too long a sequence for `struct algorithm'
3189      to record, make this search fail.  */
3190   if (best_alg->ops == MAX_BITS_PER_WORD)
3191     return;
3192 
3193   /* Copy the algorithm from temporary space to the space at alg_out.
3194      We avoid using structure assignment because the majority of
3195      best_alg is normally undefined, and this is a critical function.  */
3196   alg_out->ops = best_alg->ops + 1;
3197   alg_out->cost = best_cost;
3198   memcpy (alg_out->op, best_alg->op,
3199 	  alg_out->ops * sizeof *alg_out->op);
3200   memcpy (alg_out->log, best_alg->log,
3201 	  alg_out->ops * sizeof *alg_out->log);
3202 }
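
/* Illustrative sketch (exposition only; the helper below is hypothetical and
   nothing in this file uses it): one decomposition the recursion above can
   select for T == 10 is an alg_add_t2_m step followed by an alg_shift step.
   Which sequence actually wins depends on the target's add and shift costs,
   so this is only a plausible outcome, written out in plain C.  */

static inline unsigned int
synth_mult_by_10_sketch (unsigned int x)
{
  unsigned int t = (x << 2) + x;	/* alg_add_t2_m, log == 2: x * 5  */
  return t << 1;			/* alg_shift,    log == 1: x * 10 */
}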
3203 
3204 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
3205    Try three variations:
3206 
3207        - a shift/add sequence based on VAL itself
3208        - a shift/add sequence based on -VAL, followed by a negation
3209        - a shift/add sequence based on VAL - 1, followed by an addition.
3210 
3211    Return true if the cheapest of these cost less than MULT_COST,
3212    describing the algorithm in *ALG and final fixup in *VARIANT.  */
3213 
3214 bool
3215 choose_mult_variant (machine_mode mode, HOST_WIDE_INT val,
3216 		     struct algorithm *alg, enum mult_variant *variant,
3217 		     int mult_cost)
3218 {
3219   struct algorithm alg2;
3220   struct mult_cost limit;
3221   int op_cost;
3222   bool speed = optimize_insn_for_speed_p ();
3223 
3224   /* Fail quickly for impossible bounds.  */
3225   if (mult_cost < 0)
3226     return false;
3227 
3228   /* Ensure that mult_cost provides a reasonable upper bound.
3229      Any constant multiplication can be performed with less
3230      than 2 * bits additions.  */
3231   op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
3232   if (mult_cost > op_cost)
3233     mult_cost = op_cost;
3234 
3235   *variant = basic_variant;
3236   limit.cost = mult_cost;
3237   limit.latency = mult_cost;
3238   synth_mult (alg, val, &limit, mode);
3239 
3240   /* This works only if the inverted value actually fits in an
3241      `unsigned int' */
3242   if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
3243     {
3244       op_cost = neg_cost (speed, mode);
3245       if (MULT_COST_LESS (&alg->cost, mult_cost))
3246 	{
3247 	  limit.cost = alg->cost.cost - op_cost;
3248 	  limit.latency = alg->cost.latency - op_cost;
3249 	}
3250       else
3251 	{
3252 	  limit.cost = mult_cost - op_cost;
3253 	  limit.latency = mult_cost - op_cost;
3254 	}
3255 
3256       synth_mult (&alg2, -val, &limit, mode);
3257       alg2.cost.cost += op_cost;
3258       alg2.cost.latency += op_cost;
3259       if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3260 	*alg = alg2, *variant = negate_variant;
3261     }
3262 
3263   /* This proves very useful for division-by-constant.  */
3264   op_cost = add_cost (speed, mode);
3265   if (MULT_COST_LESS (&alg->cost, mult_cost))
3266     {
3267       limit.cost = alg->cost.cost - op_cost;
3268       limit.latency = alg->cost.latency - op_cost;
3269     }
3270   else
3271     {
3272       limit.cost = mult_cost - op_cost;
3273       limit.latency = mult_cost - op_cost;
3274     }
3275 
3276   synth_mult (&alg2, val - 1, &limit, mode);
3277   alg2.cost.cost += op_cost;
3278   alg2.cost.latency += op_cost;
3279   if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3280     *alg = alg2, *variant = add_variant;
3281 
3282   return MULT_COST_LESS (&alg->cost, mult_cost);
3283 }
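
/* Illustration (exposition only; which variant wins depends entirely on the
   target's cost model): for VAL == 6 the basic variant synthesizes 6
   directly, e.g. ((x << 1) + x) << 1, while the add variant synthesizes
   VAL - 1 == 5 as (x << 2) + x and records add_variant so that
   expand_mult_const adds x as the final fixup.  Both take two operations
   here, and whichever is cheaper under add_cost/shift_cost is kept.  The
   negate variant (synthesize -VAL, then negate) mainly pays off for negative
   VAL whose absolute value has a short shift/add sequence.  */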
3284 
3285 /* A subroutine of expand_mult, used for constant multiplications.
3286    Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
3287    convenient.  Use the shift/add sequence described by ALG and apply
3288    the final fixup specified by VARIANT.  */
3289 
3290 static rtx
3291 expand_mult_const (machine_mode mode, rtx op0, HOST_WIDE_INT val,
3292 		   rtx target, const struct algorithm *alg,
3293 		   enum mult_variant variant)
3294 {
3295   unsigned HOST_WIDE_INT val_so_far;
3296   rtx_insn *insn;
3297   rtx accum, tem;
3298   int opno;
3299   machine_mode nmode;
3300 
3301   /* Avoid referencing memory over and over and invalid sharing
3302      on SUBREGs.  */
3303   op0 = force_reg (mode, op0);
3304 
3305   /* ACCUM starts out either as OP0 or as a zero, depending on
3306      the first operation.  */
3307 
3308   if (alg->op[0] == alg_zero)
3309     {
3310       accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
3311       val_so_far = 0;
3312     }
3313   else if (alg->op[0] == alg_m)
3314     {
3315       accum = copy_to_mode_reg (mode, op0);
3316       val_so_far = 1;
3317     }
3318   else
3319     gcc_unreachable ();
3320 
3321   for (opno = 1; opno < alg->ops; opno++)
3322     {
3323       int log = alg->log[opno];
3324       rtx shift_subtarget = optimize ? 0 : accum;
3325       rtx add_target
3326 	= (opno == alg->ops - 1 && target != 0 && variant != add_variant
3327 	   && !optimize)
3328 	  ? target : 0;
3329       rtx accum_target = optimize ? 0 : accum;
3330       rtx accum_inner;
3331 
3332       switch (alg->op[opno])
3333 	{
3334 	case alg_shift:
3335 	  tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3336 	  /* REG_EQUAL note will be attached to the following insn.  */
3337 	  emit_move_insn (accum, tem);
3338 	  val_so_far <<= log;
3339 	  break;
3340 
3341 	case alg_add_t_m2:
3342 	  tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3343 	  accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3344 				 add_target ? add_target : accum_target);
3345 	  val_so_far += HOST_WIDE_INT_1U << log;
3346 	  break;
3347 
3348 	case alg_sub_t_m2:
3349 	  tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3350 	  accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
3351 				 add_target ? add_target : accum_target);
3352 	  val_so_far -= HOST_WIDE_INT_1U << log;
3353 	  break;
3354 
3355 	case alg_add_t2_m:
3356 	  accum = expand_shift (LSHIFT_EXPR, mode, accum,
3357 				log, shift_subtarget, 0);
3358 	  accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
3359 				 add_target ? add_target : accum_target);
3360 	  val_so_far = (val_so_far << log) + 1;
3361 	  break;
3362 
3363 	case alg_sub_t2_m:
3364 	  accum = expand_shift (LSHIFT_EXPR, mode, accum,
3365 				log, shift_subtarget, 0);
3366 	  accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3367 				 add_target ? add_target : accum_target);
3368 	  val_so_far = (val_so_far << log) - 1;
3369 	  break;
3370 
3371 	case alg_add_factor:
3372 	  tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3373 	  accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3374 				 add_target ? add_target : accum_target);
3375 	  val_so_far += val_so_far << log;
3376 	  break;
3377 
3378 	case alg_sub_factor:
3379 	  tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3380 	  accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3381 				 (add_target
3382 				  ? add_target : (optimize ? 0 : tem)));
3383 	  val_so_far = (val_so_far << log) - val_so_far;
3384 	  break;
3385 
3386 	default:
3387 	  gcc_unreachable ();
3388 	}
3389 
3390       if (SCALAR_INT_MODE_P (mode))
3391 	{
3392 	  /* Write a REG_EQUAL note on the last insn so that we can cse
3393 	     multiplication sequences.  Note that if ACCUM is a SUBREG,
3394 	     we've set the inner register and must properly indicate that.  */
3395 	  tem = op0, nmode = mode;
3396 	  accum_inner = accum;
3397 	  if (GET_CODE (accum) == SUBREG)
3398 	    {
3399 	      accum_inner = SUBREG_REG (accum);
3400 	      nmode = GET_MODE (accum_inner);
3401 	      tem = gen_lowpart (nmode, op0);
3402 	    }
3403 
3404 	  /* Don't add a REG_EQUAL note if tem is a paradoxical SUBREG.
3405 	     In that case, only the low bits of accum would be guaranteed to
3406 	     be equal to the content of the REG_EQUAL note, the upper bits
3407 	     can be anything.  */
3408 	  if (!paradoxical_subreg_p (tem))
3409 	    {
3410 	      insn = get_last_insn ();
3411 	      wide_int wval_so_far
3412 		= wi::uhwi (val_so_far,
3413 			    GET_MODE_PRECISION (as_a <scalar_mode> (nmode)));
3414 	      rtx c = immed_wide_int_const (wval_so_far, nmode);
3415 	      set_dst_reg_note (insn, REG_EQUAL, gen_rtx_MULT (nmode, tem, c),
3416 				accum_inner);
3417 	    }
3418 	}
3419     }
3420 
3421   if (variant == negate_variant)
3422     {
3423       val_so_far = -val_so_far;
3424       accum = expand_unop (mode, neg_optab, accum, target, 0);
3425     }
3426   else if (variant == add_variant)
3427     {
3428       val_so_far = val_so_far + 1;
3429       accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3430     }
3431 
3432   /* Compare only the bits of val and val_so_far that are significant
3433      in the result mode, to avoid sign-/zero-extension confusion.  */
3434   nmode = GET_MODE_INNER (mode);
3435   val &= GET_MODE_MASK (nmode);
3436   val_so_far &= GET_MODE_MASK (nmode);
3437   gcc_assert (val == (HOST_WIDE_INT) val_so_far);
3438 
3439   return accum;
3440 }
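
/* Worked example (exposition only): replaying the T == 10 sequence sketched
   after synth_mult above, the loop tracks VAL_SO_FAR alongside the emitted
   RTL:

     op[0] alg_m                     accum = op0                val_so_far = 1
     op[1] alg_add_t2_m, log == 2    accum = (accum << 2) + op0 val_so_far = 5
     op[2] alg_shift,    log == 1    accum = accum << 1         val_so_far = 10

   The REG_EQUAL notes record accum == op0 * 5 and op0 * 10, and the final
   assertion checks val_so_far == 10 == VAL after masking to the mode.  */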
3441 
3442 /* Perform a multiplication and return an rtx for the result.
3443    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3444    TARGET is a suggestion for where to store the result (an rtx).
3445 
3446    We check specially for a constant integer as OP1.
3447    If you want this check for OP0 as well, then before calling
3448    you should swap the two operands if OP0 would be constant.  */
3449 
3450 rtx
3451 expand_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3452 	     int unsignedp, bool no_libcall)
3453 {
3454   enum mult_variant variant;
3455   struct algorithm algorithm;
3456   rtx scalar_op1;
3457   int max_cost;
3458   bool speed = optimize_insn_for_speed_p ();
3459   bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
3460 
3461   if (CONSTANT_P (op0))
3462     std::swap (op0, op1);
3463 
3464   /* For vectors, there are several simplifications that can be made if
3465      all elements of the vector constant are identical.  */
3466   scalar_op1 = unwrap_const_vec_duplicate (op1);
3467 
3468   if (INTEGRAL_MODE_P (mode))
3469     {
3470       rtx fake_reg;
3471       HOST_WIDE_INT coeff;
3472       bool is_neg;
3473       int mode_bitsize;
3474 
3475       if (op1 == CONST0_RTX (mode))
3476 	return op1;
3477       if (op1 == CONST1_RTX (mode))
3478 	return op0;
3479       if (op1 == CONSTM1_RTX (mode))
3480 	return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3481 			    op0, target, 0);
3482 
3483       if (do_trapv)
3484 	goto skip_synth;
3485 
3486       /* If mode is integer vector mode, check if the backend supports
3487 	 vector lshift (by scalar or vector) at all.  If not, we can't use
3488 	 synthesized multiply.  */
3489       if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
3490 	  && optab_handler (vashl_optab, mode) == CODE_FOR_nothing
3491 	  && optab_handler (ashl_optab, mode) == CODE_FOR_nothing)
3492 	goto skip_synth;
3493 
3494       /* These are the operations that are potentially turned into
3495 	 a sequence of shifts and additions.  */
3496       mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
3497 
3498       /* synth_mult does an `unsigned int' multiply.  As long as the mode is
3499 	 less than or equal in size to `unsigned int' this doesn't matter.
3500 	 If the mode is larger than `unsigned int', then synth_mult works
3501 	 only if the constant value exactly fits in an `unsigned int' without
3502 	 any truncation.  This means that multiplying by negative values does
3503 	 not work; results are off by 2^32 on a 32 bit machine.  */
3504       if (CONST_INT_P (scalar_op1))
3505 	{
3506 	  coeff = INTVAL (scalar_op1);
3507 	  is_neg = coeff < 0;
3508 	}
3509 #if TARGET_SUPPORTS_WIDE_INT
3510       else if (CONST_WIDE_INT_P (scalar_op1))
3511 #else
3512       else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
3513 #endif
3514 	{
3515 	  int shift = wi::exact_log2 (rtx_mode_t (scalar_op1, mode));
3516 	  /* Perfect power of 2 (other than 1, which is handled above).  */
3517 	  if (shift > 0)
3518 	    return expand_shift (LSHIFT_EXPR, mode, op0,
3519 				 shift, target, unsignedp);
3520 	  else
3521 	    goto skip_synth;
3522 	}
3523       else
3524 	goto skip_synth;
3525 
3526       /* We used to test optimize here, on the grounds that it's better to
3527 	 produce a smaller program when -O is not used.  But this causes
3528 	 such a terrible slowdown sometimes that it seems better to always
3529 	 use synth_mult.  */
3530 
3531       /* Special case powers of two.  */
3532       if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)
3533 	  && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT))
3534 	return expand_shift (LSHIFT_EXPR, mode, op0,
3535 			     floor_log2 (coeff), target, unsignedp);
3536 
3537       fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3538 
3539       /* Attempt to handle multiplication of DImode values by negative
3540 	 coefficients, by performing the multiplication by a positive
3541 	 multiplier and then inverting the result.  */
3542       if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3543 	{
3544 	  /* It's safe to use -coeff even for INT_MIN, as the
3545 	     result is interpreted as an unsigned coefficient.
3546 	     Exclude cost of op0 from max_cost to match the cost
3547 	     calculation of the synth_mult.  */
3548 	  coeff = -(unsigned HOST_WIDE_INT) coeff;
3549 	  max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1),
3550 				    mode, speed)
3551 		      - neg_cost (speed, mode));
3552 	  if (max_cost <= 0)
3553 	    goto skip_synth;
3554 
3555 	  /* Special case powers of two.  */
3556 	  if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3557 	    {
3558 	      rtx temp = expand_shift (LSHIFT_EXPR, mode, op0,
3559 				       floor_log2 (coeff), target, unsignedp);
3560 	      return expand_unop (mode, neg_optab, temp, target, 0);
3561 	    }
3562 
3563 	  if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3564 				   max_cost))
3565 	    {
3566 	      rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX,
3567 					    &algorithm, variant);
3568 	      return expand_unop (mode, neg_optab, temp, target, 0);
3569 	    }
3570 	  goto skip_synth;
3571 	}
3572 
3573       /* Exclude cost of op0 from max_cost to match the cost
3574 	 calculation of the synth_mult.  */
3575       max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), mode, speed);
3576       if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3577 	return expand_mult_const (mode, op0, coeff, target,
3578 				  &algorithm, variant);
3579     }
3580  skip_synth:
3581 
3582   /* Expand x*2.0 as x+x.  */
3583   if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1)
3584       && real_equal (CONST_DOUBLE_REAL_VALUE (scalar_op1), &dconst2))
3585     {
3586       op0 = force_reg (GET_MODE (op0), op0);
3587       return expand_binop (mode, add_optab, op0, op0,
3588 			   target, unsignedp,
3589 			   no_libcall ? OPTAB_WIDEN : OPTAB_LIB_WIDEN);
3590     }
3591 
3592   /* This used to use umul_optab if unsigned, but for non-widening multiply
3593      there is no difference between signed and unsigned.  */
3594   op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
3595 		      op0, op1, target, unsignedp,
3596 		      no_libcall ? OPTAB_WIDEN : OPTAB_LIB_WIDEN);
3597   gcc_assert (op0 || no_libcall);
3598   return op0;
3599 }
3600 
3601 /* Return a cost estimate for multiplying a register by the given
3602    COEFFicient in the given MODE and SPEED.  */
3603 
3604 int
3605 mult_by_coeff_cost (HOST_WIDE_INT coeff, machine_mode mode, bool speed)
3606 {
3607   int max_cost;
3608   struct algorithm algorithm;
3609   enum mult_variant variant;
3610 
3611   rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3612   max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg),
3613 			   mode, speed);
3614   if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3615     return algorithm.cost.cost;
3616   else
3617     return max_cost;
3618 }
3619 
3620 /* Perform a widening multiplication and return an rtx for the result.
3621    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3622    TARGET is a suggestion for where to store the result (an rtx).
3623    THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3624    or smul_widen_optab.
3625 
3626    We check specially for a constant integer as OP1, comparing the
3627    cost of a widening multiply against the cost of a sequence of shifts
3628    and adds.  */
3629 
3630 rtx
3631 expand_widening_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3632 		      int unsignedp, optab this_optab)
3633 {
3634   bool speed = optimize_insn_for_speed_p ();
3635   rtx cop1;
3636 
3637   if (CONST_INT_P (op1)
3638       && GET_MODE (op0) != VOIDmode
3639       && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3640 				this_optab == umul_widen_optab))
3641       && CONST_INT_P (cop1)
3642       && (INTVAL (cop1) >= 0
3643 	  || HWI_COMPUTABLE_MODE_P (mode)))
3644     {
3645       HOST_WIDE_INT coeff = INTVAL (cop1);
3646       int max_cost;
3647       enum mult_variant variant;
3648       struct algorithm algorithm;
3649 
3650       if (coeff == 0)
3651 	return CONST0_RTX (mode);
3652 
3653       /* Special case powers of two.  */
3654       if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3655 	{
3656 	  op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3657 	  return expand_shift (LSHIFT_EXPR, mode, op0,
3658 			       floor_log2 (coeff), target, unsignedp);
3659 	}
3660 
3661       /* Exclude cost of op0 from max_cost to match the cost
3662 	 calculation of the synth_mult.  */
3663       max_cost = mul_widen_cost (speed, mode);
3664       if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3665 			       max_cost))
3666 	{
3667 	  op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3668 	  return expand_mult_const (mode, op0, coeff, target,
3669 				    &algorithm, variant);
3670 	}
3671     }
3672   return expand_binop (mode, this_optab, op0, op1, target,
3673 		       unsignedp, OPTAB_LIB_WIDEN);
3674 }
3675 
3676 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3677    replace division by D, and put the least significant N bits of the result
3678    in *MULTIPLIER_PTR and return the most significant bit.
3679 
3680    The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3681    needed precision is in PRECISION (should be <= N).
3682 
3683    PRECISION should be as small as possible so this function can choose
3684    multiplier more freely.
3685 
3686    The rounded-up logarithm of D is placed in *lgup_ptr.  A shift count that
3687    is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3688 
3689    Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3690    where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier.  */
3691 
3692 unsigned HOST_WIDE_INT
3693 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3694 		   unsigned HOST_WIDE_INT *multiplier_ptr,
3695 		   int *post_shift_ptr, int *lgup_ptr)
3696 {
3697   int lgup, post_shift;
3698   int pow, pow2;
3699 
3700   /* lgup = ceil(log2(divisor)); */
3701   lgup = ceil_log2 (d);
3702 
3703   gcc_assert (lgup <= n);
3704 
3705   pow = n + lgup;
3706   pow2 = n + lgup - precision;
3707 
3708   /* mlow = 2^(N + lgup)/d */
3709   wide_int val = wi::set_bit_in_zero (pow, HOST_BITS_PER_DOUBLE_INT);
3710   wide_int mlow = wi::udiv_trunc (val, d);
3711 
3712   /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3713   val |= wi::set_bit_in_zero (pow2, HOST_BITS_PER_DOUBLE_INT);
3714   wide_int mhigh = wi::udiv_trunc (val, d);
3715 
3716   /* If precision == N, then mlow, mhigh exceed 2^N
3717      (but they do not exceed 2^(N+1)).  */
3718 
3719   /* Reduce to lowest terms.  */
3720   for (post_shift = lgup; post_shift > 0; post_shift--)
3721     {
3722       unsigned HOST_WIDE_INT ml_lo = wi::extract_uhwi (mlow, 1,
3723 						       HOST_BITS_PER_WIDE_INT);
3724       unsigned HOST_WIDE_INT mh_lo = wi::extract_uhwi (mhigh, 1,
3725 						       HOST_BITS_PER_WIDE_INT);
3726       if (ml_lo >= mh_lo)
3727 	break;
3728 
3729       mlow = wi::uhwi (ml_lo, HOST_BITS_PER_DOUBLE_INT);
3730       mhigh = wi::uhwi (mh_lo, HOST_BITS_PER_DOUBLE_INT);
3731     }
3732 
3733   *post_shift_ptr = post_shift;
3734   *lgup_ptr = lgup;
3735   if (n < HOST_BITS_PER_WIDE_INT)
3736     {
3737       unsigned HOST_WIDE_INT mask = (HOST_WIDE_INT_1U << n) - 1;
3738       *multiplier_ptr = mhigh.to_uhwi () & mask;
3739       return mhigh.to_uhwi () > mask;
3740     }
3741   else
3742     {
3743       *multiplier_ptr = mhigh.to_uhwi ();
3744       return wi::extract_uhwi (mhigh, HOST_BITS_PER_WIDE_INT, 1);
3745     }
3746 }
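
/* Worked example (exposition only, assuming N == PRECISION == 32 and a
   64-bit HOST_WIDE_INT): for D == 5, lgup == 3, so

     mlow  = 2^35 / 5        (truncated) = 0x199999999
     mhigh = (2^35 + 2^3) / 5 (truncated) = 0x19999999B

   The reduction loop halves both exactly once (POST_SHIFT 3 -> 2), leaving
   mhigh == 0xCCCCCCCD, which fits in 32 bits, so the function stores
   0xCCCCCCCD in *MULTIPLIER_PTR, 2 in *POST_SHIFT_PTR, 3 in *LGUP_PTR and
   returns 0.  The resulting identity for any 32-bit unsigned x is

     x / 5 == (unsigned) (((uint64_t) x * 0xCCCCCCCD) >> 32) >> 2  */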
3747 
3748 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3749    congruent to 1 (mod 2**N).  */
3750 
3751 static unsigned HOST_WIDE_INT
3752 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3753 {
3754   /* Solve x*y == 1 (mod 2^n), where x is odd.  Return y.  */
3755 
3756   /* The algorithm notes that the choice y = x satisfies
3757      x*y == 1 mod 2^3, since x is assumed odd.
3758      Each iteration doubles the number of bits of significance in y.  */
3759 
3760   unsigned HOST_WIDE_INT mask;
3761   unsigned HOST_WIDE_INT y = x;
3762   int nbit = 3;
3763 
3764   mask = (n == HOST_BITS_PER_WIDE_INT
3765 	  ? HOST_WIDE_INT_M1U
3766 	  : (HOST_WIDE_INT_1U << n) - 1);
3767 
3768   while (nbit < n)
3769     {
3770       y = y * (2 - x*y) & mask;		/* Modulo 2^N */
3771       nbit *= 2;
3772     }
3773   return y;
3774 }
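
/* Worked example (exposition only): for X == 3 and N == 8 the mask is 0xff
   and the loop runs twice (nbit: 3 -> 6 -> 12):

     y = 3                                     3 *   3 ==   9 == 1 (mod 2^3)
     y = 3 * (2 - 3 * 3) & 0xff       --> 235  3 * 235 == 705 == 1 (mod 2^6)
     y = 235 * (2 - 3 * 235) & 0xff   --> 171  3 * 171 == 513 == 1 (mod 2^8)

   so invert_mod2n (3, 8) == 171, and indeed 3 * 171 == 2 * 256 + 1.  */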
3775 
3776 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3777    flavor of OP0 and OP1.  ADJ_OPERAND is already the high half of the
3778    product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the signed product
3779    to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3780    become signed.
3781 
3782    The result is put in TARGET if that is convenient.
3783 
3784    MODE is the mode of operation.  */
3785 
3786 rtx
3787 expand_mult_highpart_adjust (scalar_int_mode mode, rtx adj_operand, rtx op0,
3788 			     rtx op1, rtx target, int unsignedp)
3789 {
3790   rtx tem;
3791   enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3792 
3793   tem = expand_shift (RSHIFT_EXPR, mode, op0,
3794 		      GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3795   tem = expand_and (mode, tem, op1, NULL_RTX);
3796   adj_operand
3797     = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3798 		     adj_operand);
3799 
3800   tem = expand_shift (RSHIFT_EXPR, mode, op1,
3801 		      GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3802   tem = expand_and (mode, tem, op0, NULL_RTX);
3803   target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3804 			  target);
3805 
3806   return target;
3807 }
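
/* The correction above implements the identity (everything mod 2^N, with
   N == GET_MODE_BITSIZE (mode), and writing high_s/high_u for the
   hypothetical signed and unsigned high halves of the 2N-bit product):

     high_u (a, b) == high_s (a, b) + (a < 0 ? b : 0) + (b < 0 ? a : 0)

   because reinterpreting a negative N-bit value as unsigned adds 2^N to it.
   Each conditional term is formed branch-free as ((a >> (N - 1)) & b): the
   arithmetic shift yields -1 or 0, which the AND turns into b or 0.  With
   UNSIGNEDP nonzero the terms are added to the signed high part; otherwise
   they are subtracted from the unsigned one.  */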
3808 
3809 /* Subroutine of expmed_mult_highpart.  Return the MODE high part of OP.  */
3810 
3811 static rtx
3812 extract_high_half (scalar_int_mode mode, rtx op)
3813 {
3814   if (mode == word_mode)
3815     return gen_highpart (mode, op);
3816 
3817   scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3818 
3819   op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3820 		     GET_MODE_BITSIZE (mode), 0, 1);
3821   return convert_modes (mode, wider_mode, op, 0);
3822 }
3823 
3824 /* Like expmed_mult_highpart, but only consider using a multiplication
3825    optab.  OP1 is an rtx for the constant operand.  */
3826 
3827 static rtx
3828 expmed_mult_highpart_optab (scalar_int_mode mode, rtx op0, rtx op1,
3829 			    rtx target, int unsignedp, int max_cost)
3830 {
3831   rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3832   optab moptab;
3833   rtx tem;
3834   int size;
3835   bool speed = optimize_insn_for_speed_p ();
3836 
3837   scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3838 
3839   size = GET_MODE_BITSIZE (mode);
3840 
3841   /* Firstly, try using a multiplication insn that only generates the needed
3842      high part of the product, and in the sign flavor of unsignedp.  */
3843   if (mul_highpart_cost (speed, mode) < max_cost)
3844     {
3845       moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3846       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3847 			  unsignedp, OPTAB_DIRECT);
3848       if (tem)
3849 	return tem;
3850     }
3851 
3852   /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3853      Need to adjust the result after the multiplication.  */
3854   if (size - 1 < BITS_PER_WORD
3855       && (mul_highpart_cost (speed, mode)
3856 	  + 2 * shift_cost (speed, mode, size-1)
3857 	  + 4 * add_cost (speed, mode) < max_cost))
3858     {
3859       moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3860       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3861 			  unsignedp, OPTAB_DIRECT);
3862       if (tem)
3863 	/* We used the wrong signedness.  Adjust the result.  */
3864 	return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3865 					    tem, unsignedp);
3866     }
3867 
3868   /* Try widening multiplication.  */
3869   moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3870   if (convert_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3871       && mul_widen_cost (speed, wider_mode) < max_cost)
3872     {
3873       tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3874 			  unsignedp, OPTAB_WIDEN);
3875       if (tem)
3876 	return extract_high_half (mode, tem);
3877     }
3878 
3879   /* Try widening the mode and perform a non-widening multiplication.  */
3880   if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3881       && size - 1 < BITS_PER_WORD
3882       && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3883 	  < max_cost))
3884     {
3885       rtx_insn *insns;
3886       rtx wop0, wop1;
3887 
3888       /* We need to widen the operands, for example to ensure the
3889 	 constant multiplier is correctly sign or zero extended.
3890 	 Use a sequence to clean-up any instructions emitted by
3891 	 the conversions if things don't work out.  */
3892       start_sequence ();
3893       wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3894       wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3895       tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3896 			  unsignedp, OPTAB_WIDEN);
3897       insns = get_insns ();
3898       end_sequence ();
3899 
3900       if (tem)
3901 	{
3902 	  emit_insn (insns);
3903 	  return extract_high_half (mode, tem);
3904 	}
3905     }
3906 
3907   /* Try widening multiplication of opposite signedness, and adjust.  */
3908   moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3909   if (convert_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3910       && size - 1 < BITS_PER_WORD
3911       && (mul_widen_cost (speed, wider_mode)
3912 	  + 2 * shift_cost (speed, mode, size-1)
3913 	  + 4 * add_cost (speed, mode) < max_cost))
3914     {
3915       tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3916 			  NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3917       if (tem != 0)
3918 	{
3919 	  tem = extract_high_half (mode, tem);
3920 	  /* We used the wrong signedness.  Adjust the result.  */
3921 	  return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3922 					      target, unsignedp);
3923 	}
3924     }
3925 
3926   return 0;
3927 }
3928 
3929 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3930    putting the high half of the result in TARGET if that is convenient,
3931    and return where the result is.  If the operation cannot be performed,
3932    0 is returned.
3933 
3934    MODE is the mode of operation and result.
3935 
3936    UNSIGNEDP nonzero means unsigned multiply.
3937 
3938    MAX_COST is the total allowed cost for the expanded RTL.  */
3939 
3940 static rtx
3941 expmed_mult_highpart (scalar_int_mode mode, rtx op0, rtx op1,
3942 		      rtx target, int unsignedp, int max_cost)
3943 {
3944   unsigned HOST_WIDE_INT cnst1;
3945   int extra_cost;
3946   bool sign_adjust = false;
3947   enum mult_variant variant;
3948   struct algorithm alg;
3949   rtx tem;
3950   bool speed = optimize_insn_for_speed_p ();
3951 
3952   /* We can't support modes wider than HOST_BITS_PER_WIDE_INT.  */
3953   gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3954 
3955   cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3956 
3957   /* We can't optimize modes wider than BITS_PER_WORD.
3958      ??? We might be able to perform double-word arithmetic if
3959      mode == word_mode, however all the cost calculations in
3960      synth_mult etc. assume single-word operations.  */
3961   scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3962   if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3963     return expmed_mult_highpart_optab (mode, op0, op1, target,
3964 				       unsignedp, max_cost);
3965 
3966   extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3967 
3968   /* Check whether we try to multiply by a negative constant.  */
3969   if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3970     {
3971       sign_adjust = true;
3972       extra_cost += add_cost (speed, mode);
3973     }
3974 
3975   /* See whether shift/add multiplication is cheap enough.  */
3976   if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3977 			   max_cost - extra_cost))
3978     {
3979       /* See whether the specialized multiplication optabs are
3980 	 cheaper than the shift/add version.  */
3981       tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3982 					alg.cost.cost + extra_cost);
3983       if (tem)
3984 	return tem;
3985 
3986       tem = convert_to_mode (wider_mode, op0, unsignedp);
3987       tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3988       tem = extract_high_half (mode, tem);
3989 
3990       /* Adjust result for signedness.  */
3991       if (sign_adjust)
3992 	tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3993 
3994       return tem;
3995     }
3996   return expmed_mult_highpart_optab (mode, op0, op1, target,
3997 				     unsignedp, max_cost);
3998 }
3999 
4000 
4001 /* Expand signed modulus of OP0 by a power of two D in mode MODE.  */
4002 
4003 static rtx
4004 expand_smod_pow2 (scalar_int_mode mode, rtx op0, HOST_WIDE_INT d)
4005 {
4006   rtx result, temp, shift;
4007   rtx_code_label *label;
4008   int logd;
4009   int prec = GET_MODE_PRECISION (mode);
4010 
4011   logd = floor_log2 (d);
4012   result = gen_reg_rtx (mode);
4013 
4014   /* Avoid conditional branches when they're expensive.  */
4015   if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
4016       && optimize_insn_for_speed_p ())
4017     {
4018       rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
4019 				      mode, 0, -1);
4020       if (signmask)
4021 	{
4022 	  HOST_WIDE_INT masklow = (HOST_WIDE_INT_1 << logd) - 1;
4023 	  signmask = force_reg (mode, signmask);
4024 	  shift = gen_int_shift_amount (mode, GET_MODE_BITSIZE (mode) - logd);
4025 
4026 	  /* Use the rtx_cost of a LSHIFTRT instruction to determine
4027 	     which instruction sequence to use.  If logical right shifts
4028 	     are expensive then use 2 XORs, 2 SUBs and an AND, otherwise
4029 	     use a LSHIFTRT, 1 ADD, 1 SUB and an AND.  */
4030 
4031 	  temp = gen_rtx_LSHIFTRT (mode, result, shift);
4032 	  if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
4033 	      || (set_src_cost (temp, mode, optimize_insn_for_speed_p ())
4034 		  > COSTS_N_INSNS (2)))
4035 	    {
4036 	      temp = expand_binop (mode, xor_optab, op0, signmask,
4037 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
4038 	      temp = expand_binop (mode, sub_optab, temp, signmask,
4039 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
4040 	      temp = expand_binop (mode, and_optab, temp,
4041 				   gen_int_mode (masklow, mode),
4042 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
4043 	      temp = expand_binop (mode, xor_optab, temp, signmask,
4044 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
4045 	      temp = expand_binop (mode, sub_optab, temp, signmask,
4046 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
4047 	    }
4048 	  else
4049 	    {
4050 	      signmask = expand_binop (mode, lshr_optab, signmask, shift,
4051 				       NULL_RTX, 1, OPTAB_LIB_WIDEN);
4052 	      signmask = force_reg (mode, signmask);
4053 
4054 	      temp = expand_binop (mode, add_optab, op0, signmask,
4055 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
4056 	      temp = expand_binop (mode, and_optab, temp,
4057 				   gen_int_mode (masklow, mode),
4058 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
4059 	      temp = expand_binop (mode, sub_optab, temp, signmask,
4060 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
4061 	    }
4062 	  return temp;
4063 	}
4064     }
4065 
4066   /* Mask contains the mode's signbit and the significant bits of the
4067      modulus.  By including the signbit in the operation, many targets
4068      can avoid an explicit compare operation in the following comparison
4069      against zero.  */
4070   wide_int mask = wi::mask (logd, false, prec);
4071   mask = wi::set_bit (mask, prec - 1);
4072 
4073   temp = expand_binop (mode, and_optab, op0,
4074 		       immed_wide_int_const (mask, mode),
4075 		       result, 1, OPTAB_LIB_WIDEN);
4076   if (temp != result)
4077     emit_move_insn (result, temp);
4078 
4079   label = gen_label_rtx ();
4080   do_cmp_and_jump (result, const0_rtx, GE, mode, label);
4081 
4082   temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
4083 		       0, OPTAB_LIB_WIDEN);
4084 
4085   mask = wi::mask (logd, true, prec);
4086   temp = expand_binop (mode, ior_optab, temp,
4087 		       immed_wide_int_const (mask, mode),
4088 		       result, 1, OPTAB_LIB_WIDEN);
4089   temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
4090 		       0, OPTAB_LIB_WIDEN);
4091   if (temp != result)
4092     emit_move_insn (result, temp);
4093   emit_label (label);
4094   return result;
4095 }
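
/* Illustrative sketch (exposition only; the helper below is hypothetical and
   nothing in this file uses it): the branch-free LSHIFTRT variant above,
   specialized to a 32-bit int and D == 4, computes C's truncating signed
   remainder as follows.  */

static inline int
smod_pow2_sketch (int x)
{
  /* emit_store_flag (..., LT, op0, const0_rtx, ..., -1): -1 if x < 0.  */
  int signmask = -(x < 0);
  /* A logical shift right by bitsize - logd turns that into d - 1 or 0.  */
  unsigned int bias = (unsigned int) signmask >> (32 - 2);
  /* Add the bias, mask to the low logd bits, then subtract it again.  */
  return (int) (((unsigned int) x + bias) & 3) - (int) bias;
}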
4096 
4097 /* Expand signed division of OP0 by a power of two D in mode MODE.
4098    This routine is only called for positive values of D.  */
4099 
4100 static rtx
4101 expand_sdiv_pow2 (scalar_int_mode mode, rtx op0, HOST_WIDE_INT d)
4102 {
4103   rtx temp;
4104   rtx_code_label *label;
4105   int logd;
4106 
4107   logd = floor_log2 (d);
4108 
4109   if (d == 2
4110       && BRANCH_COST (optimize_insn_for_speed_p (),
4111 		      false) >= 1)
4112     {
4113       temp = gen_reg_rtx (mode);
4114       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
4115       if (temp != NULL_RTX)
4116 	{
4117 	  temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
4118 			       0, OPTAB_LIB_WIDEN);
4119 	  return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
4120 	}
4121     }
4122 
4123   if (HAVE_conditional_move
4124       && BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2)
4125     {
4126       rtx temp2;
4127 
4128       start_sequence ();
4129       temp2 = copy_to_mode_reg (mode, op0);
4130       temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode),
4131 			   NULL_RTX, 0, OPTAB_LIB_WIDEN);
4132       temp = force_reg (mode, temp);
4133 
4134       /* Construct "temp2 = (temp2 < 0) ? temp : temp2".  */
4135       temp2 = emit_conditional_move (temp2, { LT, temp2, const0_rtx, mode },
4136 				     temp, temp2, mode, 0);
4137       if (temp2)
4138 	{
4139 	  rtx_insn *seq = get_insns ();
4140 	  end_sequence ();
4141 	  emit_insn (seq);
4142 	  return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
4143 	}
4144       end_sequence ();
4145     }
4146 
4147   if (BRANCH_COST (optimize_insn_for_speed_p (),
4148 		   false) >= 2)
4149     {
4150       int ushift = GET_MODE_BITSIZE (mode) - logd;
4151 
4152       temp = gen_reg_rtx (mode);
4153       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
4154       if (temp != NULL_RTX)
4155 	{
4156 	  if (GET_MODE_BITSIZE (mode) >= BITS_PER_WORD
4157 	      || shift_cost (optimize_insn_for_speed_p (), mode, ushift)
4158 	      > COSTS_N_INSNS (1))
4159 	    temp = expand_binop (mode, and_optab, temp,
4160 				 gen_int_mode (d - 1, mode),
4161 				 NULL_RTX, 0, OPTAB_LIB_WIDEN);
4162 	  else
4163 	    temp = expand_shift (RSHIFT_EXPR, mode, temp,
4164 				 ushift, NULL_RTX, 1);
4165 	  temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
4166 			       0, OPTAB_LIB_WIDEN);
4167 	  return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
4168 	}
4169     }
4170 
4171   label = gen_label_rtx ();
4172   temp = copy_to_mode_reg (mode, op0);
4173   do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
4174   expand_inc (temp, gen_int_mode (d - 1, mode));
4175   emit_label (label);
4176   return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
4177 }
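
/* Illustrative sketch (exposition only; the helper below is hypothetical,
   and it assumes the arithmetic right shift of negative values that
   expand_shift provides at the RTL level): the branch-free store-flag
   variant above, specialized to a 32-bit int and D == 8, rounds towards
   zero by biasing negative dividends with D - 1 before the final shift.  */

static inline int
sdiv_pow2_sketch (int x)
{
  /* -1 if x is negative, 0 otherwise (emit_store_flag ... -1).  */
  int signmask = -(x < 0);
  /* Keep only d - 1 == 7: the rounding bias for negative x.  */
  int bias = signmask & 7;
  /* (x + bias) >> logd matches C's truncating signed division by 8.  */
  return (x + bias) >> 3;
}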
4178 
4179 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
4180    if that is convenient, and returning where the result is.
4181    You may request either the quotient or the remainder as the result;
4182    specify REM_FLAG nonzero to get the remainder.
4183 
4184    CODE is the expression code for which kind of division this is;
4185    it controls how rounding is done.  MODE is the machine mode to use.
4186    UNSIGNEDP nonzero means do unsigned division.  */
4187 
4188 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
4189    and then correct it by or'ing in missing high bits
4190    if result of ANDI is nonzero.
4191    For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
4192    This could optimize to a bfexts instruction.
4193    But C doesn't use these operations, so their optimizations are
4194    left for later.  */
4195 /* ??? For modulo, we don't actually need the highpart of the first product,
4196    the low part will do nicely.  And for small divisors, the second multiply
4197    can also be a low-part only multiply or even be completely left out.
4198    E.g. to calculate the remainder of a division by 3 with a 32 bit
4199    multiply, multiply with 0x55555556 and extract the upper two bits;
4200    the result is exact for inputs up to 0x1fffffff.
4201    The input range can be reduced by using cross-sum rules.
4202    For odd divisors >= 3, the following table gives right shift counts
4203    so that if a number is shifted by an integer multiple of the given
4204    amount, the remainder stays the same:
4205    2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
4206    14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
4207    0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
4208    20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
4209    0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
4210 
4211    Cross-sum rules for even numbers can be derived by leaving as many bits
4212    to the right alone as the divisor has zeros to the right.
4213    E.g. if x is an unsigned 32 bit number:
4214    (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
4215    */
4216 
4217 rtx
4218 expand_divmod (int rem_flag, enum tree_code code, machine_mode mode,
4219 	       rtx op0, rtx op1, rtx target, int unsignedp,
4220 	       enum optab_methods methods)
4221 {
4222   machine_mode compute_mode;
4223   rtx tquotient;
4224   rtx quotient = 0, remainder = 0;
4225   rtx_insn *last;
4226   rtx_insn *insn;
4227   optab optab1, optab2;
4228   int op1_is_constant, op1_is_pow2 = 0;
4229   int max_cost, extra_cost;
4230   static HOST_WIDE_INT last_div_const = 0;
4231   bool speed = optimize_insn_for_speed_p ();
4232 
4233   op1_is_constant = CONST_INT_P (op1);
4234   if (op1_is_constant)
4235     {
4236       wide_int ext_op1 = rtx_mode_t (op1, mode);
4237       op1_is_pow2 = (wi::popcount (ext_op1) == 1
4238 		     || (! unsignedp
4239 			 && wi::popcount (wi::neg (ext_op1)) == 1));
4240     }
4241 
4242   /*
4243      This is the structure of expand_divmod:
4244 
4245      First comes code to fix up the operands so we can perform the operations
4246      correctly and efficiently.
4247 
4248      Second comes a switch statement with code specific for each rounding mode.
4249      For some special operands this code emits all RTL for the desired
4250      operation, for other cases, it generates only a quotient and stores it in
4251      QUOTIENT.  The case for trunc division/remainder might leave quotient = 0,
4252      to indicate that it has not done anything.
4253 
4254      Last comes code that finishes the operation.  If QUOTIENT is set and
4255      REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1.  If
4256      QUOTIENT is not set, it is computed using trunc rounding.
4257 
4258      We try to generate special code for division and remainder when OP1 is a
4259      constant.  If |OP1| = 2**n we can use shifts and some other fast
4260      operations.  For other values of OP1, we compute a carefully selected
4261      fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
4262      by m.
4263 
4264      In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
4265      half of the product.  Different strategies for generating the product are
4266      implemented in expmed_mult_highpart.
4267 
4268      If what we actually want is the remainder, we generate that by another
4269      by-constant multiplication and a subtraction.  */
4270 
4271   /* We shouldn't be called with OP1 == const1_rtx, but some of the
4272      code below will malfunction if we are, so check here and handle
4273      the special case if so.  */
4274   if (op1 == const1_rtx)
4275     return rem_flag ? const0_rtx : op0;
4276 
4277   /* When dividing by -1, we could get an overflow.
4278      negv_optab can handle overflows.  */
4279   if (! unsignedp && op1 == constm1_rtx)
4280     {
4281       if (rem_flag)
4282 	return const0_rtx;
4283       return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT
4284 			  ? negv_optab : neg_optab, op0, target, 0);
4285     }
4286 
4287   if (target
4288       /* Don't use the function value register as a target
4289 	 since we have to read it as well as write it,
4290 	 and function-inlining gets confused by this.  */
4291       && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
4292 	  /* Don't clobber an operand while doing a multi-step calculation.  */
4293 	  || ((rem_flag || op1_is_constant)
4294 	      && (reg_mentioned_p (target, op0)
4295 		  || (MEM_P (op0) && MEM_P (target))))
4296 	  || reg_mentioned_p (target, op1)
4297 	  || (MEM_P (op1) && MEM_P (target))))
4298     target = 0;
4299 
4300   /* Get the mode in which to perform this computation.  Normally it will
4301      be MODE, but sometimes we can't do the desired operation in MODE.
4302      If so, pick a wider mode in which we can do the operation.  Convert
4303      to that mode at the start to avoid repeated conversions.
4304 
4305      First see what operations we need.  These depend on the expression
4306      we are evaluating.  (We assume that divxx3 insns exist under the
4307      same conditions as modxx3 insns do, and that these insns don't normally
4308      fail.  If these assumptions are not correct, we may generate less
4309      efficient code in some cases.)
4310 
4311      Then see if we find a mode in which we can open-code that operation
4312      (either a division, modulus, or shift).  Finally, check for the smallest
4313      mode for which we can do the operation with a library call.  */
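  /* Editor's note, as a hypothetical example: a signed QImode division on a
     target that only provides SImode division and shift patterns would pick
     SImode here; both operands are then widened once below and the final
     result is narrowed back with gen_lowpart.  */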
4314 
4315   /* We might want to refine this now that we have division-by-constant
4316      optimization.  Since expmed_mult_highpart tries so many variants, it is
4317      not straightforward to generalize this.  Maybe we should make an array
4318      of possible modes in init_expmed?  Save this for GCC 2.7.  */
4319 
4320   optab1 = (op1_is_pow2
4321 	    ? (unsignedp ? lshr_optab : ashr_optab)
4322 	    : (unsignedp ? udiv_optab : sdiv_optab));
4323   optab2 = (op1_is_pow2 ? optab1
4324 	    : (unsignedp ? udivmod_optab : sdivmod_optab));
4325 
4326   if (methods == OPTAB_WIDEN || methods == OPTAB_LIB_WIDEN)
4327     {
4328       FOR_EACH_MODE_FROM (compute_mode, mode)
4329       if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
4330 	  || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
4331 	break;
4332 
4333       if (compute_mode == VOIDmode && methods == OPTAB_LIB_WIDEN)
4334 	FOR_EACH_MODE_FROM (compute_mode, mode)
4335 	  if (optab_libfunc (optab1, compute_mode)
4336 	      || optab_libfunc (optab2, compute_mode))
4337 	    break;
4338     }
4339   else
4340     compute_mode = mode;
4341 
4342   /* If we still couldn't find a mode, use MODE, but expand_binop will
4343      probably die.  */
4344   if (compute_mode == VOIDmode)
4345     compute_mode = mode;
4346 
4347   if (target && GET_MODE (target) == compute_mode)
4348     tquotient = target;
4349   else
4350     tquotient = gen_reg_rtx (compute_mode);
4351 
4352 #if 0
4353   /* It should be possible to restrict the precision to GET_MODE_BITSIZE
4354      (mode), and thereby get better code when OP1 is a constant.  Do that
4355      later.  It will require going over all usages of SIZE below.  */
4356   size = GET_MODE_BITSIZE (mode);
4357 #endif
4358 
4359   /* Only deduct something for a REM if the last divide done was
4360      for a different constant.   Then set the constant of the last
4361      divide.  */
4362   max_cost = (unsignedp
4363 	      ? udiv_cost (speed, compute_mode)
4364 	      : sdiv_cost (speed, compute_mode));
4365   if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4366 		     && INTVAL (op1) == last_div_const))
4367     max_cost -= (mul_cost (speed, compute_mode)
4368 		 + add_cost (speed, compute_mode));
4369 
4370   last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
4371 
4372   /* Now convert to the best mode to use.  */
4373   if (compute_mode != mode)
4374     {
4375       op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4376       op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4377 
4378       /* convert_modes may have placed op1 into a register, so we
4379 	 must recompute the following.  */
4380       op1_is_constant = CONST_INT_P (op1);
4381       if (op1_is_constant)
4382 	{
4383 	  wide_int ext_op1 = rtx_mode_t (op1, compute_mode);
4384 	  op1_is_pow2 = (wi::popcount (ext_op1) == 1
4385 			 || (! unsignedp
4386 			     && wi::popcount (wi::neg (ext_op1)) == 1));
4387 	}
4388       else
4389 	op1_is_pow2 = 0;
4390     }
4391 
4392   /* If one of the operands is a volatile MEM, copy it into a register.  */
4393 
4394   if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4395     op0 = force_reg (compute_mode, op0);
4396   if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4397     op1 = force_reg (compute_mode, op1);
4398 
4399   /* If we need the remainder or if OP1 is constant, we need to
4400      put OP0 in a register in case it has any queued subexpressions.  */
4401   if (rem_flag || op1_is_constant)
4402     op0 = force_reg (compute_mode, op0);
4403 
4404   last = get_last_insn ();
4405 
4406   /* Promote floor rounding to trunc rounding for unsigned operations.  */
4407   if (unsignedp)
4408     {
4409       if (code == FLOOR_DIV_EXPR)
4410 	code = TRUNC_DIV_EXPR;
4411       if (code == FLOOR_MOD_EXPR)
4412 	code = TRUNC_MOD_EXPR;
4413       if (code == EXACT_DIV_EXPR && op1_is_pow2)
4414 	code = TRUNC_DIV_EXPR;
4415     }
4416 
4417   if (op1 != const0_rtx)
4418     switch (code)
4419       {
4420       case TRUNC_MOD_EXPR:
4421       case TRUNC_DIV_EXPR:
4422 	if (op1_is_constant)
4423 	  {
4424 	    scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
4425 	    int size = GET_MODE_BITSIZE (int_mode);
4426 	    if (unsignedp)
4427 	      {
4428 		unsigned HOST_WIDE_INT mh, ml;
4429 		int pre_shift, post_shift;
4430 		int dummy;
4431 		wide_int wd = rtx_mode_t (op1, int_mode);
4432 		unsigned HOST_WIDE_INT d = wd.to_uhwi ();
4433 
4434 		if (wi::popcount (wd) == 1)
4435 		  {
4436 		    pre_shift = floor_log2 (d);
4437 		    if (rem_flag)
4438 		      {
4439 			unsigned HOST_WIDE_INT mask
4440 			  = (HOST_WIDE_INT_1U << pre_shift) - 1;
4441 			remainder
4442 			  = expand_binop (int_mode, and_optab, op0,
4443 					  gen_int_mode (mask, int_mode),
4444 					  remainder, 1, methods);
4445 			if (remainder)
4446 			  return gen_lowpart (mode, remainder);
4447 		      }
4448 		    quotient = expand_shift (RSHIFT_EXPR, int_mode, op0,
4449 					     pre_shift, tquotient, 1);
4450 		  }
4451 		else if (size <= HOST_BITS_PER_WIDE_INT)
4452 		  {
4453 		    if (d >= (HOST_WIDE_INT_1U << (size - 1)))
4454 		      {
4455 			/* Most significant bit of divisor is set; emit an scc
4456 			   insn.  */
4457 			quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4458 							  int_mode, 1, 1);
4459 		      }
4460 		    else
4461 		      {
4462 			/* Find a suitable multiplier and right shift count
4463 			   instead of multiplying with D.  */
4464 
4465 			mh = choose_multiplier (d, size, size,
4466 						&ml, &post_shift, &dummy);
4467 
4468 			/* If the suggested multiplier is more than SIZE bits,
4469 			   we can do better for even divisors, using an
4470 			   initial right shift.  */
4471 			if (mh != 0 && (d & 1) == 0)
4472 			  {
4473 			    pre_shift = ctz_or_zero (d);
4474 			    mh = choose_multiplier (d >> pre_shift, size,
4475 						    size - pre_shift,
4476 						    &ml, &post_shift, &dummy);
4477 			    gcc_assert (!mh);
4478 			  }
4479 			else
4480 			  pre_shift = 0;
4481 
4482 			if (mh != 0)
4483 			  {
4484 			    rtx t1, t2, t3, t4;
4485 
4486 			    if (post_shift - 1 >= BITS_PER_WORD)
4487 			      goto fail1;
4488 
4489 			    extra_cost
4490 			      = (shift_cost (speed, int_mode, post_shift - 1)
4491 				 + shift_cost (speed, int_mode, 1)
4492 				 + 2 * add_cost (speed, int_mode));
4493 			    t1 = expmed_mult_highpart
4494 			      (int_mode, op0, gen_int_mode (ml, int_mode),
4495 			       NULL_RTX, 1, max_cost - extra_cost);
4496 			    if (t1 == 0)
4497 			      goto fail1;
4498 			    t2 = force_operand (gen_rtx_MINUS (int_mode,
4499 							       op0, t1),
4500 						NULL_RTX);
4501 			    t3 = expand_shift (RSHIFT_EXPR, int_mode,
4502 					       t2, 1, NULL_RTX, 1);
4503 			    t4 = force_operand (gen_rtx_PLUS (int_mode,
4504 							      t1, t3),
4505 						NULL_RTX);
4506 			    quotient = expand_shift
4507 			      (RSHIFT_EXPR, int_mode, t4,
4508 			       post_shift - 1, tquotient, 1);
4509 			  }
4510 			else
4511 			  {
4512 			    rtx t1, t2;
4513 
4514 			    if (pre_shift >= BITS_PER_WORD
4515 				|| post_shift >= BITS_PER_WORD)
4516 			      goto fail1;
4517 
4518 			    t1 = expand_shift
4519 			      (RSHIFT_EXPR, int_mode, op0,
4520 			       pre_shift, NULL_RTX, 1);
4521 			    extra_cost
4522 			      = (shift_cost (speed, int_mode, pre_shift)
4523 				 + shift_cost (speed, int_mode, post_shift));
4524 			    t2 = expmed_mult_highpart
4525 			      (int_mode, t1,
4526 			       gen_int_mode (ml, int_mode),
4527 			       NULL_RTX, 1, max_cost - extra_cost);
4528 			    if (t2 == 0)
4529 			      goto fail1;
4530 			    quotient = expand_shift
4531 			      (RSHIFT_EXPR, int_mode, t2,
4532 			       post_shift, tquotient, 1);
4533 			  }
4534 		      }
4535 		  }
4536 		else		/* Mode too wide to use the tricky code */
4537 		  break;
4538 
4539 		insn = get_last_insn ();
4540 		if (insn != last)
4541 		  set_dst_reg_note (insn, REG_EQUAL,
4542 				    gen_rtx_UDIV (int_mode, op0, op1),
4543 				    quotient);
4544 	      }
4545 	    else		/* TRUNC_DIV, signed */
4546 	      {
4547 		unsigned HOST_WIDE_INT ml;
4548 		int lgup, post_shift;
4549 		rtx mlr;
4550 		HOST_WIDE_INT d = INTVAL (op1);
4551 		unsigned HOST_WIDE_INT abs_d;
4552 
4553 		/* Not prepared to handle division/remainder by
4554 		   0xffffffffffffffff8000000000000000 etc.  */
4555 		if (d == HOST_WIDE_INT_MIN && size > HOST_BITS_PER_WIDE_INT)
4556 		  break;
4557 
4558 		/* Since d might be INT_MIN, we have to cast to
4559 		   unsigned HOST_WIDE_INT before negating to avoid
4560 		   undefined signed overflow.  */
4561 		abs_d = (d >= 0
4562 			 ? (unsigned HOST_WIDE_INT) d
4563 			 : - (unsigned HOST_WIDE_INT) d);
4564 
4565 		/* n rem d = n rem -d */
4566 		if (rem_flag && d < 0)
4567 		  {
4568 		    d = abs_d;
4569 		    op1 = gen_int_mode (abs_d, int_mode);
4570 		  }
4571 
4572 		if (d == 1)
4573 		  quotient = op0;
4574 		else if (d == -1)
4575 		  quotient = expand_unop (int_mode, neg_optab, op0,
4576 					  tquotient, 0);
4577 		else if (size <= HOST_BITS_PER_WIDE_INT
4578 			 && abs_d == HOST_WIDE_INT_1U << (size - 1))
4579 		  {
4580 		    /* This case is not handled correctly below.  */
4581 		    quotient = emit_store_flag (tquotient, EQ, op0, op1,
4582 						int_mode, 1, 1);
4583 		    if (quotient == 0)
4584 		      goto fail1;
4585 		  }
4586 		else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4587 			 && (size <= HOST_BITS_PER_WIDE_INT || d >= 0)
4588 			 && (rem_flag
4589 			     ? smod_pow2_cheap (speed, int_mode)
4590 			     : sdiv_pow2_cheap (speed, int_mode))
4591 			 /* We assume that cheap metric is true if the
4592 			    optab has an expander for this mode.  */
4593 			 && ((optab_handler ((rem_flag ? smod_optab
4594 					      : sdiv_optab),
4595 					     int_mode)
4596 			      != CODE_FOR_nothing)
4597 			     || (optab_handler (sdivmod_optab, int_mode)
4598 				 != CODE_FOR_nothing)))
4599 		  ;
4600 		else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4601 		  {
4602 		    if (rem_flag)
4603 		      {
4604 			remainder = expand_smod_pow2 (int_mode, op0, d);
4605 			if (remainder)
4606 			  return gen_lowpart (mode, remainder);
4607 		      }
4608 
4609 		    if (sdiv_pow2_cheap (speed, int_mode)
4610 			&& ((optab_handler (sdiv_optab, int_mode)
4611 			     != CODE_FOR_nothing)
4612 			    || (optab_handler (sdivmod_optab, int_mode)
4613 				!= CODE_FOR_nothing)))
4614 		      quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4615 						int_mode, op0,
4616 						gen_int_mode (abs_d,
4617 							      int_mode),
4618 						NULL_RTX, 0);
4619 		    else
4620 		      quotient = expand_sdiv_pow2 (int_mode, op0, abs_d);
4621 
4622 		    /* We have computed OP0 / abs(OP1).  If OP1 is negative,
4623 		       negate the quotient.  */
4624 		    if (d < 0)
4625 		      {
4626 			insn = get_last_insn ();
4627 			if (insn != last
4628 			    && abs_d < (HOST_WIDE_INT_1U
4629 					<< (HOST_BITS_PER_WIDE_INT - 1)))
4630 			  set_dst_reg_note (insn, REG_EQUAL,
4631 					    gen_rtx_DIV (int_mode, op0,
4632 							 gen_int_mode
4633 							   (abs_d,
4634 							    int_mode)),
4635 					    quotient);
4636 
4637 			quotient = expand_unop (int_mode, neg_optab,
4638 						quotient, quotient, 0);
4639 		      }
4640 		  }
4641 		else if (size <= HOST_BITS_PER_WIDE_INT)
4642 		  {
4643 		    choose_multiplier (abs_d, size, size - 1,
4644 				       &ml, &post_shift, &lgup);
4645 		    if (ml < HOST_WIDE_INT_1U << (size - 1))
4646 		      {
4647 			rtx t1, t2, t3;
4648 
4649 			if (post_shift >= BITS_PER_WORD
4650 			    || size - 1 >= BITS_PER_WORD)
4651 			  goto fail1;
4652 
4653 			extra_cost = (shift_cost (speed, int_mode, post_shift)
4654 				      + shift_cost (speed, int_mode, size - 1)
4655 				      + add_cost (speed, int_mode));
4656 			t1 = expmed_mult_highpart
4657 			  (int_mode, op0, gen_int_mode (ml, int_mode),
4658 			   NULL_RTX, 0, max_cost - extra_cost);
4659 			if (t1 == 0)
4660 			  goto fail1;
4661 			t2 = expand_shift
4662 			  (RSHIFT_EXPR, int_mode, t1,
4663 			   post_shift, NULL_RTX, 0);
4664 			t3 = expand_shift
4665 			  (RSHIFT_EXPR, int_mode, op0,
4666 			   size - 1, NULL_RTX, 0);
4667 			if (d < 0)
4668 			  quotient
4669 			    = force_operand (gen_rtx_MINUS (int_mode, t3, t2),
4670 					     tquotient);
4671 			else
4672 			  quotient
4673 			    = force_operand (gen_rtx_MINUS (int_mode, t2, t3),
4674 					     tquotient);
4675 		      }
4676 		    else
4677 		      {
4678 			rtx t1, t2, t3, t4;
4679 
4680 			if (post_shift >= BITS_PER_WORD
4681 			    || size - 1 >= BITS_PER_WORD)
4682 			  goto fail1;
4683 
4684 			ml |= HOST_WIDE_INT_M1U << (size - 1);
4685 			mlr = gen_int_mode (ml, int_mode);
4686 			extra_cost = (shift_cost (speed, int_mode, post_shift)
4687 				      + shift_cost (speed, int_mode, size - 1)
4688 				      + 2 * add_cost (speed, int_mode));
4689 			t1 = expmed_mult_highpart (int_mode, op0, mlr,
4690 						   NULL_RTX, 0,
4691 						   max_cost - extra_cost);
4692 			if (t1 == 0)
4693 			  goto fail1;
4694 			t2 = force_operand (gen_rtx_PLUS (int_mode, t1, op0),
4695 					    NULL_RTX);
4696 			t3 = expand_shift
4697 			  (RSHIFT_EXPR, int_mode, t2,
4698 			   post_shift, NULL_RTX, 0);
4699 			t4 = expand_shift
4700 			  (RSHIFT_EXPR, int_mode, op0,
4701 			   size - 1, NULL_RTX, 0);
4702 			if (d < 0)
4703 			  quotient
4704 			    = force_operand (gen_rtx_MINUS (int_mode, t4, t3),
4705 					     tquotient);
4706 			else
4707 			  quotient
4708 			    = force_operand (gen_rtx_MINUS (int_mode, t3, t4),
4709 					     tquotient);
4710 		      }
4711 		  }
4712 		else		/* Too wide mode to use tricky code */
4713 		else		/* Mode too wide to use the tricky code */
4714 
4715 		insn = get_last_insn ();
4716 		if (insn != last)
4717 		  set_dst_reg_note (insn, REG_EQUAL,
4718 				    gen_rtx_DIV (int_mode, op0, op1),
4719 				    quotient);
4720 	      }
4721 	    break;
4722 	  }
4723       fail1:
4724 	delete_insns_since (last);
4725 	break;
4726 
4727       case FLOOR_DIV_EXPR:
4728       case FLOOR_MOD_EXPR:
4729       /* We will come here only for signed operations.  */
4730 	if (op1_is_constant && HWI_COMPUTABLE_MODE_P (compute_mode))
4731 	  {
4732 	    scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
4733 	    int size = GET_MODE_BITSIZE (int_mode);
4734 	    unsigned HOST_WIDE_INT mh, ml;
4735 	    int pre_shift, lgup, post_shift;
4736 	    HOST_WIDE_INT d = INTVAL (op1);
4737 
4738 	    if (d > 0)
4739 	      {
4740 		/* We could just as easily deal with negative constants here,
4741 		   but it does not seem worth the trouble for GCC 2.6.  */
4742 		if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4743 		  {
4744 		    pre_shift = floor_log2 (d);
4745 		    if (rem_flag)
4746 		      {
4747 			unsigned HOST_WIDE_INT mask
4748 			  = (HOST_WIDE_INT_1U << pre_shift) - 1;
4749 			remainder = expand_binop
4750 			  (int_mode, and_optab, op0,
4751 			   gen_int_mode (mask, int_mode),
4752 			   remainder, 0, methods);
4753 			if (remainder)
4754 			  return gen_lowpart (mode, remainder);
4755 		      }
4756 		    quotient = expand_shift
4757 		      (RSHIFT_EXPR, int_mode, op0,
4758 		       pre_shift, tquotient, 0);
4759 		  }
4760 		else
4761 		  {
4762 		    rtx t1, t2, t3, t4;
4763 
4764 		    mh = choose_multiplier (d, size, size - 1,
4765 					    &ml, &post_shift, &lgup);
4766 		    gcc_assert (!mh);
4767 
4768 		    if (post_shift < BITS_PER_WORD
4769 			&& size - 1 < BITS_PER_WORD)
4770 		      {
4771 			t1 = expand_shift
4772 			  (RSHIFT_EXPR, int_mode, op0,
4773 			   size - 1, NULL_RTX, 0);
4774 			t2 = expand_binop (int_mode, xor_optab, op0, t1,
4775 					   NULL_RTX, 0, OPTAB_WIDEN);
4776 			extra_cost = (shift_cost (speed, int_mode, post_shift)
4777 				      + shift_cost (speed, int_mode, size - 1)
4778 				      + 2 * add_cost (speed, int_mode));
4779 			t3 = expmed_mult_highpart
4780 			  (int_mode, t2, gen_int_mode (ml, int_mode),
4781 			   NULL_RTX, 1, max_cost - extra_cost);
4782 			if (t3 != 0)
4783 			  {
4784 			    t4 = expand_shift
4785 			      (RSHIFT_EXPR, int_mode, t3,
4786 			       post_shift, NULL_RTX, 1);
4787 			    quotient = expand_binop (int_mode, xor_optab,
4788 						     t4, t1, tquotient, 0,
4789 						     OPTAB_WIDEN);
4790 			  }
4791 		      }
4792 		  }
4793 	      }
4794 	    else
4795 	      {
4796 		rtx nsign, t1, t2, t3, t4;
4797 		t1 = force_operand (gen_rtx_PLUS (int_mode,
4798 						  op0, constm1_rtx), NULL_RTX);
4799 		t2 = expand_binop (int_mode, ior_optab, op0, t1, NULL_RTX,
4800 				   0, OPTAB_WIDEN);
4801 		nsign = expand_shift (RSHIFT_EXPR, int_mode, t2,
4802 				      size - 1, NULL_RTX, 0);
4803 		t3 = force_operand (gen_rtx_MINUS (int_mode, t1, nsign),
4804 				    NULL_RTX);
4805 		t4 = expand_divmod (0, TRUNC_DIV_EXPR, int_mode, t3, op1,
4806 				    NULL_RTX, 0);
4807 		if (t4)
4808 		  {
4809 		    rtx t5;
4810 		    t5 = expand_unop (int_mode, one_cmpl_optab, nsign,
4811 				      NULL_RTX, 0);
4812 		    quotient = force_operand (gen_rtx_PLUS (int_mode, t4, t5),
4813 					      tquotient);
4814 		  }
4815 	      }
4816 	  }
4817 
4818 	if (quotient != 0)
4819 	  break;
4820 	delete_insns_since (last);
4821 
4822 	/* Try using an instruction that produces both the quotient and
4823 	   remainder, using truncation.  We can easily compensate the quotient
4824 	   or remainder to get floor rounding, once we have the remainder.
4825 	   Notice that we compute also the final remainder value here,
4826 	   and return the result right away.  */
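	/* Editor's worked example: for OP0 = -7 and OP1 = 2 the truncating
	   divmod yields quotient -3, remainder -1.  The remainder is nonzero
	   and OP0 and OP1 have opposite signs, so the adjustment below gives
	   quotient -4 and remainder 1, i.e. floor(-7/2) = -4 with
	   -7 - (-4)*2 = 1.  */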
4827 	if (target == 0 || GET_MODE (target) != compute_mode)
4828 	  target = gen_reg_rtx (compute_mode);
4829 
4830 	if (rem_flag)
4831 	  {
4832 	    remainder
4833 	      = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4834 	    quotient = gen_reg_rtx (compute_mode);
4835 	  }
4836 	else
4837 	  {
4838 	    quotient
4839 	      = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4840 	    remainder = gen_reg_rtx (compute_mode);
4841 	  }
4842 
4843 	if (expand_twoval_binop (sdivmod_optab, op0, op1,
4844 				 quotient, remainder, 0))
4845 	  {
4846 	    /* This could be computed with a branch-less sequence.
4847 	       Save that for later.  */
4848 	    rtx tem;
4849 	    rtx_code_label *label = gen_label_rtx ();
4850 	    do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4851 	    tem = expand_binop (compute_mode, xor_optab, op0, op1,
4852 				NULL_RTX, 0, OPTAB_WIDEN);
4853 	    do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4854 	    expand_dec (quotient, const1_rtx);
4855 	    expand_inc (remainder, op1);
4856 	    emit_label (label);
4857 	    return gen_lowpart (mode, rem_flag ? remainder : quotient);
4858 	  }
4859 
4860 	/* No luck with division elimination or divmod.  Have to do it
4861 	   by conditionally adjusting op0 *and* the result.  */
4862 	{
4863 	  rtx_code_label *label1, *label2, *label3, *label4, *label5;
4864 	  rtx adjusted_op0;
4865 	  rtx tem;
4866 
4867 	  quotient = gen_reg_rtx (compute_mode);
4868 	  adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4869 	  label1 = gen_label_rtx ();
4870 	  label2 = gen_label_rtx ();
4871 	  label3 = gen_label_rtx ();
4872 	  label4 = gen_label_rtx ();
4873 	  label5 = gen_label_rtx ();
4874 	  do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4875 	  do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4876 	  tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4877 			      quotient, 0, methods);
4878 	  if (tem != quotient)
4879 	    emit_move_insn (quotient, tem);
4880 	  emit_jump_insn (targetm.gen_jump (label5));
4881 	  emit_barrier ();
4882 	  emit_label (label1);
4883 	  expand_inc (adjusted_op0, const1_rtx);
4884 	  emit_jump_insn (targetm.gen_jump (label4));
4885 	  emit_barrier ();
4886 	  emit_label (label2);
4887 	  do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4888 	  tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4889 			      quotient, 0, methods);
4890 	  if (tem != quotient)
4891 	    emit_move_insn (quotient, tem);
4892 	  emit_jump_insn (targetm.gen_jump (label5));
4893 	  emit_barrier ();
4894 	  emit_label (label3);
4895 	  expand_dec (adjusted_op0, const1_rtx);
4896 	  emit_label (label4);
4897 	  tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4898 			      quotient, 0, methods);
4899 	  if (tem != quotient)
4900 	    emit_move_insn (quotient, tem);
4901 	  expand_dec (quotient, const1_rtx);
4902 	  emit_label (label5);
4903 	}
4904 	break;
4905 
4906       case CEIL_DIV_EXPR:
4907       case CEIL_MOD_EXPR:
4908 	if (unsignedp)
4909 	  {
4910 	    if (op1_is_constant
4911 		&& EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4912 		&& (HWI_COMPUTABLE_MODE_P (compute_mode)
4913 		    || INTVAL (op1) >= 0))
4914 	      {
4915 		scalar_int_mode int_mode
4916 		  = as_a <scalar_int_mode> (compute_mode);
4917 		rtx t1, t2, t3;
4918 		unsigned HOST_WIDE_INT d = INTVAL (op1);
4919 		t1 = expand_shift (RSHIFT_EXPR, int_mode, op0,
4920 				   floor_log2 (d), tquotient, 1);
4921 		t2 = expand_binop (int_mode, and_optab, op0,
4922 				   gen_int_mode (d - 1, int_mode),
4923 				   NULL_RTX, 1, methods);
4924 		t3 = gen_reg_rtx (int_mode);
4925 		t3 = emit_store_flag (t3, NE, t2, const0_rtx, int_mode, 1, 1);
4926 		if (t3 == 0)
4927 		  {
4928 		    rtx_code_label *lab;
4929 		    lab = gen_label_rtx ();
4930 		    do_cmp_and_jump (t2, const0_rtx, EQ, int_mode, lab);
4931 		    expand_inc (t1, const1_rtx);
4932 		    emit_label (lab);
4933 		    quotient = t1;
4934 		  }
4935 		else
4936 		  quotient = force_operand (gen_rtx_PLUS (int_mode, t1, t3),
4937 					    tquotient);
4938 		break;
4939 	      }
4940 
4941 	    /* Try using an instruction that produces both the quotient and
4942 	       remainder, using truncation.  We can easily compensate the
4943 	       quotient or remainder to get ceiling rounding, once we have the
4944 	       remainder.  Notice that we compute also the final remainder
4945 	       value here, and return the result right away.  */
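	    /* Editor's worked example: for OP0 = 7 and OP1 = 2 the truncating
	       divmod yields quotient 3, remainder 1.  The remainder is
	       nonzero, so the adjustment below gives quotient 4 = ceil(7/2)
	       and remainder 1 - 2 = -1 = 7 - 4*2.  */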
4946 	    if (target == 0 || GET_MODE (target) != compute_mode)
4947 	      target = gen_reg_rtx (compute_mode);
4948 
4949 	    if (rem_flag)
4950 	      {
4951 		remainder = (REG_P (target)
4952 			     ? target : gen_reg_rtx (compute_mode));
4953 		quotient = gen_reg_rtx (compute_mode);
4954 	      }
4955 	    else
4956 	      {
4957 		quotient = (REG_P (target)
4958 			    ? target : gen_reg_rtx (compute_mode));
4959 		remainder = gen_reg_rtx (compute_mode);
4960 	      }
4961 
4962 	    if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4963 				     remainder, 1))
4964 	      {
4965 		/* This could be computed with a branch-less sequence.
4966 		   Save that for later.  */
4967 		rtx_code_label *label = gen_label_rtx ();
4968 		do_cmp_and_jump (remainder, const0_rtx, EQ,
4969 				 compute_mode, label);
4970 		expand_inc (quotient, const1_rtx);
4971 		expand_dec (remainder, op1);
4972 		emit_label (label);
4973 		return gen_lowpart (mode, rem_flag ? remainder : quotient);
4974 	      }
4975 
4976 	    /* No luck with division elimination or divmod.  Have to do it
4977 	       by conditionally adjusting op0 *and* the result.  */
4978 	    {
4979 	      rtx_code_label *label1, *label2;
4980 	      rtx adjusted_op0, tem;
4981 
4982 	      quotient = gen_reg_rtx (compute_mode);
4983 	      adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4984 	      label1 = gen_label_rtx ();
4985 	      label2 = gen_label_rtx ();
4986 	      do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4987 			       compute_mode, label1);
4988 	      emit_move_insn  (quotient, const0_rtx);
4989 	      emit_jump_insn (targetm.gen_jump (label2));
4990 	      emit_barrier ();
4991 	      emit_label (label1);
4992 	      expand_dec (adjusted_op0, const1_rtx);
4993 	      tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4994 				  quotient, 1, methods);
4995 	      if (tem != quotient)
4996 		emit_move_insn (quotient, tem);
4997 	      expand_inc (quotient, const1_rtx);
4998 	      emit_label (label2);
4999 	    }
5000 	  }
5001 	else /* signed */
5002 	  {
5003 	    if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
5004 		&& INTVAL (op1) >= 0)
5005 	      {
5006 		/* This is extremely similar to the code for the unsigned case
5007 		   above.  For 2.7 we should merge these variants, but for
5008 		   2.6.1 I don't want to touch the code for unsigned since that
5009 		   gets used in C.  The signed case will only be used by other
5010 		   languages (Ada).  */
5011 
5012 		rtx t1, t2, t3;
5013 		unsigned HOST_WIDE_INT d = INTVAL (op1);
5014 		t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
5015 				   floor_log2 (d), tquotient, 0);
5016 		t2 = expand_binop (compute_mode, and_optab, op0,
5017 				   gen_int_mode (d - 1, compute_mode),
5018 				   NULL_RTX, 1, methods);
5019 		t3 = gen_reg_rtx (compute_mode);
5020 		t3 = emit_store_flag (t3, NE, t2, const0_rtx,
5021 				      compute_mode, 1, 1);
5022 		if (t3 == 0)
5023 		  {
5024 		    rtx_code_label *lab;
5025 		    lab = gen_label_rtx ();
5026 		    do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
5027 		    expand_inc (t1, const1_rtx);
5028 		    emit_label (lab);
5029 		    quotient = t1;
5030 		  }
5031 		else
5032 		  quotient = force_operand (gen_rtx_PLUS (compute_mode,
5033 							  t1, t3),
5034 					    tquotient);
5035 		break;
5036 	      }
5037 
5038 	    /* Try using an instruction that produces both the quotient and
5039 	       remainder, using truncation.  We can easily compensate the
5040 	       quotient or remainder to get ceiling rounding, once we have the
5041 	       remainder.  Notice that we compute also the final remainder
5042 	       value here, and return the result right away.  */
5043 	    if (target == 0 || GET_MODE (target) != compute_mode)
5044 	      target = gen_reg_rtx (compute_mode);
5045 	    if (rem_flag)
5046 	      {
5047 		remainder = (REG_P (target)
5048 			    ? target : gen_reg_rtx (compute_mode));
5049 		quotient = gen_reg_rtx (compute_mode);
5050 	      }
5051 	    else
5052 	      {
5053 		quotient = (REG_P (target)
5054 			    ? target : gen_reg_rtx (compute_mode));
5055 		remainder = gen_reg_rtx (compute_mode);
5056 	      }
5057 
5058 	    if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
5059 				     remainder, 0))
5060 	      {
5061 		/* This could be computed with a branch-less sequence.
5062 		   Save that for later.  */
5063 		rtx tem;
5064 		rtx_code_label *label = gen_label_rtx ();
5065 		do_cmp_and_jump (remainder, const0_rtx, EQ,
5066 				 compute_mode, label);
5067 		tem = expand_binop (compute_mode, xor_optab, op0, op1,
5068 				    NULL_RTX, 0, OPTAB_WIDEN);
5069 		do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
5070 		expand_inc (quotient, const1_rtx);
5071 		expand_dec (remainder, op1);
5072 		emit_label (label);
5073 		return gen_lowpart (mode, rem_flag ? remainder : quotient);
5074 	      }
5075 
5076 	    /* No luck with division elimination or divmod.  Have to do it
5077 	       by conditionally adjusting op0 *and* the result.  */
5078 	    {
5079 	      rtx_code_label *label1, *label2, *label3, *label4, *label5;
5080 	      rtx adjusted_op0;
5081 	      rtx tem;
5082 
5083 	      quotient = gen_reg_rtx (compute_mode);
5084 	      adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
5085 	      label1 = gen_label_rtx ();
5086 	      label2 = gen_label_rtx ();
5087 	      label3 = gen_label_rtx ();
5088 	      label4 = gen_label_rtx ();
5089 	      label5 = gen_label_rtx ();
5090 	      do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
5091 	      do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
5092 			       compute_mode, label1);
5093 	      tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
5094 				  quotient, 0, methods);
5095 	      if (tem != quotient)
5096 		emit_move_insn (quotient, tem);
5097 	      emit_jump_insn (targetm.gen_jump (label5));
5098 	      emit_barrier ();
5099 	      emit_label (label1);
5100 	      expand_dec (adjusted_op0, const1_rtx);
5101 	      emit_jump_insn (targetm.gen_jump (label4));
5102 	      emit_barrier ();
5103 	      emit_label (label2);
5104 	      do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
5105 			       compute_mode, label3);
5106 	      tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
5107 				  quotient, 0, methods);
5108 	      if (tem != quotient)
5109 		emit_move_insn (quotient, tem);
5110 	      emit_jump_insn (targetm.gen_jump (label5));
5111 	      emit_barrier ();
5112 	      emit_label (label3);
5113 	      expand_inc (adjusted_op0, const1_rtx);
5114 	      emit_label (label4);
5115 	      tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
5116 				  quotient, 0, methods);
5117 	      if (tem != quotient)
5118 		emit_move_insn (quotient, tem);
5119 	      expand_inc (quotient, const1_rtx);
5120 	      emit_label (label5);
5121 	    }
5122 	  }
5123 	break;
5124 
5125       case EXACT_DIV_EXPR:
5126 	if (op1_is_constant && HWI_COMPUTABLE_MODE_P (compute_mode))
5127 	  {
5128 	    scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
5129 	    int size = GET_MODE_BITSIZE (int_mode);
5130 	    HOST_WIDE_INT d = INTVAL (op1);
5131 	    unsigned HOST_WIDE_INT ml;
5132 	    int pre_shift;
5133 	    rtx t1;
5134 
5135 	    pre_shift = ctz_or_zero (d);
5136 	    ml = invert_mod2n (d >> pre_shift, size);
5137 	    t1 = expand_shift (RSHIFT_EXPR, int_mode, op0,
5138 			       pre_shift, NULL_RTX, unsignedp);
5139 	    quotient = expand_mult (int_mode, t1, gen_int_mode (ml, int_mode),
5140 				    NULL_RTX, 1);
5141 
5142 	    insn = get_last_insn ();
5143 	    set_dst_reg_note (insn, REG_EQUAL,
5144 			      gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
5145 					      int_mode, op0, op1),
5146 			      quotient);
5147 	  }
5148 	break;
5149 
5150       case ROUND_DIV_EXPR:
5151       case ROUND_MOD_EXPR:
5152 	if (unsignedp)
5153 	  {
5154 	    scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
5155 	    rtx tem;
5156 	    rtx_code_label *label;
5157 	    label = gen_label_rtx ();
5158 	    quotient = gen_reg_rtx (int_mode);
5159 	    remainder = gen_reg_rtx (int_mode);
5160 	    if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
5161 	      {
5162 		rtx tem;
5163 		quotient = expand_binop (int_mode, udiv_optab, op0, op1,
5164 					 quotient, 1, methods);
5165 		tem = expand_mult (int_mode, quotient, op1, NULL_RTX, 1);
5166 		remainder = expand_binop (int_mode, sub_optab, op0, tem,
5167 					  remainder, 1, methods);
5168 	      }
5169 	    tem = plus_constant (int_mode, op1, -1);
5170 	    tem = expand_shift (RSHIFT_EXPR, int_mode, tem, 1, NULL_RTX, 1);
5171 	    do_cmp_and_jump (remainder, tem, LEU, int_mode, label);
5172 	    expand_inc (quotient, const1_rtx);
5173 	    expand_dec (remainder, op1);
5174 	    emit_label (label);
5175 	  }
5176 	else
5177 	  {
5178 	    scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
5179 	    int size = GET_MODE_BITSIZE (int_mode);
5180 	    rtx abs_rem, abs_op1, tem, mask;
5181 	    rtx_code_label *label;
5182 	    label = gen_label_rtx ();
5183 	    quotient = gen_reg_rtx (int_mode);
5184 	    remainder = gen_reg_rtx (int_mode);
5185 	    if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
5186 	      {
5187 		rtx tem;
5188 		quotient = expand_binop (int_mode, sdiv_optab, op0, op1,
5189 					 quotient, 0, methods);
5190 		tem = expand_mult (int_mode, quotient, op1, NULL_RTX, 0);
5191 		remainder = expand_binop (int_mode, sub_optab, op0, tem,
5192 					  remainder, 0, methods);
5193 	      }
5194 	    abs_rem = expand_abs (int_mode, remainder, NULL_RTX, 1, 0);
5195 	    abs_op1 = expand_abs (int_mode, op1, NULL_RTX, 1, 0);
5196 	    tem = expand_shift (LSHIFT_EXPR, int_mode, abs_rem,
5197 				1, NULL_RTX, 1);
5198 	    do_cmp_and_jump (tem, abs_op1, LTU, int_mode, label);
5199 	    tem = expand_binop (int_mode, xor_optab, op0, op1,
5200 				NULL_RTX, 0, OPTAB_WIDEN);
5201 	    mask = expand_shift (RSHIFT_EXPR, int_mode, tem,
5202 				 size - 1, NULL_RTX, 0);
5203 	    tem = expand_binop (int_mode, xor_optab, mask, const1_rtx,
5204 				NULL_RTX, 0, OPTAB_WIDEN);
5205 	    tem = expand_binop (int_mode, sub_optab, tem, mask,
5206 				NULL_RTX, 0, OPTAB_WIDEN);
5207 	    expand_inc (quotient, tem);
5208 	    tem = expand_binop (int_mode, xor_optab, mask, op1,
5209 				NULL_RTX, 0, OPTAB_WIDEN);
5210 	    tem = expand_binop (int_mode, sub_optab, tem, mask,
5211 				NULL_RTX, 0, OPTAB_WIDEN);
5212 	    expand_dec (remainder, tem);
5213 	    emit_label (label);
5214 	  }
5215 	return gen_lowpart (mode, rem_flag ? remainder : quotient);
5216 
5217       default:
5218 	gcc_unreachable ();
5219       }
5220 
5221   if (quotient == 0)
5222     {
5223       if (target && GET_MODE (target) != compute_mode)
5224 	target = 0;
5225 
5226       if (rem_flag)
5227 	{
5228 	  /* Try to produce the remainder without producing the quotient.
5229 	     If we seem to have a divmod pattern that does not require widening,
5230 	     don't try widening here.  We should really have a WIDEN argument
5231 	     to expand_twoval_binop, since what we'd really like to do here is
5232 	     1) try a mod insn in compute_mode
5233 	     2) try a divmod insn in compute_mode
5234 	     3) try a div insn in compute_mode and multiply-subtract to get
5235 	        remainder
5236 	     4) try the same things with widening allowed.  */
5237 	  remainder
5238 	    = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5239 				 op0, op1, target,
5240 				 unsignedp,
5241 				 ((optab_handler (optab2, compute_mode)
5242 				   != CODE_FOR_nothing)
5243 				  ? OPTAB_DIRECT : OPTAB_WIDEN));
5244 	  if (remainder == 0)
5245 	    {
5246 	      /* No luck there.  Can we do remainder and divide at once
5247 		 without a library call?  */
5248 	      remainder = gen_reg_rtx (compute_mode);
5249 	      if (! expand_twoval_binop ((unsignedp
5250 					  ? udivmod_optab
5251 					  : sdivmod_optab),
5252 					 op0, op1,
5253 					 NULL_RTX, remainder, unsignedp))
5254 		remainder = 0;
5255 	    }
5256 
5257 	  if (remainder)
5258 	    return gen_lowpart (mode, remainder);
5259 	}
5260 
5261       /* Produce the quotient.  Try a quotient insn, but not a library call.
5262 	 If we have a divmod in this mode, use it in preference to widening
5263 	 the div (for this test we assume it will not fail). Note that optab2
5264 	 is set to one of the two optabs that the call below will use.  */
5265       quotient
5266 	= sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
5267 			     op0, op1, rem_flag ? NULL_RTX : target,
5268 			     unsignedp,
5269 			     ((optab_handler (optab2, compute_mode)
5270 			       != CODE_FOR_nothing)
5271 			      ? OPTAB_DIRECT : OPTAB_WIDEN));
5272 
5273       if (quotient == 0)
5274 	{
5275 	  /* No luck there.  Try a quotient-and-remainder insn,
5276 	     keeping the quotient alone.  */
5277 	  quotient = gen_reg_rtx (compute_mode);
5278 	  if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
5279 				     op0, op1,
5280 				     quotient, NULL_RTX, unsignedp))
5281 	    {
5282 	      quotient = 0;
5283 	      if (! rem_flag)
5284 		/* Still no luck.  If we are not computing the remainder,
5285 		   use a library call for the quotient.  */
5286 		quotient = sign_expand_binop (compute_mode,
5287 					      udiv_optab, sdiv_optab,
5288 					      op0, op1, target,
5289 					      unsignedp, methods);
5290 	    }
5291 	}
5292     }
5293 
5294   if (rem_flag)
5295     {
5296       if (target && GET_MODE (target) != compute_mode)
5297 	target = 0;
5298 
5299       if (quotient == 0)
5300 	{
5301 	  /* No divide instruction either.  Use library for remainder.  */
5302 	  remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5303 					 op0, op1, target,
5304 					 unsignedp, methods);
5305 	  /* No remainder function.  Try a quotient-and-remainder
5306 	     function, keeping the remainder.  */
5307 	  if (!remainder
5308 	      && (methods == OPTAB_LIB || methods == OPTAB_LIB_WIDEN))
5309 	    {
5310 	      remainder = gen_reg_rtx (compute_mode);
5311 	      if (!expand_twoval_binop_libfunc
5312 		  (unsignedp ? udivmod_optab : sdivmod_optab,
5313 		   op0, op1,
5314 		   NULL_RTX, remainder,
5315 		   unsignedp ? UMOD : MOD))
5316 		remainder = NULL_RTX;
5317 	    }
5318 	}
5319       else
5320 	{
5321 	  /* We divided.  Now finish doing X - Y * (X / Y).  */
5322 	  remainder = expand_mult (compute_mode, quotient, op1,
5323 				   NULL_RTX, unsignedp);
5324 	  remainder = expand_binop (compute_mode, sub_optab, op0,
5325 				    remainder, target, unsignedp,
5326 				    methods);
5327 	}
5328     }
5329 
5330   if (methods != OPTAB_LIB_WIDEN
5331       && (rem_flag ? remainder : quotient) == NULL_RTX)
5332     return NULL_RTX;
5333 
5334   return gen_lowpart (mode, rem_flag ? remainder : quotient);
5335 }
5336 
5337 /* Return a tree node with data type TYPE, describing the value of X.
5338    Usually this is a VAR_DECL, if there is no obvious better choice.
5339    X may be an expression; however, we only support those expressions
5340    generated by loop.c.  */
5341 
5342 tree
5343 make_tree (tree type, rtx x)
5344 {
5345   tree t;
5346 
5347   switch (GET_CODE (x))
5348     {
5349     case CONST_INT:
5350     case CONST_WIDE_INT:
5351       t = wide_int_to_tree (type, rtx_mode_t (x, TYPE_MODE (type)));
5352       return t;
5353 
5354     case CONST_DOUBLE:
5355       STATIC_ASSERT (HOST_BITS_PER_WIDE_INT * 2 <= MAX_BITSIZE_MODE_ANY_INT);
5356       if (TARGET_SUPPORTS_WIDE_INT == 0 && GET_MODE (x) == VOIDmode)
5357 	t = wide_int_to_tree (type,
5358 			      wide_int::from_array (&CONST_DOUBLE_LOW (x), 2,
5359 						    HOST_BITS_PER_WIDE_INT * 2));
5360       else
5361 	t = build_real (type, *CONST_DOUBLE_REAL_VALUE (x));
5362 
5363       return t;
5364 
5365     case CONST_VECTOR:
5366       {
5367 	unsigned int npatterns = CONST_VECTOR_NPATTERNS (x);
5368 	unsigned int nelts_per_pattern = CONST_VECTOR_NELTS_PER_PATTERN (x);
5369 	tree itype = TREE_TYPE (type);
5370 
5371 	/* Build a tree with vector elements.  */
5372 	tree_vector_builder elts (type, npatterns, nelts_per_pattern);
5373 	unsigned int count = elts.encoded_nelts ();
5374 	for (unsigned int i = 0; i < count; ++i)
5375 	  {
5376 	    rtx elt = CONST_VECTOR_ELT (x, i);
5377 	    elts.quick_push (make_tree (itype, elt));
5378 	  }
5379 
5380 	return elts.build ();
5381       }
5382 
5383     case PLUS:
5384       return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5385 			  make_tree (type, XEXP (x, 1)));
5386 
5387     case MINUS:
5388       return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5389 			  make_tree (type, XEXP (x, 1)));
5390 
5391     case NEG:
5392       return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5393 
5394     case MULT:
5395       return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5396 			  make_tree (type, XEXP (x, 1)));
5397 
5398     case ASHIFT:
5399       return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5400 			  make_tree (type, XEXP (x, 1)));
5401 
5402     case LSHIFTRT:
5403       t = unsigned_type_for (type);
5404       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5405 			    		 make_tree (t, XEXP (x, 0)),
5406 				    	 make_tree (type, XEXP (x, 1))));
5407 
5408     case ASHIFTRT:
5409       t = signed_type_for (type);
5410       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5411 					 make_tree (t, XEXP (x, 0)),
5412 				    	 make_tree (type, XEXP (x, 1))));
5413 
5414     case DIV:
5415       if (TREE_CODE (type) != REAL_TYPE)
5416 	t = signed_type_for (type);
5417       else
5418 	t = type;
5419 
5420       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5421 				    	 make_tree (t, XEXP (x, 0)),
5422 				    	 make_tree (t, XEXP (x, 1))));
5423     case UDIV:
5424       t = unsigned_type_for (type);
5425       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5426 				    	 make_tree (t, XEXP (x, 0)),
5427 				    	 make_tree (t, XEXP (x, 1))));
5428 
5429     case SIGN_EXTEND:
5430     case ZERO_EXTEND:
5431       t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5432 					  GET_CODE (x) == ZERO_EXTEND);
5433       return fold_convert (type, make_tree (t, XEXP (x, 0)));
5434 
5435     case CONST:
5436       return make_tree (type, XEXP (x, 0));
5437 
5438     case SYMBOL_REF:
5439       t = SYMBOL_REF_DECL (x);
5440       if (t)
5441 	return fold_convert (type, build_fold_addr_expr (t));
5442       /* fall through.  */
5443 
5444     default:
5445       if (CONST_POLY_INT_P (x))
5446 	return wide_int_to_tree (t, const_poly_int_value (x));
5447 
5448       t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5449 
5450       /* If TYPE is a POINTER_TYPE, we might need to convert X from
5451 	 address mode to pointer mode.  */
5452       if (POINTER_TYPE_P (type))
5453 	x = convert_memory_address_addr_space
5454 	  (SCALAR_INT_TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5455 
5456       /* Note that we do *not* use SET_DECL_RTL here, because we do not
5457 	 want set_decl_rtl to go adjusting REG_ATTRS for this temporary.  */
5458       t->decl_with_rtl.rtl = x;
5459 
5460       return t;
5461     }
5462 }
5463 
5464 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5465    and returning TARGET.
5466 
5467    If TARGET is 0, a pseudo-register or constant is returned.  */
5468 
5469 rtx
5470 expand_and (machine_mode mode, rtx op0, rtx op1, rtx target)
5471 {
5472   rtx tem = 0;
5473 
5474   if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5475     tem = simplify_binary_operation (AND, mode, op0, op1);
5476   if (tem == 0)
5477     tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5478 
5479   if (target == 0)
5480     target = tem;
5481   else if (tem != target)
5482     emit_move_insn (target, tem);
5483   return target;
5484 }
5485 
5486 /* Helper function for emit_store_flag.  */
5487 rtx
5488 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5489 	     machine_mode mode, machine_mode compare_mode,
5490 	     int unsignedp, rtx x, rtx y, int normalizep,
5491 	     machine_mode target_mode)
5492 {
5493   class expand_operand ops[4];
5494   rtx op0, comparison, subtarget;
5495   rtx_insn *last;
5496   scalar_int_mode result_mode = targetm.cstore_mode (icode);
5497   scalar_int_mode int_target_mode;
5498 
5499   last = get_last_insn ();
5500   x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5501   y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5502   if (!x || !y)
5503     {
5504       delete_insns_since (last);
5505       return NULL_RTX;
5506     }
5507 
5508   if (target_mode == VOIDmode)
5509     int_target_mode = result_mode;
5510   else
5511     int_target_mode = as_a <scalar_int_mode> (target_mode);
5512   if (!target)
5513     target = gen_reg_rtx (int_target_mode);
5514 
5515   comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5516 
5517   create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5518   create_fixed_operand (&ops[1], comparison);
5519   create_fixed_operand (&ops[2], x);
5520   create_fixed_operand (&ops[3], y);
5521   if (!maybe_expand_insn (icode, 4, ops))
5522     {
5523       delete_insns_since (last);
5524       return NULL_RTX;
5525     }
5526   subtarget = ops[0].value;
5527 
5528   /* If we are converting to a wider mode, first convert to
5529      INT_TARGET_MODE, then normalize.  This produces better combining
5530      opportunities on machines that have a SIGN_EXTRACT when we are
5531      testing a single bit.  This mostly benefits the 68k.
5532 
5533      If STORE_FLAG_VALUE does not have the sign bit set when
5534      interpreted in MODE, we can do this conversion as unsigned, which
5535      is usually more efficient.  */
5536   if (GET_MODE_PRECISION (int_target_mode) > GET_MODE_PRECISION (result_mode))
5537     {
5538       gcc_assert (GET_MODE_PRECISION (result_mode) != 1
5539 		  || STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1);
5540 
5541       bool unsignedp = (STORE_FLAG_VALUE >= 0);
5542       convert_move (target, subtarget, unsignedp);
5543 
5544       op0 = target;
5545       result_mode = int_target_mode;
5546     }
5547   else
5548     op0 = subtarget;
5549 
5550   /* If we want to keep subexpressions around, don't reuse our last
5551      target.  */
5552   if (optimize)
5553     subtarget = 0;
5554 
5555   /* Now normalize to the proper value in MODE.  Sometimes we don't
5556      have to do anything.  */
5557   if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5558     ;
5559   /* STORE_FLAG_VALUE might be the most negative number, so write
5560      the comparison this way to avoid a compiler-time warning.  */
5561   else if (- normalizep == STORE_FLAG_VALUE)
5562     op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5563 
5564   /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5565      it hard to use a value of just the sign bit due to ANSI integer
5566      constant typing rules.  */
5567   else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5568     op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5569 			GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5570 			normalizep == 1);
5571   else
5572     {
5573       gcc_assert (STORE_FLAG_VALUE & 1);
5574 
5575       op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5576       if (normalizep == -1)
5577 	op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5578     }
5579 
5580   /* If we were converting to a smaller mode, do the conversion now.  */
5581   if (int_target_mode != result_mode)
5582     {
5583       convert_move (target, op0, 0);
5584       return target;
5585     }
5586   else
5587     return op0;
5588 }
5589 
5590 
5591 /* A subroutine of emit_store_flag only including "tricks" that do not
5592    need a recursive call.  These are kept separate to avoid infinite
5593    loops.  */
5594 
5595 static rtx
5596 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5597 		   machine_mode mode, int unsignedp, int normalizep,
5598 		   machine_mode target_mode)
5599 {
5600   rtx subtarget;
5601   enum insn_code icode;
5602   machine_mode compare_mode;
5603   enum mode_class mclass;
5604   enum rtx_code scode;
5605 
5606   if (unsignedp)
5607     code = unsigned_condition (code);
5608   scode = swap_condition (code);
5609 
5610   /* If one operand is constant, make it the second one.  Only do this
5611      if the other operand is not constant as well.  */
5612 
5613   if (swap_commutative_operands_p (op0, op1))
5614     {
5615       std::swap (op0, op1);
5616       code = swap_condition (code);
5617     }
5618 
5619   if (mode == VOIDmode)
5620     mode = GET_MODE (op0);
5621 
5622   if (CONST_SCALAR_INT_P (op1))
5623     canonicalize_comparison (mode, &code, &op1);
5624 
5625   /* For some comparisons with 1 and -1, we can convert this to
5626      comparisons with zero.  This will often produce more opportunities for
5627      store-flag insns.  */
5628 
5629   switch (code)
5630     {
5631     case LT:
5632       if (op1 == const1_rtx)
5633 	op1 = const0_rtx, code = LE;
5634       break;
5635     case LE:
5636       if (op1 == constm1_rtx)
5637 	op1 = const0_rtx, code = LT;
5638       break;
5639     case GE:
5640       if (op1 == const1_rtx)
5641 	op1 = const0_rtx, code = GT;
5642       break;
5643     case GT:
5644       if (op1 == constm1_rtx)
5645 	op1 = const0_rtx, code = GE;
5646       break;
5647     case GEU:
5648       if (op1 == const1_rtx)
5649 	op1 = const0_rtx, code = NE;
5650       break;
5651     case LTU:
5652       if (op1 == const1_rtx)
5653 	op1 = const0_rtx, code = EQ;
5654       break;
5655     default:
5656       break;
5657     }
5658 
5659   /* If we are comparing a double-word integer with zero or -1, we can
5660      convert the comparison into one involving a single word.  */
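  /* Editor's note, as an example on a 32-bit-word target: a 64-bit "x == 0"
     becomes an IOR of the two 32-bit halves compared against zero,
     "x == -1" uses AND instead, and "x < 0" / "x >= 0" only need to look at
     the sign bit of the high word.  */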
5661   scalar_int_mode int_mode;
5662   if (is_int_mode (mode, &int_mode)
5663       && GET_MODE_BITSIZE (int_mode) == BITS_PER_WORD * 2
5664       && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5665     {
5666       rtx tem;
5667       if ((code == EQ || code == NE)
5668 	  && (op1 == const0_rtx || op1 == constm1_rtx))
5669 	{
5670 	  rtx op00, op01;
5671 
5672 	  /* Do a logical OR or AND of the two words and compare the
5673 	     result.  */
5674 	  op00 = simplify_gen_subreg (word_mode, op0, int_mode, 0);
5675 	  op01 = simplify_gen_subreg (word_mode, op0, int_mode, UNITS_PER_WORD);
5676 	  tem = expand_binop (word_mode,
5677 			      op1 == const0_rtx ? ior_optab : and_optab,
5678 			      op00, op01, NULL_RTX, unsignedp,
5679 			      OPTAB_DIRECT);
5680 
5681 	  if (tem != 0)
5682 	    tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5683 				   unsignedp, normalizep);
5684 	}
5685       else if ((code == LT || code == GE) && op1 == const0_rtx)
5686 	{
5687 	  rtx op0h;
5688 
5689 	  /* If testing the sign bit, can just test on high word.  */
5690 	  op0h = simplify_gen_subreg (word_mode, op0, int_mode,
5691 				      subreg_highpart_offset (word_mode,
5692 							      int_mode));
5693 	  tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5694 				 unsignedp, normalizep);
5695 	}
5696       else
5697 	tem = NULL_RTX;
5698 
5699       if (tem)
5700 	{
5701 	  if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5702 	    return tem;
5703 	  if (!target)
5704 	    target = gen_reg_rtx (target_mode);
5705 
5706 	  convert_move (target, tem,
5707 			!val_signbit_known_set_p (word_mode,
5708 						  (normalizep ? normalizep
5709 						   : STORE_FLAG_VALUE)));
5710 	  return target;
5711 	}
5712     }
5713 
5714   /* If this is A < 0 or A >= 0, we can do this by taking the ones
5715      complement of A (for GE) and shifting the sign bit to the low bit.  */
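  /* Editor's note: in a 32-bit mode this amounts to computing "A < 0" as
     (unsigned) A >> 31 and "A >= 0" as (unsigned) ~A >> 31 when a 0/1 value
     is wanted, or using an arithmetic shift instead to get 0/-1.  */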
5716   if (op1 == const0_rtx && (code == LT || code == GE)
5717       && is_int_mode (mode, &int_mode)
5718       && (normalizep || STORE_FLAG_VALUE == 1
5719 	  || val_signbit_p (int_mode, STORE_FLAG_VALUE)))
5720     {
5721       scalar_int_mode int_target_mode;
5722       subtarget = target;
5723 
5724       if (!target)
5725 	int_target_mode = int_mode;
5726       else
5727 	{
5728 	  /* If the result is to be wider than OP0, it is best to convert it
5729 	     first.  If it is to be narrower, it is *incorrect* to convert it
5730 	     first.  */
5731 	  int_target_mode = as_a <scalar_int_mode> (target_mode);
5732 	  if (GET_MODE_SIZE (int_target_mode) > GET_MODE_SIZE (int_mode))
5733 	    {
5734 	      op0 = convert_modes (int_target_mode, int_mode, op0, 0);
5735 	      int_mode = int_target_mode;
5736 	    }
5737 	}
5738 
5739       if (int_target_mode != int_mode)
5740 	subtarget = 0;
5741 
5742       if (code == GE)
5743 	op0 = expand_unop (int_mode, one_cmpl_optab, op0,
5744 			   ((STORE_FLAG_VALUE == 1 || normalizep)
5745 			    ? 0 : subtarget), 0);
5746 
5747       if (STORE_FLAG_VALUE == 1 || normalizep)
5748 	/* If we are supposed to produce a 0/1 value, we want to do
5749 	   a logical shift from the sign bit to the low-order bit; for
5750 	   a -1/0 value, we do an arithmetic shift.  */
5751 	op0 = expand_shift (RSHIFT_EXPR, int_mode, op0,
5752 			    GET_MODE_BITSIZE (int_mode) - 1,
5753 			    subtarget, normalizep != -1);
5754 
5755       if (int_mode != int_target_mode)
5756 	op0 = convert_modes (int_target_mode, int_mode, op0, 0);
5757 
5758       return op0;
5759     }
5760 
5761   mclass = GET_MODE_CLASS (mode);
5762   FOR_EACH_MODE_FROM (compare_mode, mode)
5763     {
5764      machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5765      icode = optab_handler (cstore_optab, optab_mode);
5766      if (icode != CODE_FOR_nothing)
5767 	{
5768 	  do_pending_stack_adjust ();
5769 	  rtx tem = emit_cstore (target, icode, code, mode, compare_mode,
5770 				 unsignedp, op0, op1, normalizep, target_mode);
5771 	  if (tem)
5772 	    return tem;
5773 
5774 	  if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5775 	    {
5776 	      tem = emit_cstore (target, icode, scode, mode, compare_mode,
5777 				 unsignedp, op1, op0, normalizep, target_mode);
5778 	      if (tem)
5779 	        return tem;
5780 	    }
5781 	  break;
5782 	}
5783     }
5784 
5785   return 0;
5786 }
5787 
5788 /* Subroutine of emit_store_flag that handles cases in which the operands
5789    are scalar integers.  SUBTARGET is the target to use for temporary
5790    operations and TRUEVAL is the value to store when the condition is
5791    true.  All other arguments are as for emit_store_flag.  */
5792 
5793 rtx
5794 emit_store_flag_int (rtx target, rtx subtarget, enum rtx_code code, rtx op0,
5795 		     rtx op1, scalar_int_mode mode, int unsignedp,
5796 		     int normalizep, rtx trueval)
5797 {
5798   machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5799   rtx_insn *last = get_last_insn ();
5800 
5801   /* If this is an equality comparison of integers, we can try to exclusive-or
5802      (or subtract) the two operands and use a recursive call to try the
5803      comparison with zero.  Don't do any of these cases if branches are
5804      very cheap.  */
5805 
5806   if ((code == EQ || code == NE) && op1 != const0_rtx)
5807     {
5808       rtx tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5809 			      OPTAB_WIDEN);
5810 
5811       if (tem == 0)
5812 	tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5813 			    OPTAB_WIDEN);
5814       if (tem != 0)
5815 	tem = emit_store_flag (target, code, tem, const0_rtx,
5816 			       mode, unsignedp, normalizep);
5817       if (tem != 0)
5818 	return tem;
5819 
5820       delete_insns_since (last);
5821     }
5822 
5823   /* For integer comparisons, try the reverse comparison.  However, for
5824      small X and if we'd have anyway to extend, implementing "X != 0"
5825      as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0".  */
5826   rtx_code rcode = reverse_condition (code);
5827   if (can_compare_p (rcode, mode, ccp_store_flag)
5828       && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5829 	    && code == NE
5830 	    && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5831 	    && op1 == const0_rtx))
5832     {
5833       int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5834 		      || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5835 
5836       /* Again, for the reverse comparison, use either an addition or a XOR.  */
5837       if (want_add
5838 	  && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
5839 		       optimize_insn_for_speed_p ()) == 0)
5840 	{
5841 	  rtx tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5842 				       STORE_FLAG_VALUE, target_mode);
5843 	  if (tem != 0)
5844 	    tem = expand_binop (target_mode, add_optab, tem,
5845 				gen_int_mode (normalizep, target_mode),
5846 				target, 0, OPTAB_WIDEN);
5847 	  if (tem != 0)
5848 	    return tem;
5849 	}
5850       else if (!want_add
5851 	       && rtx_cost (trueval, mode, XOR, 1,
5852 			    optimize_insn_for_speed_p ()) == 0)
5853 	{
5854 	  rtx tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5855 				       normalizep, target_mode);
5856 	  if (tem != 0)
5857 	    tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5858 				INTVAL (trueval) >= 0, OPTAB_WIDEN);
5859 	  if (tem != 0)
5860 	    return tem;
5861 	}
5862 
5863       delete_insns_since (last);
5864     }
5865 
5866   /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5867      the constant zero.  Reject all other comparisons at this point.  Only
5868      do LE and GT if branches are expensive since they are expensive on
5869      2-operand machines.  */
5870 
5871   if (op1 != const0_rtx
5872       || (code != EQ && code != NE
5873 	  && (BRANCH_COST (optimize_insn_for_speed_p (),
5874 			   false) <= 1 || (code != LE && code != GT))))
5875     return 0;
5876 
5877   /* Try to put the result of the comparison in the sign bit.  Assume we can't
5878      do the necessary operation below.  */
5879 
5880   rtx tem = 0;
5881 
5882   /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
5883      the sign bit set.  */
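  /* For example, in SImode: A = 3 gives 3 | 2 = 3 (sign bit clear, A > 0);
     A = 0 gives 0 | -1 = -1 (sign bit set); A = -5 gives -5 | -6 = -5
     (sign bit set).  */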
5884 
5885   if (code == LE)
5886     {
5887       /* This is destructive, so SUBTARGET can't be OP0.  */
5888       if (rtx_equal_p (subtarget, op0))
5889 	subtarget = 0;
5890 
5891       tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5892 			  OPTAB_WIDEN);
5893       if (tem)
5894 	tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5895 			    OPTAB_WIDEN);
5896     }
5897 
5898   /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5899      number of bits in the mode of OP0, minus one.  */
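  /* For example, in SImode: A = 7 gives (7 >> 31) - 7 = -7 (sign bit set,
     A > 0); A = 0 gives 0 - 0 = 0 (sign bit clear); A = -3 gives
     -1 - (-3) = 2 (sign bit clear).  */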
5900 
5901   if (code == GT)
5902     {
5903       if (rtx_equal_p (subtarget, op0))
5904 	subtarget = 0;
5905 
5906       tem = maybe_expand_shift (RSHIFT_EXPR, mode, op0,
5907 				GET_MODE_BITSIZE (mode) - 1,
5908 				subtarget, 0);
5909       if (tem)
5910 	tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5911 			    OPTAB_WIDEN);
5912     }
5913 
5914   if (code == EQ || code == NE)
5915     {
5916       /* For EQ or NE, one way to do the comparison is to apply an operation
5917 	 that converts the operand into a positive number if it is nonzero
5918 	 or zero if it was originally zero.  Then, for EQ, we subtract 1 and
5919 	 for NE we negate.  This puts the result in the sign bit.  Then we
5920 	 normalize with a shift, if needed.
5921 
5922 	 Two operations that can do the above actions are ABS and FFS, so try
5923 	 them.  If that doesn't work, and MODE is smaller than a full word,
5924 	 we can use zero-extension to the wider mode (an unsigned conversion)
5925 	 as the operation.  */
5926 
5927       /* Note that ABS doesn't yield a positive number for INT_MIN, but
5928 	 that is compensated by the subsequent overflow when subtracting
5929 	 one / negating.  */
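      /* For instance, with A = 5: ABS gives 5, so EQ computes 5 - 1 = 4
	 (sign bit clear) and NE computes -5 (sign bit set); with A = 0: EQ
	 computes 0 - 1 = -1 (sign bit set) and NE computes 0 (clear).  */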
5930 
5931       if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5932 	tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5933       else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5934 	tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5935       else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5936 	{
5937 	  tem = convert_modes (word_mode, mode, op0, 1);
5938 	  mode = word_mode;
5939 	}
5940 
5941       if (tem != 0)
5942 	{
5943 	  if (code == EQ)
5944 	    tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5945 				0, OPTAB_WIDEN);
5946 	  else
5947 	    tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5948 	}
5949 
5950       /* If we couldn't do it that way, for NE we can "or" the two's complement
5951 	 of the value with itself.  For EQ, we take the one's complement of
5952 	 that "or", which is an extra insn, so we only handle EQ if branches
5953 	 are expensive.  */
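      /* This works because for any nonzero A at least one of A and -A is
	 negative, so (-A | A) has the sign bit set, while (-0 | 0) is 0.  */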
5954 
5955       if (tem == 0
5956 	  && (code == NE
5957 	      || BRANCH_COST (optimize_insn_for_speed_p (),
5958 			      false) > 1))
5959 	{
5960 	  if (rtx_equal_p (subtarget, op0))
5961 	    subtarget = 0;
5962 
5963 	  tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5964 	  tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5965 			      OPTAB_WIDEN);
5966 
5967 	  if (tem && code == EQ)
5968 	    tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5969 	}
5970     }
5971 
5972   if (tem && normalizep)
5973     tem = maybe_expand_shift (RSHIFT_EXPR, mode, tem,
5974 			      GET_MODE_BITSIZE (mode) - 1,
5975 			      subtarget, normalizep == 1);
5976 
5977   if (tem)
5978     {
5979       if (!target)
5980 	;
5981       else if (GET_MODE (tem) != target_mode)
5982 	{
5983 	  convert_move (target, tem, 0);
5984 	  tem = target;
5985 	}
5986       else if (!subtarget)
5987 	{
5988 	  emit_move_insn (target, tem);
5989 	  tem = target;
5990 	}
5991     }
5992   else
5993     delete_insns_since (last);
5994 
5995   return tem;
5996 }
5997 
5998 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5999    and storing in TARGET.  Normally return TARGET.
6000    Return 0 if that cannot be done.
6001 
6002    MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
6003    it is VOIDmode, they cannot both be CONST_INT.
6004 
6005    UNSIGNEDP is for the case where we have to widen the operands
6006    to perform the operation.  It says to use zero-extension.
6007 
6008    NORMALIZEP is 1 if we should convert the result to be either zero
6009    or one.  Normalize is -1 if we should convert the result to be
6010    or one.  NORMALIZEP is -1 if we should convert the result to be
6011    "raw" out of the scc insn.  */
6012 
6013 rtx
6014 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
6015 		 machine_mode mode, int unsignedp, int normalizep)
6016 {
6017   machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
6018   enum rtx_code rcode;
6019   rtx subtarget;
6020   rtx tem, trueval;
6021   rtx_insn *last;
6022 
6023   /* If we compare constants, we shouldn't use a store-flag operation,
6024      but a constant load.  We can get there via the vanilla route that
6025      usually generates a compare-branch sequence, but will in this case
6026      fold the comparison to a constant, and thus elide the branch.  */
6027   if (CONSTANT_P (op0) && CONSTANT_P (op1))
6028     return NULL_RTX;
6029 
6030   tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
6031 			   target_mode);
6032   if (tem)
6033     return tem;
6034 
6035   /* If we reached here, we can't do this with a scc insn; however, there
6036      are some comparisons that can be done in other ways.  Don't do any
6037      of these cases if branches are very cheap.  */
6038   if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
6039     return 0;
6040 
6041   /* See what we need to return.  We can only return a 1, -1, or the
6042      sign bit.  */
6043 
6044   if (normalizep == 0)
6045     {
6046       if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
6047 	normalizep = STORE_FLAG_VALUE;
6048 
6049       else if (val_signbit_p (mode, STORE_FLAG_VALUE))
6050 	;
6051       else
6052 	return 0;
6053     }
6054 
6055   last = get_last_insn ();
6056 
6057   /* If optimizing, use different pseudo registers for each insn, instead
6058      of reusing the same pseudo.  This leads to better CSE, but slows
6059      down the compiler, since there are more pseudos.  */
6060   subtarget = (!optimize
6061 	       && (target_mode == mode)) ? target : NULL_RTX;
6062   trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
6063 
6064   /* For floating-point comparisons, try the reverse comparison or try
6065      changing the "orderedness" of the comparison.  */
6066   if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6067     {
6068       enum rtx_code first_code;
6069       bool and_them;
6070 
6071       rcode = reverse_condition_maybe_unordered (code);
6072       if (can_compare_p (rcode, mode, ccp_store_flag)
6073 	  && (code == ORDERED || code == UNORDERED
6074 	      || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
6075 	      || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
6076 	{
6077 	  int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
6078 			  || (STORE_FLAG_VALUE == -1 && normalizep == 1));
6079 
6080 	  /* For the reverse comparison, use either an addition or an XOR.  */
6081 	  if (want_add
6082 	      && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
6083 			   optimize_insn_for_speed_p ()) == 0)
6084 	    {
6085 	      tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
6086 				       STORE_FLAG_VALUE, target_mode);
6087 	      if (tem)
6088 		return expand_binop (target_mode, add_optab, tem,
6089 				     gen_int_mode (normalizep, target_mode),
6090 				     target, 0, OPTAB_WIDEN);
6091 	    }
6092 	  else if (!want_add
6093 		   && rtx_cost (trueval, mode, XOR, 1,
6094 				optimize_insn_for_speed_p ()) == 0)
6095 	    {
6096 	      tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
6097 				       normalizep, target_mode);
6098 	      if (tem)
6099 		return expand_binop (target_mode, xor_optab, tem, trueval,
6100 				     target, INTVAL (trueval) >= 0,
6101 				     OPTAB_WIDEN);
6102 	    }
6103 	}
6104 
6105       delete_insns_since (last);
6106 
6107       /* Cannot split ORDERED and UNORDERED, only try the above trick.  */
6108       if (code == ORDERED || code == UNORDERED)
6109 	return 0;
6110 
6111       and_them = split_comparison (code, mode, &first_code, &code);
6112 
6113       /* If there are no NaNs, the first comparison should always fall through.
6114 	 Effectively change the comparison to the other one.  */
6115       if (!HONOR_NANS (mode))
6116 	{
6117 	  gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
6118 	  return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
6119 				    target_mode);
6120 	}
6121 
6122       if (!HAVE_conditional_move)
6123 	return 0;
6124 
6125       /* Do not turn a trapping comparison into a non-trapping one.  */
6126       if ((code != EQ && code != NE && code != UNEQ && code != LTGT)
6127 	  && flag_trapping_math)
6128 	return 0;
6129 
6130       /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
6131 	 conditional move.  */
6132       tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
6133 			       normalizep, target_mode);
6134       if (tem == 0)
6135 	return 0;
6136 
6137       if (and_them)
6138 	tem = emit_conditional_move (target, { code, op0, op1, mode },
6139 				     tem, const0_rtx, GET_MODE (tem), 0);
6140       else
6141 	tem = emit_conditional_move (target, { code, op0, op1, mode },
6142 				     trueval, tem, GET_MODE (tem), 0);
6143 
6144       if (tem == 0)
6145 	delete_insns_since (last);
6146       return tem;
6147     }
6148 
6149   /* The remaining tricks only apply to integer comparisons.  */
6150 
6151   scalar_int_mode int_mode;
6152   if (is_int_mode (mode, &int_mode))
6153     return emit_store_flag_int (target, subtarget, code, op0, op1, int_mode,
6154 				unsignedp, normalizep, trueval);
6155 
6156   return 0;
6157 }
6158 
6159 /* Like emit_store_flag, but always succeeds.  */
6160 
6161 rtx
6162 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
6163 		       machine_mode mode, int unsignedp, int normalizep)
6164 {
6165   rtx tem;
6166   rtx_code_label *label;
6167   rtx trueval, falseval;
6168 
6169   /* First see if emit_store_flag can do the job.  */
6170   tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
6171   if (tem != 0)
6172     return tem;
6173 
6174   /* If one operand is constant, make it the second one.  Only do this
6175      if the other operand is not constant as well.  */
6176   if (swap_commutative_operands_p (op0, op1))
6177     {
6178       std::swap (op0, op1);
6179       code = swap_condition (code);
6180     }
6181 
6182   if (mode == VOIDmode)
6183     mode = GET_MODE (op0);
6184 
6185   if (!target)
6186     target = gen_reg_rtx (word_mode);
6187 
6188   /* If this failed, we have to do this with set/compare/jump/set code.
6189      For foo != 0, if foo is in OP0, just replace it with 1 if nonzero.  */
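  /* In other words, the branch-around sequence below implements
     "if (target != 0) target = TRUEVAL;", leaving a zero TARGET as is.  */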
6190   trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
6191   if (code == NE
6192       && GET_MODE_CLASS (mode) == MODE_INT
6193       && REG_P (target)
6194       && op0 == target
6195       && op1 == const0_rtx)
6196     {
6197       label = gen_label_rtx ();
6198       do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp, mode,
6199 			       NULL_RTX, NULL, label,
6200 			       profile_probability::uninitialized ());
6201       emit_move_insn (target, trueval);
6202       emit_label (label);
6203       return target;
6204     }
6205 
6206   if (!REG_P (target)
6207       || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
6208     target = gen_reg_rtx (GET_MODE (target));
6209 
6210   /* Jump in the right direction if the target cannot implement CODE
6211      but can jump on its reverse condition.  */
6212   falseval = const0_rtx;
6213   if (! can_compare_p (code, mode, ccp_jump)
6214       && (! FLOAT_MODE_P (mode)
6215           || code == ORDERED || code == UNORDERED
6216           || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
6217           || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
6218     {
6219       enum rtx_code rcode;
6220       if (FLOAT_MODE_P (mode))
6221         rcode = reverse_condition_maybe_unordered (code);
6222       else
6223         rcode = reverse_condition (code);
6224 
6225       /* Canonicalize to UNORDERED for the libcall.  */
6226       if (can_compare_p (rcode, mode, ccp_jump)
6227           || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
6228 	{
6229 	  falseval = trueval;
6230 	  trueval = const0_rtx;
6231 	  code = rcode;
6232 	}
6233     }
6234 
6235   emit_move_insn (target, trueval);
6236   label = gen_label_rtx ();
6237   do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX, NULL,
6238 			   label, profile_probability::uninitialized ());
6239 
6240   emit_move_insn (target, falseval);
6241   emit_label (label);
6242 
6243   return target;
6244 }
6245 
6246 /* Helper function for canonicalize_cmp_for_target.  Swap between inclusive
6247    and exclusive ranges in order to create an equivalent comparison.  See
6248    canonicalize_cmp_for_target for the possible cases.  */
6249 
6250 static enum rtx_code
6251 equivalent_cmp_code (enum rtx_code code)
6252 {
6253   switch (code)
6254     {
6255     case GT:
6256       return GE;
6257     case GE:
6258       return GT;
6259     case LT:
6260       return LE;
6261     case LE:
6262       return LT;
6263     case GTU:
6264       return GEU;
6265     case GEU:
6266       return GTU;
6267     case LTU:
6268       return LEU;
6269     case LEU:
6270       return LTU;
6271 
6272     default:
6273       return code;
6274     }
6275 }
6276 
6277 /* Choose the more appropriate immediate in scalar integer comparisons.  The
6278    purpose of this is to end up with an immediate which can be loaded into a
6279    register in fewer moves, if possible.
6280 
6281    For each integer comparison there exists an equivalent choice:
6282      i)   a >  b or a >= b + 1
6283      ii)  a <= b or a <  b + 1
6284      iii) a >= b or a >  b - 1
6285      iv)  a <  b or a <= b - 1
6286 
6287    MODE is the mode of the first operand.
6288    CODE points to the comparison code.
6289    IMM points to the rtx containing the immediate.  *IMM must satisfy
6290    CONST_SCALAR_INT_P on entry and continues to satisfy CONST_SCALAR_INT_P
6291    on exit.  */
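/* For instance, "a > 0xffff" can be rewritten as "a >= 0x10000" when the
   target can materialize 0x10000 more cheaply than 0xffff; the insn_cost
   comparison below decides whether the rewrite is profitable.  */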
6292 
6293 void
6294 canonicalize_comparison (machine_mode mode, enum rtx_code *code, rtx *imm)
6295 {
6296   if (!SCALAR_INT_MODE_P (mode))
6297     return;
6298 
6299   int to_add = 0;
6300   enum signop sgn = unsigned_condition_p (*code) ? UNSIGNED : SIGNED;
6301 
6302   /* Extract the immediate value from the rtx.  */
6303   wide_int imm_val = rtx_mode_t (*imm, mode);
6304 
6305   if (*code == GT || *code == GTU || *code == LE || *code == LEU)
6306     to_add = 1;
6307   else if (*code == GE || *code == GEU || *code == LT || *code == LTU)
6308     to_add = -1;
6309   else
6310     return;
6311 
6312   /* Check for overflow/underflow in the case of signed values and
6313      wrapping around in the case of unsigned values.  If any occur,
6314      cancel the optimization.  */
6315   wi::overflow_type overflow = wi::OVF_NONE;
6316   wide_int imm_modif;
6317 
6318   if (to_add == 1)
6319     imm_modif = wi::add (imm_val, 1, sgn, &overflow);
6320   else
6321     imm_modif = wi::sub (imm_val, 1, sgn, &overflow);
6322 
6323   if (overflow)
6324     return;
6325 
6326   /* The following creates a pseudo; if we cannot do that, bail out.  */
6327   if (!can_create_pseudo_p ())
6328     return;
6329 
6330   rtx reg = gen_rtx_REG (mode, LAST_VIRTUAL_REGISTER + 1);
6331   rtx new_imm = immed_wide_int_const (imm_modif, mode);
6332 
6333   rtx_insn *old_rtx = gen_move_insn (reg, *imm);
6334   rtx_insn *new_rtx = gen_move_insn (reg, new_imm);
6335 
6336   /* Update the immediate and the code.  */
6337   if (insn_cost (old_rtx, true) > insn_cost (new_rtx, true))
6338     {
6339       *code = equivalent_cmp_code (*code);
6340       *imm = new_imm;
6341     }
6342 }
6343 
6344 
6345 
6346 /* Perform possibly multi-word comparison and conditional jump to LABEL
6347    if ARG1 OP ARG2 is true, where ARG1 and ARG2 are of mode MODE.  This is
6348    now a thin wrapper around do_compare_rtx_and_jump.  */
6349 
6350 static void
6351 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, machine_mode mode,
6352 		 rtx_code_label *label)
6353 {
6354   int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
6355   do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode, NULL_RTX,
6356 			   NULL, label, profile_probability::uninitialized ());
6357 }
6358