xref: /netbsd-src/external/gpl3/gcc.old/dist/gcc/expmed.c (revision cef8759bd76c1b621f8eab8faa6f208faabc2e15)
1 /* Medium-level subroutines: convert bit-field store and extract
2    and shifts, multiplies and divides to rtl instructions.
3    Copyright (C) 1987-2017 Free Software Foundation, Inc.
4 
5 This file is part of GCC.
6 
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11 
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3.  If not see
19 <http://www.gnu.org/licenses/>.  */
20 
21 
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "predict.h"
30 #include "memmodel.h"
31 #include "tm_p.h"
32 #include "expmed.h"
33 #include "optabs.h"
34 #include "emit-rtl.h"
35 #include "diagnostic-core.h"
36 #include "fold-const.h"
37 #include "stor-layout.h"
38 #include "dojump.h"
39 #include "explow.h"
40 #include "expr.h"
41 #include "langhooks.h"
42 
43 struct target_expmed default_target_expmed;
44 #if SWITCHABLE_TARGET
45 struct target_expmed *this_target_expmed = &default_target_expmed;
46 #endif
47 
48 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
49 				   unsigned HOST_WIDE_INT,
50 				   unsigned HOST_WIDE_INT,
51 				   unsigned HOST_WIDE_INT,
52 				   rtx, bool);
53 static void store_fixed_bit_field_1 (rtx, unsigned HOST_WIDE_INT,
54 				     unsigned HOST_WIDE_INT,
55 				     rtx, bool);
56 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
57 				   unsigned HOST_WIDE_INT,
58 				   unsigned HOST_WIDE_INT,
59 				   unsigned HOST_WIDE_INT,
60 				   rtx, bool);
61 static rtx extract_fixed_bit_field (machine_mode, rtx,
62 				    unsigned HOST_WIDE_INT,
63 				    unsigned HOST_WIDE_INT, rtx, int, bool);
64 static rtx extract_fixed_bit_field_1 (machine_mode, rtx,
65 				      unsigned HOST_WIDE_INT,
66 				      unsigned HOST_WIDE_INT, rtx, int, bool);
67 static rtx lshift_value (machine_mode, unsigned HOST_WIDE_INT, int);
68 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
69 				    unsigned HOST_WIDE_INT, int, bool);
70 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, machine_mode, rtx_code_label *);
71 static rtx expand_smod_pow2 (machine_mode, rtx, HOST_WIDE_INT);
72 static rtx expand_sdiv_pow2 (machine_mode, rtx, HOST_WIDE_INT);
73 
74 /* Return a constant integer mask value of mode MODE with BITSIZE ones
75    followed by BITPOS zeros, or the complement of that if COMPLEMENT.
76    The mask is truncated if necessary to the width of mode MODE.  The
77    mask is zero-extended if BITSIZE+BITPOS is too small for MODE.  */
78 
79 static inline rtx
80 mask_rtx (machine_mode mode, int bitpos, int bitsize, bool complement)
81 {
82   return immed_wide_int_const
83     (wi::shifted_mask (bitpos, bitsize, complement,
84 		       GET_MODE_PRECISION (mode)), mode);
85 }
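
/* Illustrative example (added, not part of the original source): with the
   definition above, mask_rtx (SImode, 4, 3, false) yields a CONST_INT with
   value 0x70 -- three one bits starting at bit 4 -- while passing true for
   COMPLEMENT would yield that value inverted within the precision of
   SImode.  */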
86 
87 /* Test whether a value is zero or a power of two.  */
88 #define EXACT_POWER_OF_2_OR_ZERO_P(x) \
89   (((x) & ((x) - HOST_WIDE_INT_1U)) == 0)
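
/* Added note: the macro uses the familiar x & (x - 1) trick.  For a power
   of two such as 8, 8 & 7 == 0; for a non-power such as 6, 6 & 5 == 4; and
   for 0, 0 & (0 - 1) == 0, which is why zero is accepted as well.  */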
90 
91 struct init_expmed_rtl
92 {
93   rtx reg;
94   rtx plus;
95   rtx neg;
96   rtx mult;
97   rtx sdiv;
98   rtx udiv;
99   rtx sdiv_32;
100   rtx smod_32;
101   rtx wide_mult;
102   rtx wide_lshr;
103   rtx wide_trunc;
104   rtx shift;
105   rtx shift_mult;
106   rtx shift_add;
107   rtx shift_sub0;
108   rtx shift_sub1;
109   rtx zext;
110   rtx trunc;
111 
112   rtx pow2[MAX_BITS_PER_WORD];
113   rtx cint[MAX_BITS_PER_WORD];
114 };
115 
116 static void
117 init_expmed_one_conv (struct init_expmed_rtl *all, machine_mode to_mode,
118 		      machine_mode from_mode, bool speed)
119 {
120   int to_size, from_size;
121   rtx which;
122 
123   to_size = GET_MODE_PRECISION (to_mode);
124   from_size = GET_MODE_PRECISION (from_mode);
125 
126   /* Most partial integer modes have a precision less than that of
127      the "full" integer mode required for their storage.  In case one
128      does not, reduce its bit size by one for the purposes of this
129      comparison.  */
130   if (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT
131       && pow2p_hwi (to_size))
132     to_size --;
133   if (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT
134       && pow2p_hwi (from_size))
135     from_size --;
136 
137   /* Assume cost of zero-extend and sign-extend is the same.  */
138   which = (to_size < from_size ? all->trunc : all->zext);
139 
140   PUT_MODE (all->reg, from_mode);
141   set_convert_cost (to_mode, from_mode, speed,
142 		    set_src_cost (which, to_mode, speed));
143 }
144 
145 static void
146 init_expmed_one_mode (struct init_expmed_rtl *all,
147 		      machine_mode mode, int speed)
148 {
149   int m, n, mode_bitsize;
150   machine_mode mode_from;
151 
152   mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
153 
154   PUT_MODE (all->reg, mode);
155   PUT_MODE (all->plus, mode);
156   PUT_MODE (all->neg, mode);
157   PUT_MODE (all->mult, mode);
158   PUT_MODE (all->sdiv, mode);
159   PUT_MODE (all->udiv, mode);
160   PUT_MODE (all->sdiv_32, mode);
161   PUT_MODE (all->smod_32, mode);
162   PUT_MODE (all->wide_trunc, mode);
163   PUT_MODE (all->shift, mode);
164   PUT_MODE (all->shift_mult, mode);
165   PUT_MODE (all->shift_add, mode);
166   PUT_MODE (all->shift_sub0, mode);
167   PUT_MODE (all->shift_sub1, mode);
168   PUT_MODE (all->zext, mode);
169   PUT_MODE (all->trunc, mode);
170 
171   set_add_cost (speed, mode, set_src_cost (all->plus, mode, speed));
172   set_neg_cost (speed, mode, set_src_cost (all->neg, mode, speed));
173   set_mul_cost (speed, mode, set_src_cost (all->mult, mode, speed));
174   set_sdiv_cost (speed, mode, set_src_cost (all->sdiv, mode, speed));
175   set_udiv_cost (speed, mode, set_src_cost (all->udiv, mode, speed));
176 
177   set_sdiv_pow2_cheap (speed, mode, (set_src_cost (all->sdiv_32, mode, speed)
178 				     <= 2 * add_cost (speed, mode)));
179   set_smod_pow2_cheap (speed, mode, (set_src_cost (all->smod_32, mode, speed)
180 				     <= 4 * add_cost (speed, mode)));
181 
182   set_shift_cost (speed, mode, 0, 0);
183   {
184     int cost = add_cost (speed, mode);
185     set_shiftadd_cost (speed, mode, 0, cost);
186     set_shiftsub0_cost (speed, mode, 0, cost);
187     set_shiftsub1_cost (speed, mode, 0, cost);
188   }
189 
190   n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
191   for (m = 1; m < n; m++)
192     {
193       XEXP (all->shift, 1) = all->cint[m];
194       XEXP (all->shift_mult, 1) = all->pow2[m];
195 
196       set_shift_cost (speed, mode, m, set_src_cost (all->shift, mode, speed));
197       set_shiftadd_cost (speed, mode, m, set_src_cost (all->shift_add, mode,
198 						       speed));
199       set_shiftsub0_cost (speed, mode, m, set_src_cost (all->shift_sub0, mode,
200 							speed));
201       set_shiftsub1_cost (speed, mode, m, set_src_cost (all->shift_sub1, mode,
202 							speed));
203     }
204 
205   if (SCALAR_INT_MODE_P (mode))
206     {
207       for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
208 	   mode_from = (machine_mode)(mode_from + 1))
209 	init_expmed_one_conv (all, mode, mode_from, speed);
210     }
211   if (GET_MODE_CLASS (mode) == MODE_INT)
212     {
213       machine_mode  wider_mode = GET_MODE_WIDER_MODE (mode);
214       if (wider_mode != VOIDmode)
215 	{
216 	  PUT_MODE (all->zext, wider_mode);
217 	  PUT_MODE (all->wide_mult, wider_mode);
218 	  PUT_MODE (all->wide_lshr, wider_mode);
219 	  XEXP (all->wide_lshr, 1) = GEN_INT (mode_bitsize);
220 
221 	  set_mul_widen_cost (speed, wider_mode,
222 			      set_src_cost (all->wide_mult, wider_mode, speed));
223 	  set_mul_highpart_cost (speed, mode,
224 				 set_src_cost (all->wide_trunc, mode, speed));
225 	}
226     }
227 }
228 
229 void
230 init_expmed (void)
231 {
232   struct init_expmed_rtl all;
233   machine_mode mode = QImode;
234   int m, speed;
235 
236   memset (&all, 0, sizeof all);
237   for (m = 1; m < MAX_BITS_PER_WORD; m++)
238     {
239       all.pow2[m] = GEN_INT (HOST_WIDE_INT_1 << m);
240       all.cint[m] = GEN_INT (m);
241     }
242 
243   /* Avoid using hard regs in ways which may be unsupported.  */
244   all.reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
245   all.plus = gen_rtx_PLUS (mode, all.reg, all.reg);
246   all.neg = gen_rtx_NEG (mode, all.reg);
247   all.mult = gen_rtx_MULT (mode, all.reg, all.reg);
248   all.sdiv = gen_rtx_DIV (mode, all.reg, all.reg);
249   all.udiv = gen_rtx_UDIV (mode, all.reg, all.reg);
250   all.sdiv_32 = gen_rtx_DIV (mode, all.reg, all.pow2[5]);
251   all.smod_32 = gen_rtx_MOD (mode, all.reg, all.pow2[5]);
252   all.zext = gen_rtx_ZERO_EXTEND (mode, all.reg);
253   all.wide_mult = gen_rtx_MULT (mode, all.zext, all.zext);
254   all.wide_lshr = gen_rtx_LSHIFTRT (mode, all.wide_mult, all.reg);
255   all.wide_trunc = gen_rtx_TRUNCATE (mode, all.wide_lshr);
256   all.shift = gen_rtx_ASHIFT (mode, all.reg, all.reg);
257   all.shift_mult = gen_rtx_MULT (mode, all.reg, all.reg);
258   all.shift_add = gen_rtx_PLUS (mode, all.shift_mult, all.reg);
259   all.shift_sub0 = gen_rtx_MINUS (mode, all.shift_mult, all.reg);
260   all.shift_sub1 = gen_rtx_MINUS (mode, all.reg, all.shift_mult);
261   all.trunc = gen_rtx_TRUNCATE (mode, all.reg);
262 
263   for (speed = 0; speed < 2; speed++)
264     {
265       crtl->maybe_hot_insn_p = speed;
266       set_zero_cost (speed, set_src_cost (const0_rtx, mode, speed));
267 
268       for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
269 	   mode = (machine_mode)(mode + 1))
270 	init_expmed_one_mode (&all, mode, speed);
271 
272       if (MIN_MODE_PARTIAL_INT != VOIDmode)
273 	for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
274 	     mode = (machine_mode)(mode + 1))
275 	  init_expmed_one_mode (&all, mode, speed);
276 
277       if (MIN_MODE_VECTOR_INT != VOIDmode)
278 	for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
279 	     mode = (machine_mode)(mode + 1))
280 	  init_expmed_one_mode (&all, mode, speed);
281     }
282 
283   if (alg_hash_used_p ())
284     {
285       struct alg_hash_entry *p = alg_hash_entry_ptr (0);
286       memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
287     }
288   else
289     set_alg_hash_used_p (true);
290   default_rtl_profile ();
291 
292   ggc_free (all.trunc);
293   ggc_free (all.shift_sub1);
294   ggc_free (all.shift_sub0);
295   ggc_free (all.shift_add);
296   ggc_free (all.shift_mult);
297   ggc_free (all.shift);
298   ggc_free (all.wide_trunc);
299   ggc_free (all.wide_lshr);
300   ggc_free (all.wide_mult);
301   ggc_free (all.zext);
302   ggc_free (all.smod_32);
303   ggc_free (all.sdiv_32);
304   ggc_free (all.udiv);
305   ggc_free (all.sdiv);
306   ggc_free (all.mult);
307   ggc_free (all.neg);
308   ggc_free (all.plus);
309   ggc_free (all.reg);
310 }
311 
312 /* Return an rtx representing minus the value of X.
313    MODE is the intended mode of the result,
314    useful if X is a CONST_INT.  */
315 
316 rtx
317 negate_rtx (machine_mode mode, rtx x)
318 {
319   rtx result = simplify_unary_operation (NEG, mode, x, mode);
320 
321   if (result == 0)
322     result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
323 
324   return result;
325 }
326 
327 /* Whether reverse storage order is supported on the target.  */
328 static int reverse_storage_order_supported = -1;
329 
330 /* Check whether reverse storage order is supported on the target.  */
331 
332 static void
333 check_reverse_storage_order_support (void)
334 {
335   if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
336     {
337       reverse_storage_order_supported = 0;
338       sorry ("reverse scalar storage order");
339     }
340   else
341     reverse_storage_order_supported = 1;
342 }
343 
344 /* Whether reverse FP storage order is supported on the target.  */
345 static int reverse_float_storage_order_supported = -1;
346 
347 /* Check whether reverse FP storage order is supported on the target.  */
348 
349 static void
350 check_reverse_float_storage_order_support (void)
351 {
352   if (FLOAT_WORDS_BIG_ENDIAN != WORDS_BIG_ENDIAN)
353     {
354       reverse_float_storage_order_supported = 0;
355       sorry ("reverse floating-point scalar storage order");
356     }
357   else
358     reverse_float_storage_order_supported = 1;
359 }
360 
361 /* Return an rtx representing value of X with reverse storage order.
362    MODE is the intended mode of the result,
363    useful if X is a CONST_INT.  */
364 
365 rtx
366 flip_storage_order (enum machine_mode mode, rtx x)
367 {
368   enum machine_mode int_mode;
369   rtx result;
370 
371   if (mode == QImode)
372     return x;
373 
374   if (COMPLEX_MODE_P (mode))
375     {
376       rtx real = read_complex_part (x, false);
377       rtx imag = read_complex_part (x, true);
378 
379       real = flip_storage_order (GET_MODE_INNER (mode), real);
380       imag = flip_storage_order (GET_MODE_INNER (mode), imag);
381 
382       return gen_rtx_CONCAT (mode, real, imag);
383     }
384 
385   if (__builtin_expect (reverse_storage_order_supported < 0, 0))
386     check_reverse_storage_order_support ();
387 
388   if (SCALAR_INT_MODE_P (mode))
389     int_mode = mode;
390   else
391     {
392       if (FLOAT_MODE_P (mode)
393 	  && __builtin_expect (reverse_float_storage_order_supported < 0, 0))
394 	check_reverse_float_storage_order_support ();
395 
396       int_mode = mode_for_size (GET_MODE_PRECISION (mode), MODE_INT, 0);
397       if (int_mode == BLKmode)
398 	{
399 	  sorry ("reverse storage order for %smode", GET_MODE_NAME (mode));
400 	  return x;
401 	}
402       x = gen_lowpart (int_mode, x);
403     }
404 
405   result = simplify_unary_operation (BSWAP, int_mode, x, int_mode);
406   if (result == 0)
407     result = expand_unop (int_mode, bswap_optab, x, NULL_RTX, 1);
408 
409   if (int_mode != mode)
410     result = gen_lowpart (mode, result);
411 
412   return result;
413 }
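
/* Illustrative note (added, not part of the original source): for a scalar
   integer mode the function above amounts to a byte swap, e.g. an SImode
   value 0x12345678 becomes 0x78563412.  A non-integer scalar is first punned
   to the integer mode of the same precision, byte-swapped there, and punned
   back to the original mode.  */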
414 
415 /* Adjust bitfield memory MEM so that it points to the first unit of mode
416    MODE that contains a bitfield of size BITSIZE at bit position BITNUM.
417    If MODE is BLKmode, return a reference to every byte in the bitfield.
418    Set *NEW_BITNUM to the bit position of the field within the new memory.  */
419 
420 static rtx
421 narrow_bit_field_mem (rtx mem, machine_mode mode,
422 		      unsigned HOST_WIDE_INT bitsize,
423 		      unsigned HOST_WIDE_INT bitnum,
424 		      unsigned HOST_WIDE_INT *new_bitnum)
425 {
426   if (mode == BLKmode)
427     {
428       *new_bitnum = bitnum % BITS_PER_UNIT;
429       HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
430       HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
431 			    / BITS_PER_UNIT);
432       return adjust_bitfield_address_size (mem, mode, offset, size);
433     }
434   else
435     {
436       unsigned int unit = GET_MODE_BITSIZE (mode);
437       *new_bitnum = bitnum % unit;
438       HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
439       return adjust_bitfield_address (mem, mode, offset);
440     }
441 }
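
/* Worked example (added): on a byte-addressed target with BITS_PER_UNIT == 8,
   narrowing to HImode (16 bits) with BITNUM == 37 gives *NEW_BITNUM == 37 % 16
   == 5 and a byte offset of (37 - 5) / 8 == 4, i.e. the returned memory points
   4 bytes into MEM and the field starts 5 bits into that HImode unit.  In the
   BLKmode case the offset is simply BITNUM / 8 and the size is the field
   rounded up to whole bytes.  */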
442 
443 /* The caller wants to perform insertion or extraction PATTERN on a
444    bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
445    BITREGION_START and BITREGION_END are as for store_bit_field
446    and FIELDMODE is the natural mode of the field.
447 
448    Search for a mode that is compatible with the memory access
449    restrictions and (where applicable) with a register insertion or
450    extraction.  Return the new memory on success, storing the adjusted
451    bit position in *NEW_BITNUM.  Return null otherwise.  */
452 
453 static rtx
454 adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
455 			      rtx op0, HOST_WIDE_INT bitsize,
456 			      HOST_WIDE_INT bitnum,
457 			      unsigned HOST_WIDE_INT bitregion_start,
458 			      unsigned HOST_WIDE_INT bitregion_end,
459 			      machine_mode fieldmode,
460 			      unsigned HOST_WIDE_INT *new_bitnum)
461 {
462   bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
463 				bitregion_end, MEM_ALIGN (op0),
464 				MEM_VOLATILE_P (op0));
465   machine_mode best_mode;
466   if (iter.next_mode (&best_mode))
467     {
468       /* We can use a memory in BEST_MODE.  See whether this is true for
469 	 any wider modes.  All other things being equal, we prefer to
470 	 use the widest mode possible because it tends to expose more
471 	 CSE opportunities.  */
472       if (!iter.prefer_smaller_modes ())
473 	{
474 	  /* Limit the search to the mode required by the corresponding
475 	     register insertion or extraction instruction, if any.  */
476 	  machine_mode limit_mode = word_mode;
477 	  extraction_insn insn;
478 	  if (get_best_reg_extraction_insn (&insn, pattern,
479 					    GET_MODE_BITSIZE (best_mode),
480 					    fieldmode))
481 	    limit_mode = insn.field_mode;
482 
483 	  machine_mode wider_mode;
484 	  while (iter.next_mode (&wider_mode)
485 		 && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
486 	    best_mode = wider_mode;
487 	}
488       return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
489 				   new_bitnum);
490     }
491   return NULL_RTX;
492 }
493 
494 /* Return true if a bitfield of size BITSIZE at bit number BITNUM within
495    a structure of mode STRUCT_MODE represents a lowpart subreg.  The subreg
496    offset is then BITNUM / BITS_PER_UNIT.  */
497 
498 static bool
499 lowpart_bit_field_p (unsigned HOST_WIDE_INT bitnum,
500 		     unsigned HOST_WIDE_INT bitsize,
501 		     machine_mode struct_mode)
502 {
503   if (BYTES_BIG_ENDIAN)
504     return (bitnum % BITS_PER_UNIT == 0
505 	    && (bitnum + bitsize == GET_MODE_BITSIZE (struct_mode)
506 		|| (bitnum + bitsize) % BITS_PER_WORD == 0));
507   else
508     return bitnum % BITS_PER_WORD == 0;
509 }
510 
511 /* Return true if -fstrict-volatile-bitfields applies to an access of OP0
512    containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE.
513    Return false if the access would touch memory outside the range
514    BITREGION_START to BITREGION_END for conformance to the C++ memory
515    model.  */
516 
517 static bool
518 strict_volatile_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
519 			    unsigned HOST_WIDE_INT bitnum,
520 			    machine_mode fieldmode,
521 			    unsigned HOST_WIDE_INT bitregion_start,
522 			    unsigned HOST_WIDE_INT bitregion_end)
523 {
524   unsigned HOST_WIDE_INT modesize = GET_MODE_BITSIZE (fieldmode);
525 
526   /* -fstrict-volatile-bitfields must be enabled and we must have a
527      volatile MEM.  */
528   if (!MEM_P (op0)
529       || !MEM_VOLATILE_P (op0)
530       || flag_strict_volatile_bitfields <= 0)
531     return false;
532 
533   /* Non-integral modes likely only happen with packed structures.
534      Punt.  */
535   if (!SCALAR_INT_MODE_P (fieldmode))
536     return false;
537 
538   /* The bit size must not be larger than the field mode, and
539      the field mode must not be larger than a word.  */
540   if (bitsize > modesize || modesize > BITS_PER_WORD)
541     return false;
542 
543   /* Check for cases of unaligned fields that must be split.  */
544   if (bitnum % modesize + bitsize > modesize)
545     return false;
546 
547   /* The memory must be sufficiently aligned for a MODESIZE access.
548      This condition guarantees that the memory access will not
549      touch anything after the end of the structure.  */
550   if (MEM_ALIGN (op0) < modesize)
551     return false;
552 
553   /* Check for cases where the C++ memory model applies.  */
554   if (bitregion_end != 0
555       && (bitnum - bitnum % modesize < bitregion_start
556 	  || bitnum - bitnum % modesize + modesize - 1 > bitregion_end))
557     return false;
558 
559   return true;
560 }
561 
562 /* Return true if OP is a memory and if a bitfield of size BITSIZE at
563    bit number BITNUM can be treated as a simple value of mode MODE.  */
564 
565 static bool
566 simple_mem_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
567 		       unsigned HOST_WIDE_INT bitnum, machine_mode mode)
568 {
569   return (MEM_P (op0)
570 	  && bitnum % BITS_PER_UNIT == 0
571 	  && bitsize == GET_MODE_BITSIZE (mode)
572 	  && (!SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
573 	      || (bitnum % GET_MODE_ALIGNMENT (mode) == 0
574 		  && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode))));
575 }
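
/* Illustrative example (added): a 32-bit field starting at bit 64 of a
   sufficiently aligned MEM satisfies these conditions for SImode, so the
   caller can access it with a single SImode move instead of going through
   the generic bit-field machinery.  */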
576 
577 /* Try to use instruction INSV to store VALUE into a field of OP0.
578    BITSIZE and BITNUM are as for store_bit_field.  */
579 
580 static bool
581 store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
582 			    unsigned HOST_WIDE_INT bitsize,
583 			    unsigned HOST_WIDE_INT bitnum,
584 			    rtx value)
585 {
586   struct expand_operand ops[4];
587   rtx value1;
588   rtx xop0 = op0;
589   rtx_insn *last = get_last_insn ();
590   bool copy_back = false;
591 
592   machine_mode op_mode = insv->field_mode;
593   unsigned int unit = GET_MODE_BITSIZE (op_mode);
594   if (bitsize == 0 || bitsize > unit)
595     return false;
596 
597   if (MEM_P (xop0))
598     /* Get a reference to the first byte of the field.  */
599     xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
600 				 &bitnum);
601   else
602     {
603       /* Convert from counting within OP0 to counting in OP_MODE.  */
604       if (BYTES_BIG_ENDIAN)
605 	bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
606 
607       /* If xop0 is a register, we need it in OP_MODE
608 	 to make it acceptable to the format of insv.  */
609       if (GET_CODE (xop0) == SUBREG)
610 	/* We can't just change the mode, because this might clobber op0,
611 	   and we will need the original value of op0 if insv fails.  */
612 	xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
613       if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
614 	xop0 = gen_lowpart_SUBREG (op_mode, xop0);
615     }
616 
617   /* If the destination is a paradoxical subreg such that we need a
618      truncate to the inner mode, perform the insertion on a temporary and
619      truncate the result to the original destination.  Note that we can't
620      just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
621      X) 0)) is (reg:N X).  */
622   if (GET_CODE (xop0) == SUBREG
623       && REG_P (SUBREG_REG (xop0))
624       && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
625 					 op_mode))
626     {
627       rtx tem = gen_reg_rtx (op_mode);
628       emit_move_insn (tem, xop0);
629       xop0 = tem;
630       copy_back = true;
631     }
632 
633   /* There is a similar overflow check at the start of store_bit_field_1,
634      but it only handles the case where the field lies completely
635      outside the register.  There are also cases where the field lies
636      only partially in the register, and then BITSIZE needs to be
637      adjusted for the partial overflow.  Without this fix, pr48335-2.c
638      breaks on big-endian targets that have a bit-insert instruction,
639      such as arm and aarch64.  */
640   if (bitsize + bitnum > unit && bitnum < unit)
641     {
642       warning (OPT_Wextra, "write of %wu-bit data outside the bound of "
643 	       "destination object, data truncated into %wu-bit",
644 	       bitsize, unit - bitnum);
645       bitsize = unit - bitnum;
646     }
647 
648   /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
649      "backwards" from the size of the unit we are inserting into.
650      Otherwise, we count bits from the most significant on a
651      BYTES/BITS_BIG_ENDIAN machine.  */
652 
653   if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
654     bitnum = unit - bitsize - bitnum;
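  /* Example (added comment): with UNIT == 32, a 5-bit field at BITNUM 3
     becomes BITNUM 32 - 5 - 3 == 24 when the insv pattern numbers bits from
     the opposite end.  */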
655 
656   /* Convert VALUE to op_mode (which insv insn wants) in VALUE1.  */
657   value1 = value;
658   if (GET_MODE (value) != op_mode)
659     {
660       if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
661 	{
662 	  rtx tmp;
663 	  /* Optimization: Don't bother really extending VALUE
664 	     if it has all the bits we will actually use.  However,
665 	     if we must narrow it, be sure we do it correctly.  */
666 
667 	  if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
668 	    {
669 	      tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
670 	      if (! tmp)
671 		tmp = simplify_gen_subreg (op_mode,
672 					   force_reg (GET_MODE (value),
673 						      value1),
674 					   GET_MODE (value), 0);
675 	    }
676 	  else
677 	    {
678 	      tmp = gen_lowpart_if_possible (op_mode, value1);
679 	      if (! tmp)
680 		tmp = gen_lowpart (op_mode, force_reg (GET_MODE (value),
681 						       value1));
682 	    }
683 	  value1 = tmp;
684 	}
685       else if (CONST_INT_P (value))
686 	value1 = gen_int_mode (INTVAL (value), op_mode);
687       else
688 	/* Parse phase is supposed to make VALUE's data type
689 	   match that of the component reference, which is a type
690 	   at least as wide as the field; so VALUE should have
691 	   a mode that corresponds to that type.  */
692 	gcc_assert (CONSTANT_P (value));
693     }
694 
695   create_fixed_operand (&ops[0], xop0);
696   create_integer_operand (&ops[1], bitsize);
697   create_integer_operand (&ops[2], bitnum);
698   create_input_operand (&ops[3], value1, op_mode);
699   if (maybe_expand_insn (insv->icode, 4, ops))
700     {
701       if (copy_back)
702 	convert_move (op0, xop0, true);
703       return true;
704     }
705   delete_insns_since (last);
706   return false;
707 }
708 
709 /* A subroutine of store_bit_field, with the same arguments.  Return true
710    if the operation could be implemented.
711 
712    If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
713    no other way of implementing the operation.  If FALLBACK_P is false,
714    return false instead.  */
715 
716 static bool
717 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
718 		   unsigned HOST_WIDE_INT bitnum,
719 		   unsigned HOST_WIDE_INT bitregion_start,
720 		   unsigned HOST_WIDE_INT bitregion_end,
721 		   machine_mode fieldmode,
722 		   rtx value, bool reverse, bool fallback_p)
723 {
724   rtx op0 = str_rtx;
725   rtx orig_value;
726 
727   while (GET_CODE (op0) == SUBREG)
728     {
729       /* The following line once was done only if WORDS_BIG_ENDIAN,
730 	 but I think that is a mistake.  WORDS_BIG_ENDIAN is
731 	 meaningful at a much higher level; when structures are copied
732 	 between memory and regs, the higher-numbered regs
733 	 always get higher addresses.  */
734       int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
735       int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
736       int byte_offset = 0;
737 
738       /* Paradoxical subregs need special handling on big-endian machines.  */
739       if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
740 	{
741 	  int difference = inner_mode_size - outer_mode_size;
742 
743 	  if (WORDS_BIG_ENDIAN)
744 	    byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
745 	  if (BYTES_BIG_ENDIAN)
746 	    byte_offset += difference % UNITS_PER_WORD;
747 	}
748       else
749 	byte_offset = SUBREG_BYTE (op0);
750 
751       bitnum += byte_offset * BITS_PER_UNIT;
752       op0 = SUBREG_REG (op0);
753     }
754 
755   /* No action is needed if the target is a register and if the field
756      lies completely outside that register.  This can occur if the source
757      code contains an out-of-bounds access to a small array.  */
758   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
759     return true;
760 
761   /* Use vec_set patterns for inserting parts of vectors whenever
762      available.  */
763   if (VECTOR_MODE_P (GET_MODE (op0))
764       && !MEM_P (op0)
765       && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing
766       && fieldmode == GET_MODE_INNER (GET_MODE (op0))
767       && bitsize == GET_MODE_UNIT_BITSIZE (GET_MODE (op0))
768       && !(bitnum % GET_MODE_UNIT_BITSIZE (GET_MODE (op0))))
769     {
770       struct expand_operand ops[3];
771       machine_mode outermode = GET_MODE (op0);
772       machine_mode innermode = GET_MODE_INNER (outermode);
773       enum insn_code icode = optab_handler (vec_set_optab, outermode);
774       int pos = bitnum / GET_MODE_BITSIZE (innermode);
775 
776       create_fixed_operand (&ops[0], op0);
777       create_input_operand (&ops[1], value, innermode);
778       create_integer_operand (&ops[2], pos);
779       if (maybe_expand_insn (icode, 3, ops))
780 	return true;
781     }
782 
783   /* If the target is a register, overwriting the entire object, or storing
784      a full-word or multi-word field can be done with just a SUBREG.  */
785   if (!MEM_P (op0)
786       && bitsize == GET_MODE_BITSIZE (fieldmode)
787       && ((bitsize == GET_MODE_BITSIZE (GET_MODE (op0)) && bitnum == 0)
788 	  || (bitsize % BITS_PER_WORD == 0 && bitnum % BITS_PER_WORD == 0)))
789     {
790       /* Use the subreg machinery either to narrow OP0 to the required
791 	 words or to cope with mode punning between equal-sized modes.
792 	 In the latter case, use subreg on the rhs side, not lhs.  */
793       rtx sub;
794 
795       if (bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
796 	{
797 	  sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0);
798 	  if (sub)
799 	    {
800 	      if (reverse)
801 		sub = flip_storage_order (GET_MODE (op0), sub);
802 	      emit_move_insn (op0, sub);
803 	      return true;
804 	    }
805 	}
806       else
807 	{
808 	  sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
809 				     bitnum / BITS_PER_UNIT);
810 	  if (sub)
811 	    {
812 	      if (reverse)
813 		value = flip_storage_order (fieldmode, value);
814 	      emit_move_insn (sub, value);
815 	      return true;
816 	    }
817 	}
818     }
819 
820   /* If the target is memory, storing any naturally aligned field can be
821      done with a simple store.  For targets that support fast unaligned
822      memory, any naturally sized, unit aligned field can be done directly.  */
823   if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode))
824     {
825       op0 = adjust_bitfield_address (op0, fieldmode, bitnum / BITS_PER_UNIT);
826       if (reverse)
827 	value = flip_storage_order (fieldmode, value);
828       emit_move_insn (op0, value);
829       return true;
830     }
831 
832   /* Make sure we are playing with integral modes.  Pun with subregs
833      if we aren't.  This must come after the entire register case above,
834      since that case is valid for any mode.  The following cases are only
835      valid for integral modes.  */
836   {
837     machine_mode imode = int_mode_for_mode (GET_MODE (op0));
838     if (imode != GET_MODE (op0))
839       {
840 	if (MEM_P (op0))
841 	  op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
842 	else
843 	  {
844 	    gcc_assert (imode != BLKmode);
845 	    op0 = gen_lowpart (imode, op0);
846 	  }
847       }
848   }
849 
850   /* Storing an lsb-aligned field in a register
851      can be done with a movstrict instruction.  */
852 
853   if (!MEM_P (op0)
854       && !reverse
855       && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
856       && bitsize == GET_MODE_BITSIZE (fieldmode)
857       && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
858     {
859       struct expand_operand ops[2];
860       enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
861       rtx arg0 = op0;
862       unsigned HOST_WIDE_INT subreg_off;
863 
864       if (GET_CODE (arg0) == SUBREG)
865 	{
866 	  /* Else we've got some float mode source being extracted into
867 	     a different float mode destination -- this combination of
868 	     subregs results in Severe Tire Damage.  */
869 	  gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
870 		      || GET_MODE_CLASS (fieldmode) == MODE_INT
871 		      || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
872 	  arg0 = SUBREG_REG (arg0);
873 	}
874 
875       subreg_off = bitnum / BITS_PER_UNIT;
876       if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
877 	{
878 	  arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
879 
880 	  create_fixed_operand (&ops[0], arg0);
881 	  /* Shrink the source operand to FIELDMODE.  */
882 	  create_convert_operand_to (&ops[1], value, fieldmode, false);
883 	  if (maybe_expand_insn (icode, 2, ops))
884 	    return true;
885 	}
886     }
887 
888   /* Handle fields bigger than a word.  */
889 
890   if (bitsize > BITS_PER_WORD)
891     {
892       /* Here we transfer the words of the field
893 	 in the order least significant first.
894 	 This is because the most significant word is the one which may
895 	 be less than full.
896 	 However, only do that if the value is not BLKmode.  */
897 
898       const bool backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
899       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
900       unsigned int i;
901       rtx_insn *last;
902 
903       /* This is the mode we must force value to, so that there will be enough
904 	 subwords to extract.  Note that fieldmode will often (always?) be
905 	 VOIDmode, because that is what store_field uses to indicate that this
906 	 is a bit field, but passing VOIDmode to operand_subword_force
907 	 is not allowed.  */
908       fieldmode = GET_MODE (value);
909       if (fieldmode == VOIDmode)
910 	fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
911 
912       last = get_last_insn ();
913       for (i = 0; i < nwords; i++)
914 	{
915 	  /* If I is 0, use the low-order word in both field and target;
916 	     if I is 1, use the next to lowest word; and so on.  */
917 	  unsigned int wordnum = (backwards
918 				  ? GET_MODE_SIZE (fieldmode) / UNITS_PER_WORD
919 				  - i - 1
920 				  : i);
921 	  unsigned int bit_offset = (backwards ^ reverse
922 				     ? MAX ((int) bitsize - ((int) i + 1)
923 					    * BITS_PER_WORD,
924 					    0)
925 				     : (int) i * BITS_PER_WORD);
926 	  rtx value_word = operand_subword_force (value, wordnum, fieldmode);
927 	  unsigned HOST_WIDE_INT new_bitsize =
928 	    MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
929 
930 	  /* If the remaining chunk doesn't have full wordsize we have
931 	     to make sure that for big-endian machines the higher order
932 	     bits are used.  */
933 	  if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
934 	    value_word = simplify_expand_binop (word_mode, lshr_optab,
935 						value_word,
936 						GEN_INT (BITS_PER_WORD
937 							 - new_bitsize),
938 						NULL_RTX, true,
939 						OPTAB_LIB_WIDEN);
940 
941 	  if (!store_bit_field_1 (op0, new_bitsize,
942 				  bitnum + bit_offset,
943 				  bitregion_start, bitregion_end,
944 				  word_mode,
945 				  value_word, reverse, fallback_p))
946 	    {
947 	      delete_insns_since (last);
948 	      return false;
949 	    }
950 	}
951       return true;
952     }
953 
954   /* If VALUE has a floating-point or complex mode, access it as an
955      integer of the corresponding size.  This can occur on a machine
956      with 64 bit registers that uses SFmode for float.  It can also
957      occur for unaligned float or complex fields.  */
958   orig_value = value;
959   if (GET_MODE (value) != VOIDmode
960       && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
961       && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
962     {
963       value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
964       emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
965     }
966 
967   /* If OP0 is a multi-word register, narrow it to the affected word.
968      If the region spans two words, defer to store_split_bit_field.
969      Don't do this if op0 is a single hard register wider than word
970      such as a float or vector register.  */
971   if (!MEM_P (op0)
972       && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD
973       && (!REG_P (op0)
974 	  || !HARD_REGISTER_P (op0)
975 	  || HARD_REGNO_NREGS (REGNO (op0), GET_MODE (op0)) != 1))
976     {
977       if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
978 	{
979 	  if (!fallback_p)
980 	    return false;
981 
982 	  store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
983 				 bitregion_end, value, reverse);
984 	  return true;
985 	}
986       op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
987 				 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
988       gcc_assert (op0);
989       bitnum %= BITS_PER_WORD;
990     }
991 
992   /* From here on we can assume that the field to be stored in fits
993      within a word.  If the destination is a register, it too fits
994      in a word.  */
995 
996   extraction_insn insv;
997   if (!MEM_P (op0)
998       && !reverse
999       && get_best_reg_extraction_insn (&insv, EP_insv,
1000 				       GET_MODE_BITSIZE (GET_MODE (op0)),
1001 				       fieldmode)
1002       && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
1003     return true;
1004 
1005   /* If OP0 is a memory, try copying it to a register and seeing if a
1006      cheap register alternative is available.  */
1007   if (MEM_P (op0) && !reverse)
1008     {
1009       if (get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
1010 					fieldmode)
1011 	  && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
1012 	return true;
1013 
1014       rtx_insn *last = get_last_insn ();
1015 
1016       /* Try loading part of OP0 into a register, inserting the bitfield
1017 	 into that, and then copying the result back to OP0.  */
1018       unsigned HOST_WIDE_INT bitpos;
1019       rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
1020 					       bitregion_start, bitregion_end,
1021 					       fieldmode, &bitpos);
1022       if (xop0)
1023 	{
1024 	  rtx tempreg = copy_to_reg (xop0);
1025 	  if (store_bit_field_1 (tempreg, bitsize, bitpos,
1026 				 bitregion_start, bitregion_end,
1027 				 fieldmode, orig_value, reverse, false))
1028 	    {
1029 	      emit_move_insn (xop0, tempreg);
1030 	      return true;
1031 	    }
1032 	  delete_insns_since (last);
1033 	}
1034     }
1035 
1036   if (!fallback_p)
1037     return false;
1038 
1039   store_fixed_bit_field (op0, bitsize, bitnum, bitregion_start,
1040 			 bitregion_end, value, reverse);
1041   return true;
1042 }
1043 
1044 /* Generate code to store value from rtx VALUE
1045    into a bit-field within structure STR_RTX
1046    containing BITSIZE bits starting at bit BITNUM.
1047 
1048    BITREGION_START is the bit position of the first bitfield in this region.
1049    BITREGION_END is the bit position of the ending bitfield in this region.
1050    These two fields are 0 if the C++ memory model does not apply, or if
1051    we are not interested in keeping track of bitfield regions.
1052 
1053    FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
1054 
1055    If REVERSE is true, the store is to be done in reverse order.  */
1056 
1057 void
1058 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1059 		 unsigned HOST_WIDE_INT bitnum,
1060 		 unsigned HOST_WIDE_INT bitregion_start,
1061 		 unsigned HOST_WIDE_INT bitregion_end,
1062 		 machine_mode fieldmode,
1063 		 rtx value, bool reverse)
1064 {
1065   /* Handle -fstrict-volatile-bitfields in the cases where it applies.  */
1066   if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, fieldmode,
1067 				  bitregion_start, bitregion_end))
1068     {
1069       /* Storing a full word can be done with a simple store.
1070 	 We know here that the field can be accessed with one single
1071 	 instruction.  For targets that support unaligned memory,
1072 	 an unaligned access may be necessary.  */
1073       if (bitsize == GET_MODE_BITSIZE (fieldmode))
1074 	{
1075 	  str_rtx = adjust_bitfield_address (str_rtx, fieldmode,
1076 					     bitnum / BITS_PER_UNIT);
1077 	  if (reverse)
1078 	    value = flip_storage_order (fieldmode, value);
1079 	  gcc_assert (bitnum % BITS_PER_UNIT == 0);
1080 	  emit_move_insn (str_rtx, value);
1081 	}
1082       else
1083 	{
1084 	  rtx temp;
1085 
1086 	  str_rtx = narrow_bit_field_mem (str_rtx, fieldmode, bitsize, bitnum,
1087 					  &bitnum);
1088 	  gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (fieldmode));
1089 	  temp = copy_to_reg (str_rtx);
1090 	  if (!store_bit_field_1 (temp, bitsize, bitnum, 0, 0,
1091 				  fieldmode, value, reverse, true))
1092 	    gcc_unreachable ();
1093 
1094 	  emit_move_insn (str_rtx, temp);
1095 	}
1096 
1097       return;
1098     }
1099 
1100   /* Under the C++0x memory model, we must not touch bits outside the
1101      bit region.  Adjust the address to start at the beginning of the
1102      bit region.  */
1103   if (MEM_P (str_rtx) && bitregion_start > 0)
1104     {
1105       machine_mode bestmode;
1106       HOST_WIDE_INT offset, size;
1107 
1108       gcc_assert ((bitregion_start % BITS_PER_UNIT) == 0);
1109 
1110       offset = bitregion_start / BITS_PER_UNIT;
1111       bitnum -= bitregion_start;
1112       size = (bitnum + bitsize + BITS_PER_UNIT - 1) / BITS_PER_UNIT;
1113       bitregion_end -= bitregion_start;
1114       bitregion_start = 0;
1115       bestmode = get_best_mode (bitsize, bitnum,
1116 				bitregion_start, bitregion_end,
1117 				MEM_ALIGN (str_rtx), VOIDmode,
1118 				MEM_VOLATILE_P (str_rtx));
1119       str_rtx = adjust_bitfield_address_size (str_rtx, bestmode, offset, size);
1120     }
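
  /* Worked example of the adjustment above (added comment, assuming
     BITS_PER_UNIT == 8): with BITREGION_START == 32 and a store of BITSIZE 8
     at BITNUM 40, OFFSET becomes 4 bytes, BITNUM becomes 8, SIZE becomes
     (8 + 8 + 7) / 8 == 2 bytes, and the region is renumbered so that
     BITREGION_START is 0.  */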
1121 
1122   if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
1123 			  bitregion_start, bitregion_end,
1124 			  fieldmode, value, reverse, true))
1125     gcc_unreachable ();
1126 }
1127 
1128 /* Use shifts and boolean operations to store VALUE into a bit field of
1129    width BITSIZE in OP0, starting at bit BITNUM.
1130 
1131    If REVERSE is true, the store is to be done in reverse order.  */
1132 
1133 static void
1134 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1135 		       unsigned HOST_WIDE_INT bitnum,
1136 		       unsigned HOST_WIDE_INT bitregion_start,
1137 		       unsigned HOST_WIDE_INT bitregion_end,
1138 		       rtx value, bool reverse)
1139 {
1140   /* There is a case not handled here:
1141      a structure with a known alignment of just a halfword
1142      and a field split across two aligned halfwords within the structure.
1143      Or likewise a structure with a known alignment of just a byte
1144      and a field split across two bytes.
1145      Such cases are not supposed to be able to occur.  */
1146 
1147   if (MEM_P (op0))
1148     {
1149       machine_mode mode = GET_MODE (op0);
1150       if (GET_MODE_BITSIZE (mode) == 0
1151 	  || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
1152 	mode = word_mode;
1153       mode = get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
1154 			    MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
1155 
1156       if (mode == VOIDmode)
1157 	{
1158 	  /* The only way this should occur is if the field spans word
1159 	     boundaries.  */
1160 	  store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
1161 				 bitregion_end, value, reverse);
1162 	  return;
1163 	}
1164 
1165       op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
1166     }
1167 
1168   store_fixed_bit_field_1 (op0, bitsize, bitnum, value, reverse);
1169 }
1170 
1171 /* Helper function for store_fixed_bit_field, stores
1172    the bit field always using the MODE of OP0.  */
1173 
1174 static void
1175 store_fixed_bit_field_1 (rtx op0, unsigned HOST_WIDE_INT bitsize,
1176 			 unsigned HOST_WIDE_INT bitnum,
1177 			 rtx value, bool reverse)
1178 {
1179   machine_mode mode;
1180   rtx temp;
1181   int all_zero = 0;
1182   int all_one = 0;
1183 
1184   mode = GET_MODE (op0);
1185   gcc_assert (SCALAR_INT_MODE_P (mode));
1186 
1187   /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1188      for invalid input, such as f5 from gcc.dg/pr48335-2.c.  */
1189 
1190   if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1191     /* BITNUM is the distance between our msb
1192        and that of the containing datum.
1193        Convert it to the distance from the lsb.  */
1194     bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1195 
1196   /* Now BITNUM is always the distance between our lsb
1197      and that of OP0.  */
1198 
1199   /* Shift VALUE left by BITNUM bits.  If VALUE is not constant,
1200      we must first convert its mode to MODE.  */
1201 
1202   if (CONST_INT_P (value))
1203     {
1204       unsigned HOST_WIDE_INT v = UINTVAL (value);
1205 
1206       if (bitsize < HOST_BITS_PER_WIDE_INT)
1207 	v &= (HOST_WIDE_INT_1U << bitsize) - 1;
1208 
1209       if (v == 0)
1210 	all_zero = 1;
1211       else if ((bitsize < HOST_BITS_PER_WIDE_INT
1212 		&& v == (HOST_WIDE_INT_1U << bitsize) - 1)
1213 	       || (bitsize == HOST_BITS_PER_WIDE_INT
1214 		   && v == HOST_WIDE_INT_M1U))
1215 	all_one = 1;
1216 
1217       value = lshift_value (mode, v, bitnum);
1218     }
1219   else
1220     {
1221       int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
1222 		      && bitnum + bitsize != GET_MODE_BITSIZE (mode));
1223 
1224       if (GET_MODE (value) != mode)
1225 	value = convert_to_mode (mode, value, 1);
1226 
1227       if (must_and)
1228 	value = expand_binop (mode, and_optab, value,
1229 			      mask_rtx (mode, 0, bitsize, 0),
1230 			      NULL_RTX, 1, OPTAB_LIB_WIDEN);
1231       if (bitnum > 0)
1232 	value = expand_shift (LSHIFT_EXPR, mode, value,
1233 			      bitnum, NULL_RTX, 1);
1234     }
1235 
1236   if (reverse)
1237     value = flip_storage_order (mode, value);
1238 
1239   /* Now clear the chosen bits in OP0,
1240      except that if VALUE is -1 we need not bother.  */
1241   /* We keep the intermediates in registers to allow CSE to combine
1242      consecutive bitfield assignments.  */
1243 
1244   temp = force_reg (mode, op0);
1245 
1246   if (! all_one)
1247     {
1248       rtx mask = mask_rtx (mode, bitnum, bitsize, 1);
1249       if (reverse)
1250 	mask = flip_storage_order (mode, mask);
1251       temp = expand_binop (mode, and_optab, temp, mask,
1252 			   NULL_RTX, 1, OPTAB_LIB_WIDEN);
1253       temp = force_reg (mode, temp);
1254     }
1255 
1256   /* Now logical-or VALUE into OP0, unless it is zero.  */
1257 
1258   if (! all_zero)
1259     {
1260       temp = expand_binop (mode, ior_optab, temp, value,
1261 			   NULL_RTX, 1, OPTAB_LIB_WIDEN);
1262       temp = force_reg (mode, temp);
1263     }
1264 
1265   if (op0 != temp)
1266     {
1267       op0 = copy_rtx (op0);
1268       emit_move_insn (op0, temp);
1269     }
1270 }
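
/* Added sketch, not part of the original source: once BITNUM has been
   converted to count from the least significant bit, the function above
   performs the usual read-modify-write sequence, roughly

     mask = ((HOST_WIDE_INT_1U << bitsize) - 1) << bitnum;
     op0 = (op0 & ~mask) | ((value << bitnum) & mask);

   in plain C terms, with the AND skipped when VALUE fills the field with
   all ones and the IOR skipped when VALUE is zero.  */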
1271 
1272 /* Store a bit field that is split across multiple accessible memory objects.
1273 
1274    OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1275    BITSIZE is the field width; BITPOS the position of its first bit
1276    (within the word).
1277    VALUE is the value to store.
1278 
1279    If REVERSE is true, the store is to be done in reverse order.
1280 
1281    This does not yet handle fields wider than BITS_PER_WORD.  */
1282 
1283 static void
1284 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1285 		       unsigned HOST_WIDE_INT bitpos,
1286 		       unsigned HOST_WIDE_INT bitregion_start,
1287 		       unsigned HOST_WIDE_INT bitregion_end,
1288 		       rtx value, bool reverse)
1289 {
1290   unsigned int unit, total_bits, bitsdone = 0;
1291 
1292   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1293      much at a time.  */
1294   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1295     unit = BITS_PER_WORD;
1296   else
1297     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1298 
1299   /* If OP0 is a memory with a mode, then UNIT must not be larger than
1300      OP0's mode as well.  Otherwise, store_fixed_bit_field will call us
1301      again, and we will mutually recurse forever.  */
1302   if (MEM_P (op0) && GET_MODE_BITSIZE (GET_MODE (op0)) > 0)
1303     unit = MIN (unit, GET_MODE_BITSIZE (GET_MODE (op0)));
1304 
1305   /* If VALUE is a constant other than a CONST_INT, get it into a register in
1306      WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
1307      that VALUE might be a floating-point constant.  */
1308   if (CONSTANT_P (value) && !CONST_INT_P (value))
1309     {
1310       rtx word = gen_lowpart_common (word_mode, value);
1311 
1312       if (word && (value != word))
1313 	value = word;
1314       else
1315 	value = gen_lowpart_common (word_mode,
1316 				    force_reg (GET_MODE (value) != VOIDmode
1317 					       ? GET_MODE (value)
1318 					       : word_mode, value));
1319     }
1320 
1321   total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1322 
1323   while (bitsdone < bitsize)
1324     {
1325       unsigned HOST_WIDE_INT thissize;
1326       unsigned HOST_WIDE_INT thispos;
1327       unsigned HOST_WIDE_INT offset;
1328       rtx part, word;
1329 
1330       offset = (bitpos + bitsdone) / unit;
1331       thispos = (bitpos + bitsdone) % unit;
1332 
1333       /* When the region of bytes we can touch is restricted, decrease
1334 	 UNIT close to the end of the region as needed.  If op0 is a REG
1335 	 or SUBREG of REG, don't do this, as there can't be data races
1336 	 on a register and we can expand shorter code in some cases.  */
1337       if (bitregion_end
1338 	  && unit > BITS_PER_UNIT
1339 	  && bitpos + bitsdone - thispos + unit > bitregion_end + 1
1340 	  && !REG_P (op0)
1341 	  && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0))))
1342 	{
1343 	  unit = unit / 2;
1344 	  continue;
1345 	}
1346 
1347       /* THISSIZE must not overrun a word boundary.  Otherwise,
1348 	 store_fixed_bit_field will call us again, and we will mutually
1349 	 recurse forever.  */
1350       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1351       thissize = MIN (thissize, unit - thispos);
1352 
1353       if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1354 	{
1355 	  /* Fetch successively less significant portions.  */
1356 	  if (CONST_INT_P (value))
1357 	    part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1358 			     >> (bitsize - bitsdone - thissize))
1359 			    & ((HOST_WIDE_INT_1 << thissize) - 1));
1360           /* Likewise, but the source is little-endian.  */
1361           else if (reverse)
1362 	    part = extract_fixed_bit_field (word_mode, value, thissize,
1363 					    bitsize - bitsdone - thissize,
1364 					    NULL_RTX, 1, false);
1365 	  else
1366 	    {
1367 	      int total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1368 	      /* The args are chosen so that the last part includes the
1369 		 lsb.  Give extract_bit_field the value it needs (with
1370 		 endianness compensation) to fetch the piece we want.  */
1371 	      part = extract_fixed_bit_field (word_mode, value, thissize,
1372 					      total_bits - bitsize + bitsdone,
1373 					      NULL_RTX, 1, false);
1374 	    }
1375 	}
1376       else
1377 	{
1378 	  /* Fetch successively more significant portions.  */
1379 	  if (CONST_INT_P (value))
1380 	    part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1381 			     >> bitsdone)
1382 			    & ((HOST_WIDE_INT_1 << thissize) - 1));
1383 	  /* Likewise, but the source is big-endian.  */
1384           else if (reverse)
1385 	    part = extract_fixed_bit_field (word_mode, value, thissize,
1386 					    total_bits - bitsdone - thissize,
1387 					    NULL_RTX, 1, false);
1388 	  else
1389 	    part = extract_fixed_bit_field (word_mode, value, thissize,
1390 					    bitsdone, NULL_RTX, 1, false);
1391 	}
1392 
1393       /* If OP0 is a register, then handle OFFSET here.  */
1394       if (SUBREG_P (op0) || REG_P (op0))
1395 	{
1396 	  machine_mode op0_mode = GET_MODE (op0);
1397 	  if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
1398 	    word = offset ? const0_rtx : op0;
1399 	  else
1400 	    word = operand_subword_force (op0, offset * unit / BITS_PER_WORD,
1401 					  GET_MODE (op0));
1402 	  offset &= BITS_PER_WORD / unit - 1;
1403 	}
1404       else
1405 	word = op0;
1406 
1407       /* OFFSET is in UNITs, and UNIT is in bits.  If WORD is const0_rtx,
1408 	 it is just an out-of-bounds access.  Ignore it.  */
1409       if (word != const0_rtx)
1410 	store_fixed_bit_field (word, thissize, offset * unit + thispos,
1411 			       bitregion_start, bitregion_end, part,
1412 			       reverse);
1413       bitsdone += thissize;
1414     }
1415 }
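
/* Worked example (added, not part of the original source): storing a 20-bit
   field at BITPOS 28 of a word-aligned MEM with UNIT == 32 takes two trips
   through the loop above: the first iteration stores THISSIZE == 4 bits at
   bit 28, the second stores the remaining 16 bits at bit 32, each through a
   recursive store_fixed_bit_field call that now fits within a single unit.  */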
1416 
1417 /* A subroutine of extract_bit_field_1 that converts return value X
1418    to either MODE or TMODE.  MODE, TMODE and UNSIGNEDP are arguments
1419    to extract_bit_field.  */
1420 
1421 static rtx
1422 convert_extracted_bit_field (rtx x, machine_mode mode,
1423 			     machine_mode tmode, bool unsignedp)
1424 {
1425   if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1426     return x;
1427 
1428   /* If TMODE is not a scalar integral mode, first convert to the
1429      integer mode of that size and then access the result as a value
1430      of TMODE via a SUBREG.  */
1431   if (!SCALAR_INT_MODE_P (tmode))
1432     {
1433       machine_mode smode;
1434 
1435       smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1436       x = convert_to_mode (smode, x, unsignedp);
1437       x = force_reg (smode, x);
1438       return gen_lowpart (tmode, x);
1439     }
1440 
1441   return convert_to_mode (tmode, x, unsignedp);
1442 }
1443 
1444 /* Try to use an ext(z)v pattern to extract a field from OP0.
1445    Return the extracted value on success, otherwise return null.
1446    EXT_MODE is the mode of the extraction and the other arguments
1447    are as for extract_bit_field.  */
1448 
1449 static rtx
1450 extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
1451 			      unsigned HOST_WIDE_INT bitsize,
1452 			      unsigned HOST_WIDE_INT bitnum,
1453 			      int unsignedp, rtx target,
1454 			      machine_mode mode, machine_mode tmode)
1455 {
1456   struct expand_operand ops[4];
1457   rtx spec_target = target;
1458   rtx spec_target_subreg = 0;
1459   machine_mode ext_mode = extv->field_mode;
1460   unsigned unit = GET_MODE_BITSIZE (ext_mode);
1461 
1462   if (bitsize == 0 || unit < bitsize)
1463     return NULL_RTX;
1464 
1465   if (MEM_P (op0))
1466     /* Get a reference to the first byte of the field.  */
1467     op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
1468 				&bitnum);
1469   else
1470     {
1471       /* Convert from counting within OP0 to counting in EXT_MODE.  */
1472       if (BYTES_BIG_ENDIAN)
1473 	bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1474 
1475       /* If op0 is a register, we need it in EXT_MODE to make it
1476 	 acceptable to the format of ext(z)v.  */
1477       if (GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
1478 	return NULL_RTX;
1479       if (REG_P (op0) && GET_MODE (op0) != ext_mode)
1480 	op0 = gen_lowpart_SUBREG (ext_mode, op0);
1481     }
1482 
1483   /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1484      "backwards" from the size of the unit we are extracting from.
1485      Otherwise, we count bits from the most significant on a
1486      BYTES/BITS_BIG_ENDIAN machine.  */
1487 
1488   if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1489     bitnum = unit - bitsize - bitnum;
1490 
1491   if (target == 0)
1492     target = spec_target = gen_reg_rtx (tmode);
1493 
1494   if (GET_MODE (target) != ext_mode)
1495     {
1496       /* Don't use LHS paradoxical subreg if explicit truncation is needed
1497 	 between the mode of the extraction (word_mode) and the target
1498 	 mode.  Instead, create a temporary and use convert_move to set
1499 	 the target.  */
1500       if (REG_P (target)
1501 	  && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode))
1502 	{
1503 	  target = gen_lowpart (ext_mode, target);
1504 	  if (GET_MODE_PRECISION (ext_mode)
1505 	      > GET_MODE_PRECISION (GET_MODE (spec_target)))
1506 	    spec_target_subreg = target;
1507 	}
1508       else
1509 	target = gen_reg_rtx (ext_mode);
1510     }
1511 
1512   create_output_operand (&ops[0], target, ext_mode);
1513   create_fixed_operand (&ops[1], op0);
1514   create_integer_operand (&ops[2], bitsize);
1515   create_integer_operand (&ops[3], bitnum);
1516   if (maybe_expand_insn (extv->icode, 4, ops))
1517     {
1518       target = ops[0].value;
1519       if (target == spec_target)
1520 	return target;
1521       if (target == spec_target_subreg)
1522 	return spec_target;
1523       return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1524     }
1525   return NULL_RTX;
1526 }
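
/* As a rough illustration of the pattern used above: with little-endian
   bit numbering, an extzv of BITSIZE bits at position BITNUM from a
   register X computes approximately

       (X >> BITNUM) & ((1 << BITSIZE) - 1)

   while extv sign-extends the same field instead of zero-extending it.
   The code above narrows OP0 (or takes a lowpart subreg of it) and
   recounts BITNUM so that the pattern sees the field within a single
   EXT_MODE unit.  */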
1527 
1528 /* A subroutine of extract_bit_field, with the same arguments.
1529    If FALLBACK_P is true, fall back to extract_fixed_bit_field
1530    if we can find no other means of implementing the operation.
1531    If FALLBACK_P is false, return NULL instead.  */
1532 
1533 static rtx
1534 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1535 		     unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1536 		     machine_mode mode, machine_mode tmode,
1537 		     bool reverse, bool fallback_p)
1538 {
1539   rtx op0 = str_rtx;
1540   machine_mode int_mode;
1541   machine_mode mode1;
1542 
1543   if (tmode == VOIDmode)
1544     tmode = mode;
1545 
1546   while (GET_CODE (op0) == SUBREG)
1547     {
1548       bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1549       op0 = SUBREG_REG (op0);
1550     }
1551 
1552   /* If we have an out-of-bounds access to a register, just return an
1553      uninitialized register of the required mode.  This can occur if the
1554      source code contains an out-of-bounds access to a small array.  */
1555   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1556     return gen_reg_rtx (tmode);
1557 
1558   if (REG_P (op0)
1559       && mode == GET_MODE (op0)
1560       && bitnum == 0
1561       && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1562     {
1563       if (reverse)
1564 	op0 = flip_storage_order (mode, op0);
1565       /* We're trying to extract a full register from itself.  */
1566       return op0;
1567     }
1568 
1569   /* See if we can get a better vector mode before extracting.  */
1570   if (VECTOR_MODE_P (GET_MODE (op0))
1571       && !MEM_P (op0)
1572       && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1573     {
1574       machine_mode new_mode;
1575 
1576       if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1577 	new_mode = MIN_MODE_VECTOR_FLOAT;
1578       else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1579 	new_mode = MIN_MODE_VECTOR_FRACT;
1580       else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1581 	new_mode = MIN_MODE_VECTOR_UFRACT;
1582       else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1583 	new_mode = MIN_MODE_VECTOR_ACCUM;
1584       else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1585 	new_mode = MIN_MODE_VECTOR_UACCUM;
1586       else
1587 	new_mode = MIN_MODE_VECTOR_INT;
1588 
1589       for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
1590 	if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1591 	    && GET_MODE_UNIT_SIZE (new_mode) == GET_MODE_SIZE (tmode)
1592 	    && targetm.vector_mode_supported_p (new_mode))
1593 	  break;
1594       if (new_mode != VOIDmode)
1595 	op0 = gen_lowpart (new_mode, op0);
1596     }
1597 
1598   /* Use vec_extract patterns for extracting parts of vectors whenever
1599      available.  */
1600   if (VECTOR_MODE_P (GET_MODE (op0))
1601       && !MEM_P (op0)
1602       && optab_handler (vec_extract_optab, GET_MODE (op0)) != CODE_FOR_nothing
1603       && ((bitnum + bitsize - 1) / GET_MODE_UNIT_BITSIZE (GET_MODE (op0))
1604 	  == bitnum / GET_MODE_UNIT_BITSIZE (GET_MODE (op0))))
1605     {
1606       struct expand_operand ops[3];
1607       machine_mode outermode = GET_MODE (op0);
1608       machine_mode innermode = GET_MODE_INNER (outermode);
1609       enum insn_code icode = optab_handler (vec_extract_optab, outermode);
1610       unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1611 
1612       create_output_operand (&ops[0], target, innermode);
1613       create_input_operand (&ops[1], op0, outermode);
1614       create_integer_operand (&ops[2], pos);
1615       if (maybe_expand_insn (icode, 3, ops))
1616 	{
1617 	  target = ops[0].value;
1618       	  if (GET_MODE (target) != mode)
1619 	    return gen_lowpart (tmode, target);
1620 	  return target;
1621 	}
1622     }
1623 
1624   /* Make sure we are playing with integral modes.  Pun with subregs
1625      if we aren't.  */
1626   {
1627     machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1628     if (imode != GET_MODE (op0))
1629       {
1630 	if (MEM_P (op0))
1631 	  op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
1632 	else if (imode != BLKmode)
1633 	  {
1634 	    op0 = gen_lowpart (imode, op0);
1635 
1636 	    /* If we got a SUBREG, force it into a register since we
1637 	       aren't going to be able to do another SUBREG on it.  */
1638 	    if (GET_CODE (op0) == SUBREG)
1639 	      op0 = force_reg (imode, op0);
1640 	  }
1641 	else
1642 	  {
1643 	    HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (op0));
1644 	    rtx mem = assign_stack_temp (GET_MODE (op0), size);
1645 	    emit_move_insn (mem, op0);
1646 	    op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size);
1647 	  }
1648       }
1649   }
1650 
1651   /* ??? We currently assume TARGET is at least as big as BITSIZE.
1652      If that's wrong, the solution is to test for it and set TARGET to 0
1653      if needed.  */
1654 
1655   /* Get the mode of the field to use for atomic access or subreg
1656      conversion.  */
1657   mode1 = mode;
1658   if (SCALAR_INT_MODE_P (tmode))
1659     {
1660       machine_mode try_mode = mode_for_size (bitsize,
1661 						  GET_MODE_CLASS (tmode), 0);
1662       if (try_mode != BLKmode)
1663 	mode1 = try_mode;
1664     }
1665   gcc_assert (mode1 != BLKmode);
1666 
1667   /* Extraction of a full MODE1 value can be done with a subreg as long
1668      as the least significant bit of the value is the least significant
1669      bit of either OP0 or a word of OP0.  */
1670   if (!MEM_P (op0)
1671       && !reverse
1672       && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
1673       && bitsize == GET_MODE_BITSIZE (mode1)
1674       && TRULY_NOOP_TRUNCATION_MODES_P (mode1, GET_MODE (op0)))
1675     {
1676       rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1677 				     bitnum / BITS_PER_UNIT);
1678       if (sub)
1679 	return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1680     }
1681 
1682   /* Extraction of a full MODE1 value can be done with a load as long as
1683      the field is on a byte boundary and is sufficiently aligned.  */
1684   if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1))
1685     {
1686       op0 = adjust_bitfield_address (op0, mode1, bitnum / BITS_PER_UNIT);
1687       if (reverse)
1688 	op0 = flip_storage_order (mode1, op0);
1689       return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1690     }
1691 
1692   /* Handle fields bigger than a word.  */
1693 
1694   if (bitsize > BITS_PER_WORD)
1695     {
1696       /* Here we transfer the words of the field
1697 	 in the order least significant first.
1698 	 This is because the most significant word is the one which may
1699 	 be less than full.  */
1700 
1701       const bool backwards = WORDS_BIG_ENDIAN;
1702       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1703       unsigned int i;
1704       rtx_insn *last;
1705 
1706       if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1707 	target = gen_reg_rtx (mode);
1708 
1709       /* In case we're about to clobber a base register or something
1710 	 (see gcc.c-torture/execute/20040625-1.c).   */
1711       if (reg_mentioned_p (target, str_rtx))
1712 	target = gen_reg_rtx (mode);
1713 
1714       /* Indicate for flow that the entire target reg is being set.  */
1715       emit_clobber (target);
1716 
1717       last = get_last_insn ();
1718       for (i = 0; i < nwords; i++)
1719 	{
1720 	  /* If I is 0, use the low-order word in both field and target;
1721 	     if I is 1, use the next to lowest word; and so on.  */
1722 	  /* Word number in TARGET to use.  */
1723 	  unsigned int wordnum
1724 	    = (backwards
1725 	       ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1726 	       : i);
1727 	  /* Offset from start of field in OP0.  */
1728 	  unsigned int bit_offset = (backwards ^ reverse
1729 				     ? MAX ((int) bitsize - ((int) i + 1)
1730 					    * BITS_PER_WORD,
1731 					    0)
1732 				     : (int) i * BITS_PER_WORD);
1733 	  rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1734 	  rtx result_part
1735 	    = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1736 					     bitsize - i * BITS_PER_WORD),
1737 				   bitnum + bit_offset, 1, target_part,
1738 				   mode, word_mode, reverse, fallback_p);
1739 
1740 	  gcc_assert (target_part);
1741 	  if (!result_part)
1742 	    {
1743 	      delete_insns_since (last);
1744 	      return NULL;
1745 	    }
1746 
1747 	  if (result_part != target_part)
1748 	    emit_move_insn (target_part, result_part);
1749 	}
1750 
1751       if (unsignedp)
1752 	{
1753 	  /* Unless we've filled TARGET, the upper regs in a multi-reg value
1754 	     need to be zeroed out.  */
1755 	  if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1756 	    {
1757 	      unsigned int i, total_words;
1758 
1759 	      total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1760 	      for (i = nwords; i < total_words; i++)
1761 		emit_move_insn
1762 		  (operand_subword (target,
1763 				    backwards ? total_words - i - 1 : i,
1764 				    1, VOIDmode),
1765 		   const0_rtx);
1766 	    }
1767 	  return target;
1768 	}
1769 
1770       /* Signed bit field: sign-extend with two arithmetic shifts.  */
1771       target = expand_shift (LSHIFT_EXPR, mode, target,
1772 			     GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1773       return expand_shift (RSHIFT_EXPR, mode, target,
1774 			   GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1775     }
1776 
1777   /* If OP0 is a multi-word register, narrow it to the affected word.
1778      If the region spans two words, defer to extract_split_bit_field.  */
1779   if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1780     {
1781       if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
1782 	{
1783 	  if (!fallback_p)
1784 	    return NULL_RTX;
1785 	  target = extract_split_bit_field (op0, bitsize, bitnum, unsignedp,
1786 					    reverse);
1787 	  return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1788 	}
1789       op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
1790 				 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1791       bitnum %= BITS_PER_WORD;
1792     }
1793 
1794   /* From here on we know the desired field is smaller than a word.
1795      If OP0 is a register, it too fits within a word.  */
1796   enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv;
1797   extraction_insn extv;
1798   if (!MEM_P (op0)
1799       && !reverse
1800       /* ??? We could limit the structure size to the part of OP0 that
1801 	 contains the field, with appropriate checks for endianness
1802 	 and TRULY_NOOP_TRUNCATION.  */
1803       && get_best_reg_extraction_insn (&extv, pattern,
1804 				       GET_MODE_BITSIZE (GET_MODE (op0)),
1805 				       tmode))
1806     {
1807       rtx result = extract_bit_field_using_extv (&extv, op0, bitsize, bitnum,
1808 						 unsignedp, target, mode,
1809 						 tmode);
1810       if (result)
1811 	return result;
1812     }
1813 
1814   /* If OP0 is a memory, try copying it to a register and seeing if a
1815      cheap register alternative is available.  */
1816   if (MEM_P (op0) && !reverse)
1817     {
1818       if (get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum,
1819 					tmode))
1820 	{
1821 	  rtx result = extract_bit_field_using_extv (&extv, op0, bitsize,
1822 						     bitnum, unsignedp,
1823 						     target, mode,
1824 						     tmode);
1825 	  if (result)
1826 	    return result;
1827 	}
1828 
1829       rtx_insn *last = get_last_insn ();
1830 
1831       /* Try loading part of OP0 into a register and extracting the
1832 	 bitfield from that.  */
1833       unsigned HOST_WIDE_INT bitpos;
1834       rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum,
1835 					       0, 0, tmode, &bitpos);
1836       if (xop0)
1837 	{
1838 	  xop0 = copy_to_reg (xop0);
1839 	  rtx result = extract_bit_field_1 (xop0, bitsize, bitpos,
1840 					    unsignedp, target,
1841 					    mode, tmode, reverse, false);
1842 	  if (result)
1843 	    return result;
1844 	  delete_insns_since (last);
1845 	}
1846     }
1847 
1848   if (!fallback_p)
1849     return NULL;
1850 
1851   /* Find a correspondingly-sized integer field, so we can apply
1852      shifts and masks to it.  */
1853   int_mode = int_mode_for_mode (tmode);
1854   if (int_mode == BLKmode)
1855     int_mode = int_mode_for_mode (mode);
1856   /* Should probably push op0 out to memory and then do a load.  */
1857   gcc_assert (int_mode != BLKmode);
1858 
1859   target = extract_fixed_bit_field (int_mode, op0, bitsize, bitnum, target,
1860 				    unsignedp, reverse);
1861 
1862   /* Complex values must be reversed piecewise, so we need to undo the global
1863      reversal, convert to the complex mode and reverse again.  */
1864   if (reverse && COMPLEX_MODE_P (tmode))
1865     {
1866       target = flip_storage_order (int_mode, target);
1867       target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
1868       target = flip_storage_order (tmode, target);
1869     }
1870   else
1871     target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
1872 
1873   return target;
1874 }
1875 
1876 /* Generate code to extract a bit-field from STR_RTX
1877    containing BITSIZE bits, starting at BITNUM,
1878    and put it in TARGET if possible (if TARGET is nonzero).
1879    Regardless of TARGET, we return the rtx for where the value is placed.
1880 
1881    STR_RTX is the structure containing the field (a REG or MEM).
1882    UNSIGNEDP is nonzero if this is an unsigned bit field.
1883    MODE is the natural mode of the field value once extracted.
1884    TMODE is the mode the caller would like the value to have;
1885    but the value may be returned with type MODE instead.
1886 
1887    If REVERSE is true, the extraction is to be done in reverse order.
1888 
1889    If a TARGET is specified and we can store in it at no extra cost,
1890    we do so, and return TARGET.
1891    Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1892    if they are equally easy.  */
1893 
1894 rtx
1895 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1896 		   unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1897 		   machine_mode mode, machine_mode tmode, bool reverse)
1898 {
1899   machine_mode mode1;
1900 
1901   /* Handle -fstrict-volatile-bitfields in the cases where it applies.  */
1902   if (GET_MODE_BITSIZE (GET_MODE (str_rtx)) > 0)
1903     mode1 = GET_MODE (str_rtx);
1904   else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
1905     mode1 = GET_MODE (target);
1906   else
1907     mode1 = tmode;
1908 
1909   if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, mode1, 0, 0))
1910     {
1911       /* Extraction of a full MODE1 value can be done with a simple load.
1912 	 We know here that the field can be accessed with one single
1913 	 instruction.  For targets that support unaligned memory,
1914 	 an unaligned access may be necessary.  */
1915       if (bitsize == GET_MODE_BITSIZE (mode1))
1916 	{
1917 	  rtx result = adjust_bitfield_address (str_rtx, mode1,
1918 						bitnum / BITS_PER_UNIT);
1919 	  if (reverse)
1920 	    result = flip_storage_order (mode1, result);
1921 	  gcc_assert (bitnum % BITS_PER_UNIT == 0);
1922 	  return convert_extracted_bit_field (result, mode, tmode, unsignedp);
1923 	}
1924 
1925       str_rtx = narrow_bit_field_mem (str_rtx, mode1, bitsize, bitnum,
1926 				      &bitnum);
1927       gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (mode1));
1928       str_rtx = copy_to_reg (str_rtx);
1929     }
1930 
1931   return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
1932 			      target, mode, tmode, reverse, true);
1933 }
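
/* As an example of the -fstrict-volatile-bitfields handling above: for a
   bit-field declared with a volatile int type, the containing word is
   typically accessed with one SImode load; if the field is narrower than
   the word, the loaded value is copied to a register and the field is
   extracted from that register, so no narrower or wider memory access is
   issued.  */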
1934 
1935 /* Use shifts and boolean operations to extract a field of BITSIZE bits
1936    from bit BITNUM of OP0.
1937 
1938    UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1939    If REVERSE is true, the extraction is to be done in reverse order.
1940 
1941    If TARGET is nonzero, attempts to store the value there
1942    and return TARGET, but this is not guaranteed.
1943    If TARGET is not used, create a pseudo-reg of mode TMODE for the value.  */
1944 
1945 static rtx
1946 extract_fixed_bit_field (machine_mode tmode, rtx op0,
1947 			 unsigned HOST_WIDE_INT bitsize,
1948 			 unsigned HOST_WIDE_INT bitnum, rtx target,
1949 			 int unsignedp, bool reverse)
1950 {
1951   if (MEM_P (op0))
1952     {
1953       machine_mode mode
1954 	= get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0), word_mode,
1955 			 MEM_VOLATILE_P (op0));
1956 
1957       if (mode == VOIDmode)
1958 	/* The only way this should occur is if the field spans word
1959 	   boundaries.  */
1960 	return extract_split_bit_field (op0, bitsize, bitnum, unsignedp,
1961 					reverse);
1962 
1963       op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
1964     }
1965 
1966   return extract_fixed_bit_field_1 (tmode, op0, bitsize, bitnum,
1967 				    target, unsignedp, reverse);
1968 }
1969 
1970 /* Helper function for extract_fixed_bit_field, extracts
1971    the bit field always using the MODE of OP0.  */
1972 
1973 static rtx
1974 extract_fixed_bit_field_1 (machine_mode tmode, rtx op0,
1975 			   unsigned HOST_WIDE_INT bitsize,
1976 			   unsigned HOST_WIDE_INT bitnum, rtx target,
1977 			   int unsignedp, bool reverse)
1978 {
1979   machine_mode mode = GET_MODE (op0);
1980   gcc_assert (SCALAR_INT_MODE_P (mode));
1981 
1982   /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1983      for invalid input, such as extract equivalent of f5 from
1984      gcc.dg/pr48335-2.c.  */
1985 
1986   if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1987     /* BITNUM is the distance between our msb and that of OP0.
1988        Convert it to the distance from the lsb.  */
1989     bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1990 
1991   /* Now BITNUM is always the distance between the field's lsb and that of OP0.
1992      We have reduced the big-endian case to the little-endian case.  */
1993   if (reverse)
1994     op0 = flip_storage_order (mode, op0);
1995 
1996   if (unsignedp)
1997     {
1998       if (bitnum)
1999 	{
2000 	  /* If the field does not already start at the lsb,
2001 	     shift it so it does.  */
2002 	  /* Maybe propagate the target for the shift.  */
2003 	  rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2004 	  if (tmode != mode)
2005 	    subtarget = 0;
2006 	  op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
2007 	}
2008       /* Convert the value to the desired mode.  */
2009       if (mode != tmode)
2010 	op0 = convert_to_mode (tmode, op0, 1);
2011 
2012       /* Unless the msb of the field used to be the msb when we shifted,
2013 	 mask out the upper bits.  */
2014 
2015       if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
2016 	return expand_binop (GET_MODE (op0), and_optab, op0,
2017 			     mask_rtx (GET_MODE (op0), 0, bitsize, 0),
2018 			     target, 1, OPTAB_LIB_WIDEN);
2019       return op0;
2020     }
2021 
2022   /* To extract a signed bit-field, first shift its msb to the msb of the word,
2023      then arithmetic-shift its lsb to the lsb of the word.  */
2024   op0 = force_reg (mode, op0);
2025 
2026   /* Find the narrowest integer mode that contains the field.  */
2027 
2028   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
2029        mode = GET_MODE_WIDER_MODE (mode))
2030     if (GET_MODE_BITSIZE (mode) >= bitsize + bitnum)
2031       {
2032 	op0 = convert_to_mode (mode, op0, 0);
2033 	break;
2034       }
2035 
2036   if (mode != tmode)
2037     target = 0;
2038 
2039   if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
2040     {
2041       int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
2042       /* Maybe propagate the target for the shift.  */
2043       rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2044       op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
2045     }
2046 
2047   return expand_shift (RSHIFT_EXPR, mode, op0,
2048 		       GET_MODE_BITSIZE (mode) - bitsize, target, 0);
2049 }
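
/* For example, a signed 8-bit field starting at bit 4 of a 32-bit value X
   reduces to

       (X << 20) >> 24

   with an arithmetic right shift: the left shift moves the field's msb to
   the msb of the word, and the right shift then sign-extends the field
   down to the lsb, which is exactly the two-shift sequence emitted
   above.  */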
2050 
2051 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
2052    VALUE << BITPOS.  */
2053 
2054 static rtx
2055 lshift_value (machine_mode mode, unsigned HOST_WIDE_INT value,
2056 	      int bitpos)
2057 {
2058   return immed_wide_int_const (wi::lshift (value, bitpos), mode);
2059 }
2060 
2061 /* Extract a bit field that is split across two words
2062    and return an RTX for the result.
2063 
2064    OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
2065    BITSIZE is the field width; BITPOS is the position of its first bit in the word.
2066    UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.
2067 
2068    If REVERSE is true, the extraction is to be done in reverse order.  */
2069 
2070 static rtx
2071 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
2072 			 unsigned HOST_WIDE_INT bitpos, int unsignedp,
2073 			 bool reverse)
2074 {
2075   unsigned int unit;
2076   unsigned int bitsdone = 0;
2077   rtx result = NULL_RTX;
2078   int first = 1;
2079 
2080   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
2081      much at a time.  */
2082   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
2083     unit = BITS_PER_WORD;
2084   else
2085     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
2086 
2087   while (bitsdone < bitsize)
2088     {
2089       unsigned HOST_WIDE_INT thissize;
2090       rtx part, word;
2091       unsigned HOST_WIDE_INT thispos;
2092       unsigned HOST_WIDE_INT offset;
2093 
2094       offset = (bitpos + bitsdone) / unit;
2095       thispos = (bitpos + bitsdone) % unit;
2096 
2097       /* THISSIZE must not overrun a word boundary.  Otherwise,
2098 	 extract_fixed_bit_field will call us again, and we will mutually
2099 	 recurse forever.  */
2100       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
2101       thissize = MIN (thissize, unit - thispos);
2102 
2103       /* If OP0 is a register, then handle OFFSET here.  */
2104       if (SUBREG_P (op0) || REG_P (op0))
2105 	{
2106 	  word = operand_subword_force (op0, offset, GET_MODE (op0));
2107 	  offset = 0;
2108 	}
2109       else
2110 	word = op0;
2111 
2112       /* Extract the parts in bit-counting order,
2113 	 whose meaning is determined by BYTES_PER_UNIT.
2114 	 OFFSET is in UNITs, and UNIT is in bits.  */
2115       part = extract_fixed_bit_field (word_mode, word, thissize,
2116 				      offset * unit + thispos, 0, 1, reverse);
2117       bitsdone += thissize;
2118 
2119       /* Shift this part into place for the result.  */
2120       if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2121 	{
2122 	  if (bitsize != bitsdone)
2123 	    part = expand_shift (LSHIFT_EXPR, word_mode, part,
2124 				 bitsize - bitsdone, 0, 1);
2125 	}
2126       else
2127 	{
2128 	  if (bitsdone != thissize)
2129 	    part = expand_shift (LSHIFT_EXPR, word_mode, part,
2130 				 bitsdone - thissize, 0, 1);
2131 	}
2132 
2133       if (first)
2134 	result = part;
2135       else
2136 	/* Combine the parts with bitwise or.  This works
2137 	   because we extracted each part as an unsigned bit field.  */
2138 	result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2139 			       OPTAB_LIB_WIDEN);
2140 
2141       first = 0;
2142     }
2143 
2144   /* Unsigned bit field: we are done.  */
2145   if (unsignedp)
2146     return result;
2147   /* Signed bit field: sign-extend with two arithmetic shifts.  */
2148   result = expand_shift (LSHIFT_EXPR, word_mode, result,
2149 			 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2150   return expand_shift (RSHIFT_EXPR, word_mode, result,
2151 		       BITS_PER_WORD - bitsize, NULL_RTX, 0);
2152 }
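
/* For instance, a 12-bit field starting 26 bits into a register with
   32-bit words is handled as 6 bits taken from the first word and 6 bits
   from the second; each part is extracted unsigned, and on a little-endian
   target the later part is shifted left by the number of bits already
   extracted before the parts are IORed together.  The final pair of shifts
   then sign-extends the result when UNSIGNEDP is zero.  */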
2153 
2154 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2155    the bit pattern.  SRC_MODE is the mode of SRC; if this is smaller than
2156    MODE, fill the upper bits with zeros.  Fail if the layout of either
2157    mode is unknown (as for CC modes) or if the extraction would involve
2158    unprofitable mode punning.  Return the value on success, otherwise
2159    return null.
2160 
2161    This is different from gen_lowpart* in these respects:
2162 
2163      - the returned value must always be considered an rvalue
2164 
2165      - when MODE is wider than SRC_MODE, the extraction involves
2166        a zero extension
2167 
2168      - when MODE is smaller than SRC_MODE, the extraction involves
2169        a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
2170 
2171    In other words, this routine performs a computation, whereas the
2172    gen_lowpart* routines are conceptually lvalue or rvalue subreg
2173    operations.  */
2174 
2175 rtx
2176 extract_low_bits (machine_mode mode, machine_mode src_mode, rtx src)
2177 {
2178   machine_mode int_mode, src_int_mode;
2179 
2180   if (mode == src_mode)
2181     return src;
2182 
2183   if (CONSTANT_P (src))
2184     {
2185       /* simplify_gen_subreg can't be used here, as if simplify_subreg
2186 	 fails, it will happily create (subreg (symbol_ref)) or similar
2187 	 invalid SUBREGs.  */
2188       unsigned int byte = subreg_lowpart_offset (mode, src_mode);
2189       rtx ret = simplify_subreg (mode, src, src_mode, byte);
2190       if (ret)
2191 	return ret;
2192 
2193       if (GET_MODE (src) == VOIDmode
2194 	  || !validate_subreg (mode, src_mode, src, byte))
2195 	return NULL_RTX;
2196 
2197       src = force_reg (GET_MODE (src), src);
2198       return gen_rtx_SUBREG (mode, src, byte);
2199     }
2200 
2201   if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2202     return NULL_RTX;
2203 
2204   if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2205       && MODES_TIEABLE_P (mode, src_mode))
2206     {
2207       rtx x = gen_lowpart_common (mode, src);
2208       if (x)
2209         return x;
2210     }
2211 
2212   src_int_mode = int_mode_for_mode (src_mode);
2213   int_mode = int_mode_for_mode (mode);
2214   if (src_int_mode == BLKmode || int_mode == BLKmode)
2215     return NULL_RTX;
2216 
2217   if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2218     return NULL_RTX;
2219   if (!MODES_TIEABLE_P (int_mode, mode))
2220     return NULL_RTX;
2221 
2222   src = gen_lowpart (src_int_mode, src);
2223   src = convert_modes (int_mode, src_int_mode, src, true);
2224   src = gen_lowpart (mode, src);
2225   return src;
2226 }
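
/* For example, reading the low SFmode bits of a DImode value goes through
   the corresponding integer modes: the DImode value is truncated to SImode
   and the result is then reinterpreted as SFmode via a lowpart subreg,
   with both steps guarded by the MODES_TIEABLE_P checks above.  */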
2227 
2228 /* Add INC into TARGET.  */
2229 
2230 void
2231 expand_inc (rtx target, rtx inc)
2232 {
2233   rtx value = expand_binop (GET_MODE (target), add_optab,
2234 			    target, inc,
2235 			    target, 0, OPTAB_LIB_WIDEN);
2236   if (value != target)
2237     emit_move_insn (target, value);
2238 }
2239 
2240 /* Subtract DEC from TARGET.  */
2241 
2242 void
2243 expand_dec (rtx target, rtx dec)
2244 {
2245   rtx value = expand_binop (GET_MODE (target), sub_optab,
2246 			    target, dec,
2247 			    target, 0, OPTAB_LIB_WIDEN);
2248   if (value != target)
2249     emit_move_insn (target, value);
2250 }
2251 
2252 /* Output a shift instruction for expression code CODE,
2253    with SHIFTED being the rtx for the value to shift,
2254    and AMOUNT the rtx for the amount to shift by.
2255    Store the result in the rtx TARGET, if that is convenient.
2256    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2257    Return the rtx for where the value is.
2258    If that cannot be done, abort the compilation unless MAY_FAIL is true,
2259    in which case 0 is returned.  */
2260 
2261 static rtx
2262 expand_shift_1 (enum tree_code code, machine_mode mode, rtx shifted,
2263 		rtx amount, rtx target, int unsignedp, bool may_fail = false)
2264 {
2265   rtx op1, temp = 0;
2266   int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2267   int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2268   optab lshift_optab = ashl_optab;
2269   optab rshift_arith_optab = ashr_optab;
2270   optab rshift_uns_optab = lshr_optab;
2271   optab lrotate_optab = rotl_optab;
2272   optab rrotate_optab = rotr_optab;
2273   machine_mode op1_mode;
2274   machine_mode scalar_mode = mode;
2275   int attempt;
2276   bool speed = optimize_insn_for_speed_p ();
2277 
2278   if (VECTOR_MODE_P (mode))
2279     scalar_mode = GET_MODE_INNER (mode);
2280   op1 = amount;
2281   op1_mode = GET_MODE (op1);
2282 
2283   /* Determine whether the shift/rotate amount is a vector, or scalar.  If the
2284      shift amount is a vector, use the vector/vector shift patterns.  */
2285   if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2286     {
2287       lshift_optab = vashl_optab;
2288       rshift_arith_optab = vashr_optab;
2289       rshift_uns_optab = vlshr_optab;
2290       lrotate_optab = vrotl_optab;
2291       rrotate_optab = vrotr_optab;
2292     }
2293 
2294   /* We previously detected shift counts computed by NEGATE_EXPR
2295      and shifted in the other direction, but that does not work
2296      on all machines.  */
2297 
2298   if (SHIFT_COUNT_TRUNCATED)
2299     {
2300       if (CONST_INT_P (op1)
2301 	  && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2302 	      (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (scalar_mode)))
2303 	op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2304 		       % GET_MODE_BITSIZE (scalar_mode));
2305       else if (GET_CODE (op1) == SUBREG
2306 	       && subreg_lowpart_p (op1)
2307 	       && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1)))
2308 	       && SCALAR_INT_MODE_P (GET_MODE (op1)))
2309 	op1 = SUBREG_REG (op1);
2310     }
2311 
2312   /* Canonicalize rotates by constant amount.  If op1 is bitsize / 2,
2313      prefer left rotation, if op1 is from bitsize / 2 + 1 to
2314      bitsize - 1, use other direction of rotate with 1 .. bitsize / 2 - 1
2315      amount instead.  */
2316   if (rotate
2317       && CONST_INT_P (op1)
2318       && IN_RANGE (INTVAL (op1), GET_MODE_BITSIZE (scalar_mode) / 2 + left,
2319 		   GET_MODE_BITSIZE (scalar_mode) - 1))
2320     {
2321       op1 = GEN_INT (GET_MODE_BITSIZE (scalar_mode) - INTVAL (op1));
2322       left = !left;
2323       code = left ? LROTATE_EXPR : RROTATE_EXPR;
2324     }
2325 
2326   /* Rotation of 16-bit values by 8 bits is effectively equivalent to a bswaphi.
2327      Note that this is not the case for bigger values.  For instance a rotation
2328      of 0x01020304 by 16 bits gives 0x03040102 which is different from
2329      0x04030201 (bswapsi).  */
2330   if (rotate
2331       && CONST_INT_P (op1)
2332       && INTVAL (op1) == BITS_PER_UNIT
2333       && GET_MODE_SIZE (scalar_mode) == 2
2334       && optab_handler (bswap_optab, mode) != CODE_FOR_nothing)
2335     return expand_unop (mode, bswap_optab, shifted, NULL_RTX, unsignedp);
2336 
2337   if (op1 == const0_rtx)
2338     return shifted;
2339 
2340   /* Check whether it is cheaper to implement a left shift by a constant
2341      bit count as a sequence of additions.  */
2342   if (code == LSHIFT_EXPR
2343       && CONST_INT_P (op1)
2344       && INTVAL (op1) > 0
2345       && INTVAL (op1) < GET_MODE_PRECISION (scalar_mode)
2346       && INTVAL (op1) < MAX_BITS_PER_WORD
2347       && (shift_cost (speed, mode, INTVAL (op1))
2348 	  > INTVAL (op1) * add_cost (speed, mode))
2349       && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2350     {
2351       int i;
2352       for (i = 0; i < INTVAL (op1); i++)
2353 	{
2354 	  temp = force_reg (mode, shifted);
2355 	  shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2356 				  unsignedp, OPTAB_LIB_WIDEN);
2357 	}
2358       return shifted;
2359     }
2360 
2361   for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2362     {
2363       enum optab_methods methods;
2364 
2365       if (attempt == 0)
2366 	methods = OPTAB_DIRECT;
2367       else if (attempt == 1)
2368 	methods = OPTAB_WIDEN;
2369       else
2370 	methods = OPTAB_LIB_WIDEN;
2371 
2372       if (rotate)
2373 	{
2374 	  /* Widening does not work for rotation.  */
2375 	  if (methods == OPTAB_WIDEN)
2376 	    continue;
2377 	  else if (methods == OPTAB_LIB_WIDEN)
2378 	    {
2379 	      /* If we have been unable to open-code this by a rotation,
2380 		 do it as the IOR of two shifts.  I.e., to rotate A
2381 		 by N bits, compute
2382 		 (A << N) | ((unsigned) A >> ((-N) & (C - 1)))
2383 		 where C is the bitsize of A.
2384 
2385 		 It is theoretically possible that the target machine might
2386 		 not be able to perform either shift and hence we would
2387 		 be making two libcalls rather than just the one for the
2388 		 shift (similarly if IOR could not be done).  We will allow
2389 		 this extremely unlikely lossage to avoid complicating the
2390 		 code below.  */
2391 
2392 	      rtx subtarget = target == shifted ? 0 : target;
2393 	      rtx new_amount, other_amount;
2394 	      rtx temp1;
2395 
2396 	      new_amount = op1;
2397 	      if (op1 == const0_rtx)
2398 		return shifted;
2399 	      else if (CONST_INT_P (op1))
2400 		other_amount = GEN_INT (GET_MODE_BITSIZE (scalar_mode)
2401 					- INTVAL (op1));
2402 	      else
2403 		{
2404 		  other_amount
2405 		    = simplify_gen_unary (NEG, GET_MODE (op1),
2406 					  op1, GET_MODE (op1));
2407 		  HOST_WIDE_INT mask = GET_MODE_PRECISION (scalar_mode) - 1;
2408 		  other_amount
2409 		    = simplify_gen_binary (AND, GET_MODE (op1), other_amount,
2410 					   gen_int_mode (mask, GET_MODE (op1)));
2411 		}
2412 
2413 	      shifted = force_reg (mode, shifted);
2414 
2415 	      temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2416 				     mode, shifted, new_amount, 0, 1);
2417 	      temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2418 				      mode, shifted, other_amount,
2419 				      subtarget, 1);
2420 	      return expand_binop (mode, ior_optab, temp, temp1, target,
2421 				   unsignedp, methods);
2422 	    }
2423 
2424 	  temp = expand_binop (mode,
2425 			       left ? lrotate_optab : rrotate_optab,
2426 			       shifted, op1, target, unsignedp, methods);
2427 	}
2428       else if (unsignedp)
2429 	temp = expand_binop (mode,
2430 			     left ? lshift_optab : rshift_uns_optab,
2431 			     shifted, op1, target, unsignedp, methods);
2432 
2433       /* Do arithmetic shifts.
2434 	 Also, if we are going to widen the operand, we can just as well
2435 	 use an arithmetic right-shift instead of a logical one.  */
2436       if (temp == 0 && ! rotate
2437 	  && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2438 	{
2439 	  enum optab_methods methods1 = methods;
2440 
2441 	  /* If trying to widen a logical shift to an arithmetic shift,
2442 	     don't accept an arithmetic shift of the same size.  */
2443 	  if (unsignedp)
2444 	    methods1 = OPTAB_MUST_WIDEN;
2445 
2446 	  /* Arithmetic shift */
2447 
2448 	  temp = expand_binop (mode,
2449 			       left ? lshift_optab : rshift_arith_optab,
2450 			       shifted, op1, target, unsignedp, methods1);
2451 	}
2452 
2453       /* We used to try extzv here for logical right shifts, but that was
2454 	 only useful for one machine, the VAX, and caused poor code
2455 	 generation there for lshrdi3, so the code was deleted and a
2456 	 define_expand for lshrsi3 was added to vax.md.  */
2457     }
2458 
2459   gcc_assert (temp != NULL_RTX || may_fail);
2460   return temp;
2461 }
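
/* As a concrete instance of the rotate fallback above: rotating a 32-bit
   value A left by a variable amount N is emitted as

       (A << N) | ((unsigned) A >> ((-N) & 31))

   so that a runtime N of zero never produces an undefined shift by the
   full width.  */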
2462 
2463 /* Output a shift instruction for expression code CODE,
2464    with SHIFTED being the rtx for the value to shift,
2465    and AMOUNT the amount to shift by.
2466    Store the result in the rtx TARGET, if that is convenient.
2467    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2468    Return the rtx for where the value is.  */
2469 
2470 rtx
2471 expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2472 	      int amount, rtx target, int unsignedp)
2473 {
2474   return expand_shift_1 (code, mode,
2475 			 shifted, GEN_INT (amount), target, unsignedp);
2476 }
2477 
2478 /* Likewise, but return 0 if that cannot be done.  */
2479 
2480 static rtx
2481 maybe_expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2482 		    int amount, rtx target, int unsignedp)
2483 {
2484   return expand_shift_1 (code, mode,
2485 			 shifted, GEN_INT (amount), target, unsignedp, true);
2486 }
2487 
2488 /* Output a shift instruction for expression code CODE,
2489    with SHIFTED being the rtx for the value to shift,
2490    and AMOUNT the tree for the amount to shift by.
2491    Store the result in the rtx TARGET, if that is convenient.
2492    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2493    Return the rtx for where the value is.  */
2494 
2495 rtx
2496 expand_variable_shift (enum tree_code code, machine_mode mode, rtx shifted,
2497 		       tree amount, rtx target, int unsignedp)
2498 {
2499   return expand_shift_1 (code, mode,
2500 			 shifted, expand_normal (amount), target, unsignedp);
2501 }
2502 
2503 
2504 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2505 			const struct mult_cost *, machine_mode mode);
2506 static rtx expand_mult_const (machine_mode, rtx, HOST_WIDE_INT, rtx,
2507 			      const struct algorithm *, enum mult_variant);
2508 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2509 static rtx extract_high_half (machine_mode, rtx);
2510 static rtx expmed_mult_highpart (machine_mode, rtx, rtx, rtx, int, int);
2511 static rtx expmed_mult_highpart_optab (machine_mode, rtx, rtx, rtx,
2512 				       int, int);
2513 /* Compute and return the best algorithm for multiplying by T.
2514    The algorithm must cost less than COST_LIMIT.
2515    If retval.cost >= COST_LIMIT, no algorithm was found and all
2516    other fields of the returned struct are undefined.
2517    MODE is the machine mode of the multiplication.  */
2518 
2519 static void
2520 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2521 	    const struct mult_cost *cost_limit, machine_mode mode)
2522 {
2523   int m;
2524   struct algorithm *alg_in, *best_alg;
2525   struct mult_cost best_cost;
2526   struct mult_cost new_limit;
2527   int op_cost, op_latency;
2528   unsigned HOST_WIDE_INT orig_t = t;
2529   unsigned HOST_WIDE_INT q;
2530   int maxm, hash_index;
2531   bool cache_hit = false;
2532   enum alg_code cache_alg = alg_zero;
2533   bool speed = optimize_insn_for_speed_p ();
2534   machine_mode imode;
2535   struct alg_hash_entry *entry_ptr;
2536 
2537   /* Indicate that no algorithm is yet found.  If no algorithm
2538      is found, this value will be returned and indicate failure.  */
2539   alg_out->cost.cost = cost_limit->cost + 1;
2540   alg_out->cost.latency = cost_limit->latency + 1;
2541 
2542   if (cost_limit->cost < 0
2543       || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2544     return;
2545 
2546   /* Be prepared for vector modes.  */
2547   imode = GET_MODE_INNER (mode);
2548 
2549   maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2550 
2551   /* Restrict the bits of "t" to the multiplication's mode.  */
2552   t &= GET_MODE_MASK (imode);
2553 
2554   /* t == 1 can be done in zero cost.  */
2555   if (t == 1)
2556     {
2557       alg_out->ops = 1;
2558       alg_out->cost.cost = 0;
2559       alg_out->cost.latency = 0;
2560       alg_out->op[0] = alg_m;
2561       return;
2562     }
2563 
2564   /* t == 0 sometimes has a cost.  If it does and it exceeds our limit,
2565      fail now.  */
2566   if (t == 0)
2567     {
2568       if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2569 	return;
2570       else
2571 	{
2572 	  alg_out->ops = 1;
2573 	  alg_out->cost.cost = zero_cost (speed);
2574 	  alg_out->cost.latency = zero_cost (speed);
2575 	  alg_out->op[0] = alg_zero;
2576 	  return;
2577 	}
2578     }
2579 
2580   /* We'll be needing a couple extra algorithm structures now.  */
2581 
2582   alg_in = XALLOCA (struct algorithm);
2583   best_alg = XALLOCA (struct algorithm);
2584   best_cost = *cost_limit;
2585 
2586   /* Compute the hash index.  */
2587   hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2588 
2589   /* See if we already know what to do for T.  */
2590   entry_ptr = alg_hash_entry_ptr (hash_index);
2591   if (entry_ptr->t == t
2592       && entry_ptr->mode == mode
2593       && entry_ptr->speed == speed
2594       && entry_ptr->alg != alg_unknown)
2595     {
2596       cache_alg = entry_ptr->alg;
2597 
2598       if (cache_alg == alg_impossible)
2599 	{
2600 	  /* The cache tells us that it's impossible to synthesize
2601 	     multiplication by T within entry_ptr->cost.  */
2602 	  if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2603 	    /* COST_LIMIT is at least as restrictive as the one
2604 	       recorded in the hash table, in which case we have no
2605 	       hope of synthesizing a multiplication.  Just
2606 	       return.  */
2607 	    return;
2608 
2609 	  /* If we get here, COST_LIMIT is less restrictive than the
2610 	     one recorded in the hash table, so we may be able to
2611 	     synthesize a multiplication.  Proceed as if we didn't
2612 	     have the cache entry.  */
2613 	}
2614       else
2615 	{
2616 	  if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2617 	    /* The cached algorithm shows that this multiplication
2618 	       requires more cost than COST_LIMIT.  Just return.  This
2619 	       way, we don't clobber this cache entry with
2620 	       alg_impossible but retain useful information.  */
2621 	    return;
2622 
2623 	  cache_hit = true;
2624 
2625 	  switch (cache_alg)
2626 	    {
2627 	    case alg_shift:
2628 	      goto do_alg_shift;
2629 
2630 	    case alg_add_t_m2:
2631 	    case alg_sub_t_m2:
2632 	      goto do_alg_addsub_t_m2;
2633 
2634 	    case alg_add_factor:
2635 	    case alg_sub_factor:
2636 	      goto do_alg_addsub_factor;
2637 
2638 	    case alg_add_t2_m:
2639 	      goto do_alg_add_t2_m;
2640 
2641 	    case alg_sub_t2_m:
2642 	      goto do_alg_sub_t2_m;
2643 
2644 	    default:
2645 	      gcc_unreachable ();
2646 	    }
2647 	}
2648     }
2649 
2650   /* If we have a group of zero bits at the low-order part of T, try
2651      multiplying by the remaining bits and then doing a shift.  */
2652 
2653   if ((t & 1) == 0)
2654     {
2655     do_alg_shift:
2656       m = ctz_or_zero (t); /* m = number of low zero bits */
2657       if (m < maxm)
2658 	{
2659 	  q = t >> m;
2660 	  /* The function expand_shift will choose between a shift and
2661 	     a sequence of additions, so the observed cost is given as
2662 	     MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)).  */
2663 	  op_cost = m * add_cost (speed, mode);
2664 	  if (shift_cost (speed, mode, m) < op_cost)
2665 	    op_cost = shift_cost (speed, mode, m);
2666 	  new_limit.cost = best_cost.cost - op_cost;
2667 	  new_limit.latency = best_cost.latency - op_cost;
2668 	  synth_mult (alg_in, q, &new_limit, mode);
2669 
2670 	  alg_in->cost.cost += op_cost;
2671 	  alg_in->cost.latency += op_cost;
2672 	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2673 	    {
2674 	      best_cost = alg_in->cost;
2675 	      std::swap (alg_in, best_alg);
2676 	      best_alg->log[best_alg->ops] = m;
2677 	      best_alg->op[best_alg->ops] = alg_shift;
2678 	    }
2679 
2680 	  /* See if treating ORIG_T as a signed number yields a better
2681 	     sequence.  Try this sequence only for a negative ORIG_T
2682 	     as it would be useless for a non-negative ORIG_T.  */
2683 	  if ((HOST_WIDE_INT) orig_t < 0)
2684 	    {
2685 	      /* Shift ORIG_T as follows because a right shift of a
2686 		 negative-valued signed type is implementation
2687 		 defined.  */
2688 	      q = ~(~orig_t >> m);
2689 	      /* The function expand_shift will choose between a shift
2690 		 and a sequence of additions, so the observed cost is
2691 		 given as MIN (m * add_cost(speed, mode),
2692 		 shift_cost(speed, mode, m)).  */
2693 	      op_cost = m * add_cost (speed, mode);
2694 	      if (shift_cost (speed, mode, m) < op_cost)
2695 		op_cost = shift_cost (speed, mode, m);
2696 	      new_limit.cost = best_cost.cost - op_cost;
2697 	      new_limit.latency = best_cost.latency - op_cost;
2698 	      synth_mult (alg_in, q, &new_limit, mode);
2699 
2700 	      alg_in->cost.cost += op_cost;
2701 	      alg_in->cost.latency += op_cost;
2702 	      if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2703 		{
2704 		  best_cost = alg_in->cost;
2705 		  std::swap (alg_in, best_alg);
2706 		  best_alg->log[best_alg->ops] = m;
2707 		  best_alg->op[best_alg->ops] = alg_shift;
2708 		}
2709 	    }
2710 	}
2711       if (cache_hit)
2712 	goto done;
2713     }
2714 
2715   /* If we have an odd number, add or subtract one.  */
2716   if ((t & 1) != 0)
2717     {
2718       unsigned HOST_WIDE_INT w;
2719 
2720     do_alg_addsub_t_m2:
2721       for (w = 1; (w & t) != 0; w <<= 1)
2722 	;
2723       /* If T was -1, then W will be zero after the loop.  This is another
2724 	 case where T ends with ...111.  Handling this as a multiplication
2725 	 by (T + 1) followed by a subtraction produces slightly better code
2726 	 and makes algorithm selection much faster than treating it like
2727 	 the ...0111 case below.  */
2728       if (w == 0
2729 	  || (w > 2
2730 	      /* Reject the case where t is 3.
2731 		 Thus we prefer addition in that case.  */
2732 	      && t != 3))
2733 	{
2734 	  /* T ends with ...111.  Multiply by (T + 1) and subtract T.  */
2735 
2736 	  op_cost = add_cost (speed, mode);
2737 	  new_limit.cost = best_cost.cost - op_cost;
2738 	  new_limit.latency = best_cost.latency - op_cost;
2739 	  synth_mult (alg_in, t + 1, &new_limit, mode);
2740 
2741 	  alg_in->cost.cost += op_cost;
2742 	  alg_in->cost.latency += op_cost;
2743 	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2744 	    {
2745 	      best_cost = alg_in->cost;
2746 	      std::swap (alg_in, best_alg);
2747 	      best_alg->log[best_alg->ops] = 0;
2748 	      best_alg->op[best_alg->ops] = alg_sub_t_m2;
2749 	    }
2750 	}
2751       else
2752 	{
2753 	  /* T ends with ...01 or ...011.  Multiply by (T - 1) and add T.  */
2754 
2755 	  op_cost = add_cost (speed, mode);
2756 	  new_limit.cost = best_cost.cost - op_cost;
2757 	  new_limit.latency = best_cost.latency - op_cost;
2758 	  synth_mult (alg_in, t - 1, &new_limit, mode);
2759 
2760 	  alg_in->cost.cost += op_cost;
2761 	  alg_in->cost.latency += op_cost;
2762 	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2763 	    {
2764 	      best_cost = alg_in->cost;
2765 	      std::swap (alg_in, best_alg);
2766 	      best_alg->log[best_alg->ops] = 0;
2767 	      best_alg->op[best_alg->ops] = alg_add_t_m2;
2768 	    }
2769 	}
2770 
2771       /* We may be able to calculate a * -7, a * -15, a * -31, etc
2772 	 quickly with a - a * n for some appropriate constant n.  */
2773       m = exact_log2 (-orig_t + 1);
2774       if (m >= 0 && m < maxm)
2775 	{
2776 	  op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2777 	  /* If the target has a cheap shift-and-subtract insn, use
2778 	     that in preference to a shift insn followed by a sub insn.
2779 	     Assume that the shift-and-sub is "atomic" with a latency
2780 	     equal to its cost, otherwise assume that on superscalar
2781 	     hardware the shift may be executed concurrently with the
2782 	     earlier steps in the algorithm.  */
2783 	  if (shiftsub1_cost (speed, mode, m) <= op_cost)
2784 	    {
2785 	      op_cost = shiftsub1_cost (speed, mode, m);
2786 	      op_latency = op_cost;
2787 	    }
2788 	  else
2789 	    op_latency = add_cost (speed, mode);
2790 
2791 	  new_limit.cost = best_cost.cost - op_cost;
2792 	  new_limit.latency = best_cost.latency - op_latency;
2793 	  synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
2794 		      &new_limit, mode);
2795 
2796 	  alg_in->cost.cost += op_cost;
2797 	  alg_in->cost.latency += op_latency;
2798 	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2799 	    {
2800 	      best_cost = alg_in->cost;
2801 	      std::swap (alg_in, best_alg);
2802 	      best_alg->log[best_alg->ops] = m;
2803 	      best_alg->op[best_alg->ops] = alg_sub_t_m2;
2804 	    }
2805 	}
2806 
2807       if (cache_hit)
2808 	goto done;
2809     }
2810 
2811   /* Look for factors of t of the form
2812      t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2813      If we find such a factor, we can multiply by t using an algorithm that
2814      multiplies by q, shift the result by m and add/subtract it to itself.
2815 
2816      We search for large factors first and loop down, even if large factors
2817      are less probable than small; if we find a large factor we will find a
2818      good sequence quickly, and therefore be able to prune (by decreasing
2819      COST_LIMIT) the search.  */
2820 
2821  do_alg_addsub_factor:
2822   for (m = floor_log2 (t - 1); m >= 2; m--)
2823     {
2824       unsigned HOST_WIDE_INT d;
2825 
2826       d = (HOST_WIDE_INT_1U << m) + 1;
2827       if (t % d == 0 && t > d && m < maxm
2828 	  && (!cache_hit || cache_alg == alg_add_factor))
2829 	{
2830 	  op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2831 	  if (shiftadd_cost (speed, mode, m) <= op_cost)
2832 	    op_cost = shiftadd_cost (speed, mode, m);
2833 
2834 	  op_latency = op_cost;
2835 
2837 	  new_limit.cost = best_cost.cost - op_cost;
2838 	  new_limit.latency = best_cost.latency - op_latency;
2839 	  synth_mult (alg_in, t / d, &new_limit, mode);
2840 
2841 	  alg_in->cost.cost += op_cost;
2842 	  alg_in->cost.latency += op_latency;
2843 	  if (alg_in->cost.latency < op_cost)
2844 	    alg_in->cost.latency = op_cost;
2845 	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2846 	    {
2847 	      best_cost = alg_in->cost;
2848 	      std::swap (alg_in, best_alg);
2849 	      best_alg->log[best_alg->ops] = m;
2850 	      best_alg->op[best_alg->ops] = alg_add_factor;
2851 	    }
2852 	  /* Other factors will have been taken care of in the recursion.  */
2853 	  break;
2854 	}
2855 
2856       d = (HOST_WIDE_INT_1U << m) - 1;
2857       if (t % d == 0 && t > d && m < maxm
2858 	  && (!cache_hit || cache_alg == alg_sub_factor))
2859 	{
2860 	  op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2861 	  if (shiftsub0_cost (speed, mode, m) <= op_cost)
2862 	    op_cost = shiftsub0_cost (speed, mode, m);
2863 
2864 	  op_latency = op_cost;
2865 
2866 	  new_limit.cost = best_cost.cost - op_cost;
2867 	  new_limit.latency = best_cost.latency - op_latency;
2868 	  synth_mult (alg_in, t / d, &new_limit, mode);
2869 
2870 	  alg_in->cost.cost += op_cost;
2871 	  alg_in->cost.latency += op_latency;
2872 	  if (alg_in->cost.latency < op_cost)
2873 	    alg_in->cost.latency = op_cost;
2874 	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2875 	    {
2876 	      best_cost = alg_in->cost;
2877 	      std::swap (alg_in, best_alg);
2878 	      best_alg->log[best_alg->ops] = m;
2879 	      best_alg->op[best_alg->ops] = alg_sub_factor;
2880 	    }
2881 	  break;
2882 	}
2883     }
2884   if (cache_hit)
2885     goto done;
2886 
2887   /* Try shift-and-add (load effective address) instructions,
2888      i.e. do a*3, a*5, a*9.  */
2889   if ((t & 1) != 0)
2890     {
2891     do_alg_add_t2_m:
2892       q = t - 1;
2893       m = ctz_hwi (q);
2894       if (q && m < maxm)
2895 	{
2896 	  op_cost = shiftadd_cost (speed, mode, m);
2897 	  new_limit.cost = best_cost.cost - op_cost;
2898 	  new_limit.latency = best_cost.latency - op_cost;
2899 	  synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2900 
2901 	  alg_in->cost.cost += op_cost;
2902 	  alg_in->cost.latency += op_cost;
2903 	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2904 	    {
2905 	      best_cost = alg_in->cost;
2906 	      std::swap (alg_in, best_alg);
2907 	      best_alg->log[best_alg->ops] = m;
2908 	      best_alg->op[best_alg->ops] = alg_add_t2_m;
2909 	    }
2910 	}
2911       if (cache_hit)
2912 	goto done;
2913 
2914     do_alg_sub_t2_m:
2915       q = t + 1;
2916       m = ctz_hwi (q);
2917       if (q && m < maxm)
2918 	{
2919 	  op_cost = shiftsub0_cost (speed, mode, m);
2920 	  new_limit.cost = best_cost.cost - op_cost;
2921 	  new_limit.latency = best_cost.latency - op_cost;
2922 	  synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2923 
2924 	  alg_in->cost.cost += op_cost;
2925 	  alg_in->cost.latency += op_cost;
2926 	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2927 	    {
2928 	      best_cost = alg_in->cost;
2929 	      std::swap (alg_in, best_alg);
2930 	      best_alg->log[best_alg->ops] = m;
2931 	      best_alg->op[best_alg->ops] = alg_sub_t2_m;
2932 	    }
2933 	}
2934       if (cache_hit)
2935 	goto done;
2936     }
2937 
2938  done:
2939   /* If best_cost has not decreased, we have not found any algorithm.  */
2940   if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2941     {
2942       /* We failed to find an algorithm.  Record alg_impossible for
2943 	 this case (that is, <T, MODE, COST_LIMIT>) so that next time
2944 	 we are asked to find an algorithm for T within the same or
2945 	 lower COST_LIMIT, we can immediately return to the
2946 	 caller.  */
2947       entry_ptr->t = t;
2948       entry_ptr->mode = mode;
2949       entry_ptr->speed = speed;
2950       entry_ptr->alg = alg_impossible;
2951       entry_ptr->cost = *cost_limit;
2952       return;
2953     }
2954 
2955   /* Cache the result.  */
2956   if (!cache_hit)
2957     {
2958       entry_ptr->t = t;
2959       entry_ptr->mode = mode;
2960       entry_ptr->speed = speed;
2961       entry_ptr->alg = best_alg->op[best_alg->ops];
2962       entry_ptr->cost.cost = best_cost.cost;
2963       entry_ptr->cost.latency = best_cost.latency;
2964     }
2965 
2966   /* If the sequence is too long for `struct algorithm' to record,
2967      make this search fail.  */
2968   if (best_alg->ops == MAX_BITS_PER_WORD)
2969     return;
2970 
2971   /* Copy the algorithm from temporary space to the space at alg_out.
2972      We avoid using structure assignment because the majority of
2973      best_alg is normally undefined, and this is a critical function.  */
2974   alg_out->ops = best_alg->ops + 1;
2975   alg_out->cost = best_cost;
2976   memcpy (alg_out->op, best_alg->op,
2977 	  alg_out->ops * sizeof *alg_out->op);
2978   memcpy (alg_out->log, best_alg->log,
2979 	  alg_out->ops * sizeof *alg_out->log);
2980 }
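
/* A worked example: for T = 10 the search first strips the low zero bit
   (restored by a final shift) and then synthesizes the remaining factor 5,
   typically as a shift-and-add, so the emitted sequence is roughly

       tmp = (x << 2) + x;      x * 5
       res = tmp << 1;          x * 10

   provided the shifts and the add are cheaper than the mode's multiply
   instruction.  */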
2981 
2982 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2983    Try three variations:
2984 
2985        - a shift/add sequence based on VAL itself
2986        - a shift/add sequence based on -VAL, followed by a negation
2987        - a shift/add sequence based on VAL - 1, followed by an addition.
2988 
2989    Return true if the cheapest of these cost less than MULT_COST,
2990    describing the algorithm in *ALG and final fixup in *VARIANT.  */
2991 
2992 bool
2993 choose_mult_variant (machine_mode mode, HOST_WIDE_INT val,
2994 		     struct algorithm *alg, enum mult_variant *variant,
2995 		     int mult_cost)
2996 {
2997   struct algorithm alg2;
2998   struct mult_cost limit;
2999   int op_cost;
3000   bool speed = optimize_insn_for_speed_p ();
3001 
3002   /* Fail quickly for impossible bounds.  */
3003   if (mult_cost < 0)
3004     return false;
3005 
3006   /* Ensure that mult_cost provides a reasonable upper bound.
3007      Any constant multiplication can be performed with less
3008      than 2 * bits additions.  */
3009   op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
3010   if (mult_cost > op_cost)
3011     mult_cost = op_cost;
3012 
3013   *variant = basic_variant;
3014   limit.cost = mult_cost;
3015   limit.latency = mult_cost;
3016   synth_mult (alg, val, &limit, mode);
3017 
3018   /* This works only if the inverted value actually fits in an
3019      `unsigned int'.  */
3020   if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
3021     {
3022       op_cost = neg_cost (speed, mode);
3023       if (MULT_COST_LESS (&alg->cost, mult_cost))
3024 	{
3025 	  limit.cost = alg->cost.cost - op_cost;
3026 	  limit.latency = alg->cost.latency - op_cost;
3027 	}
3028       else
3029 	{
3030 	  limit.cost = mult_cost - op_cost;
3031 	  limit.latency = mult_cost - op_cost;
3032 	}
3033 
3034       synth_mult (&alg2, -val, &limit, mode);
3035       alg2.cost.cost += op_cost;
3036       alg2.cost.latency += op_cost;
3037       if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3038 	*alg = alg2, *variant = negate_variant;
3039     }
3040 
3041   /* This proves very useful for division-by-constant.  */
3042   op_cost = add_cost (speed, mode);
3043   if (MULT_COST_LESS (&alg->cost, mult_cost))
3044     {
3045       limit.cost = alg->cost.cost - op_cost;
3046       limit.latency = alg->cost.latency - op_cost;
3047     }
3048   else
3049     {
3050       limit.cost = mult_cost - op_cost;
3051       limit.latency = mult_cost - op_cost;
3052     }
3053 
3054   synth_mult (&alg2, val - 1, &limit, mode);
3055   alg2.cost.cost += op_cost;
3056   alg2.cost.latency += op_cost;
3057   if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3058     *alg = alg2, *variant = add_variant;
3059 
3060   return MULT_COST_LESS (&alg->cost, mult_cost);
3061 }
3062 
3063 /* A subroutine of expand_mult, used for constant multiplications.
3064    Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
3065    convenient.  Use the shift/add sequence described by ALG and apply
3066    the final fixup specified by VARIANT.  */
3067 
3068 static rtx
3069 expand_mult_const (machine_mode mode, rtx op0, HOST_WIDE_INT val,
3070 		   rtx target, const struct algorithm *alg,
3071 		   enum mult_variant variant)
3072 {
3073   unsigned HOST_WIDE_INT val_so_far;
3074   rtx_insn *insn;
3075   rtx accum, tem;
3076   int opno;
3077   machine_mode nmode;
3078 
3079   /* Avoid referencing memory over and over, and avoid invalid
3080      sharing of SUBREGs.  */
3081   op0 = force_reg (mode, op0);
3082 
3083   /* ACCUM starts out either as OP0 or as a zero, depending on
3084      the first operation.  */
3085 
3086   if (alg->op[0] == alg_zero)
3087     {
3088       accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
3089       val_so_far = 0;
3090     }
3091   else if (alg->op[0] == alg_m)
3092     {
3093       accum = copy_to_mode_reg (mode, op0);
3094       val_so_far = 1;
3095     }
3096   else
3097     gcc_unreachable ();
3098 
3099   for (opno = 1; opno < alg->ops; opno++)
3100     {
3101       int log = alg->log[opno];
3102       rtx shift_subtarget = optimize ? 0 : accum;
3103       rtx add_target
3104 	= (opno == alg->ops - 1 && target != 0 && variant != add_variant
3105 	   && !optimize)
3106 	  ? target : 0;
3107       rtx accum_target = optimize ? 0 : accum;
3108       rtx accum_inner;
3109 
3110       switch (alg->op[opno])
3111 	{
3112 	case alg_shift:
3113 	  tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3114 	  /* REG_EQUAL note will be attached to the following insn.  */
3115 	  emit_move_insn (accum, tem);
3116 	  val_so_far <<= log;
3117 	  break;
3118 
3119 	case alg_add_t_m2:
3120 	  tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3121 	  accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3122 				 add_target ? add_target : accum_target);
3123 	  val_so_far += HOST_WIDE_INT_1U << log;
3124 	  break;
3125 
3126 	case alg_sub_t_m2:
3127 	  tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3128 	  accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
3129 				 add_target ? add_target : accum_target);
3130 	  val_so_far -= HOST_WIDE_INT_1U << log;
3131 	  break;
3132 
3133 	case alg_add_t2_m:
3134 	  accum = expand_shift (LSHIFT_EXPR, mode, accum,
3135 				log, shift_subtarget, 0);
3136 	  accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
3137 				 add_target ? add_target : accum_target);
3138 	  val_so_far = (val_so_far << log) + 1;
3139 	  break;
3140 
3141 	case alg_sub_t2_m:
3142 	  accum = expand_shift (LSHIFT_EXPR, mode, accum,
3143 				log, shift_subtarget, 0);
3144 	  accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3145 				 add_target ? add_target : accum_target);
3146 	  val_so_far = (val_so_far << log) - 1;
3147 	  break;
3148 
3149 	case alg_add_factor:
3150 	  tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3151 	  accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3152 				 add_target ? add_target : accum_target);
3153 	  val_so_far += val_so_far << log;
3154 	  break;
3155 
3156 	case alg_sub_factor:
3157 	  tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3158 	  accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3159 				 (add_target
3160 				  ? add_target : (optimize ? 0 : tem)));
3161 	  val_so_far = (val_so_far << log) - val_so_far;
3162 	  break;
3163 
3164 	default:
3165 	  gcc_unreachable ();
3166 	}
3167 
3168       if (SCALAR_INT_MODE_P (mode))
3169 	{
3170 	  /* Write a REG_EQUAL note on the last insn so that we can cse
3171 	     multiplication sequences.  Note that if ACCUM is a SUBREG,
3172 	     we've set the inner register and must properly indicate that.  */
3173           tem = op0, nmode = mode;
3174           accum_inner = accum;
3175           if (GET_CODE (accum) == SUBREG)
3176 	    {
3177 	      accum_inner = SUBREG_REG (accum);
3178 	      nmode = GET_MODE (accum_inner);
3179 	      tem = gen_lowpart (nmode, op0);
3180 	    }
3181 
3182 	  /* Don't add a REG_EQUAL note if tem is a paradoxical SUBREG.
3183 	     In that case, only the low bits of accum would be guaranteed to
3184 	     be equal to the content of the REG_EQUAL note; the upper bits
3185 	     can be anything.  */
3186 	  if (!paradoxical_subreg_p (tem))
3187 	    {
3188 	      insn = get_last_insn ();
3189 	      set_dst_reg_note (insn, REG_EQUAL,
3190 				gen_rtx_MULT (nmode, tem,
3191 					      gen_int_mode (val_so_far,
3192 							    nmode)),
3193 				accum_inner);
3194 	    }
3195 	}
3196     }
3197 
3198   if (variant == negate_variant)
3199     {
3200       val_so_far = -val_so_far;
3201       accum = expand_unop (mode, neg_optab, accum, target, 0);
3202     }
3203   else if (variant == add_variant)
3204     {
3205       val_so_far = val_so_far + 1;
3206       accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3207     }
3208 
3209   /* Compare only the bits of val and val_so_far that are significant
3210      in the result mode, to avoid sign-/zero-extension confusion.  */
3211   nmode = GET_MODE_INNER (mode);
3212   val &= GET_MODE_MASK (nmode);
3213   val_so_far &= GET_MODE_MASK (nmode);
3214   gcc_assert (val == (HOST_WIDE_INT) val_so_far);
3215 
3216   return accum;
3217 }
3218 
3219 /* Perform a multiplication and return an rtx for the result.
3220    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3221    TARGET is a suggestion for where to store the result (an rtx).
3222 
3223    We check specially for a constant integer as OP1.
3224    If you want this check for OP0 as well, then before calling
3225    you should swap the two operands if OP0 would be constant.  */
3226 
3227 rtx
3228 expand_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3229 	     int unsignedp)
3230 {
3231   enum mult_variant variant;
3232   struct algorithm algorithm;
3233   rtx scalar_op1;
3234   int max_cost;
3235   bool speed = optimize_insn_for_speed_p ();
3236   bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
3237 
3238   if (CONSTANT_P (op0))
3239     std::swap (op0, op1);
3240 
3241   /* For vectors, there are several simplifications that can be made if
3242      all elements of the vector constant are identical.  */
3243   scalar_op1 = unwrap_const_vec_duplicate (op1);
3244 
3245   if (INTEGRAL_MODE_P (mode))
3246     {
3247       rtx fake_reg;
3248       HOST_WIDE_INT coeff;
3249       bool is_neg;
3250       int mode_bitsize;
3251 
3252       if (op1 == CONST0_RTX (mode))
3253 	return op1;
3254       if (op1 == CONST1_RTX (mode))
3255 	return op0;
3256       if (op1 == CONSTM1_RTX (mode))
3257 	return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3258 			    op0, target, 0);
3259 
3260       if (do_trapv)
3261 	goto skip_synth;
3262 
3263       /* If MODE is an integer vector mode, check whether the backend
3264 	 supports vector lshift (by scalar or vector) at all.  If not, we
3265 	 can't use a synthesized multiply.  */
3266       if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
3267 	  && optab_handler (vashl_optab, mode) == CODE_FOR_nothing
3268 	  && optab_handler (ashl_optab, mode) == CODE_FOR_nothing)
3269 	goto skip_synth;
3270 
3271       /* These are the operations that are potentially turned into
3272 	 a sequence of shifts and additions.  */
3273       mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
3274 
3275       /* synth_mult does an `unsigned int' multiply.  As long as the mode is
3276 	 less than or equal in size to `unsigned int' this doesn't matter.
3277 	 If the mode is larger than `unsigned int', then synth_mult works
3278 	 only if the constant value exactly fits in an `unsigned int' without
3279 	 any truncation.  This means that multiplying by negative values does
3280 	 not work; results are off by 2^32 on a 32-bit machine.  */
3281       if (CONST_INT_P (scalar_op1))
3282 	{
3283 	  coeff = INTVAL (scalar_op1);
3284 	  is_neg = coeff < 0;
3285 	}
3286 #if TARGET_SUPPORTS_WIDE_INT
3287       else if (CONST_WIDE_INT_P (scalar_op1))
3288 #else
3289       else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
3290 #endif
3291 	{
3292 	  int shift = wi::exact_log2 (rtx_mode_t (scalar_op1, mode));
3293 	  /* Perfect power of 2 (other than 1, which is handled above).  */
3294 	  if (shift > 0)
3295 	    return expand_shift (LSHIFT_EXPR, mode, op0,
3296 				 shift, target, unsignedp);
3297 	  else
3298 	    goto skip_synth;
3299 	}
3300       else
3301 	goto skip_synth;
3302 
3303       /* We used to test optimize here, on the grounds that it's better to
3304 	 produce a smaller program when -O is not used.  But this causes
3305 	 such a terrible slowdown sometimes that it seems better to always
3306 	 use synth_mult.  */
3307 
3308       /* Special case powers of two.  */
3309       if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)
3310 	  && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT))
3311 	return expand_shift (LSHIFT_EXPR, mode, op0,
3312 			     floor_log2 (coeff), target, unsignedp);
3313 
3314       fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3315 
3316       /* Attempt to handle multiplication of DImode values by negative
3317 	 coefficients, by performing the multiplication by a positive
3318 	 multiplier and then negating the result.  */
3319       if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3320 	{
3321 	  /* It's safe to use -coeff even for INT_MIN, as the
3322 	     result is interpreted as an unsigned coefficient.
3323 	     Exclude cost of op0 from max_cost to match the cost
3324 	     calculation of the synth_mult.  */
3325 	  coeff = -(unsigned HOST_WIDE_INT) coeff;
3326 	  max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1),
3327 				    mode, speed)
3328 		      - neg_cost (speed, mode));
3329 	  if (max_cost <= 0)
3330 	    goto skip_synth;
3331 
3332 	  /* Special case powers of two.  */
3333 	  if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3334 	    {
3335 	      rtx temp = expand_shift (LSHIFT_EXPR, mode, op0,
3336 				       floor_log2 (coeff), target, unsignedp);
3337 	      return expand_unop (mode, neg_optab, temp, target, 0);
3338 	    }
3339 
3340 	  if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3341 				   max_cost))
3342 	    {
3343 	      rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX,
3344 					    &algorithm, variant);
3345 	      return expand_unop (mode, neg_optab, temp, target, 0);
3346 	    }
3347 	  goto skip_synth;
3348 	}
3349 
3350       /* Exclude cost of op0 from max_cost to match the cost
3351 	 calculation of the synth_mult.  */
3352       max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), mode, speed);
3353       if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3354 	return expand_mult_const (mode, op0, coeff, target,
3355 				  &algorithm, variant);
3356     }
3357  skip_synth:
3358 
3359   /* Expand x*2.0 as x+x.  */
3360   if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1)
3361       && real_equal (CONST_DOUBLE_REAL_VALUE (scalar_op1), &dconst2))
3362     {
3363       op0 = force_reg (GET_MODE (op0), op0);
3364       return expand_binop (mode, add_optab, op0, op0,
3365 			   target, unsignedp, OPTAB_LIB_WIDEN);
3366     }
3367 
3368   /* This used to use umul_optab if unsigned, but for non-widening multiply
3369      there is no difference between signed and unsigned.  */
3370   op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
3371 		      op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3372   gcc_assert (op0);
3373   return op0;
3374 }
3375 
3376 /* Return a cost estimate for multiplying a register by the given
3377    COEFFicient in the given MODE and SPEED.  */
3378 
3379 int
3380 mult_by_coeff_cost (HOST_WIDE_INT coeff, machine_mode mode, bool speed)
3381 {
3382   int max_cost;
3383   struct algorithm algorithm;
3384   enum mult_variant variant;
3385 
3386   rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3387   max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg),
3388 			   mode, speed);
3389   if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3390     return algorithm.cost.cost;
3391   else
3392     return max_cost;
3393 }
3394 
3395 /* Perform a widening multiplication and return an rtx for the result.
3396    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3397    TARGET is a suggestion for where to store the result (an rtx).
3398    THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3399    or smul_widen_optab.
3400 
3401    We check specially for a constant integer as OP1, comparing the
3402    cost of a widening multiply against the cost of a sequence of shifts
3403    and adds.  */
3404 
3405 rtx
3406 expand_widening_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3407 		      int unsignedp, optab this_optab)
3408 {
3409   bool speed = optimize_insn_for_speed_p ();
3410   rtx cop1;
3411 
3412   if (CONST_INT_P (op1)
3413       && GET_MODE (op0) != VOIDmode
3414       && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3415 				this_optab == umul_widen_optab))
3416       && CONST_INT_P (cop1)
3417       && (INTVAL (cop1) >= 0
3418 	  || HWI_COMPUTABLE_MODE_P (mode)))
3419     {
3420       HOST_WIDE_INT coeff = INTVAL (cop1);
3421       int max_cost;
3422       enum mult_variant variant;
3423       struct algorithm algorithm;
3424 
3425       if (coeff == 0)
3426 	return CONST0_RTX (mode);
3427 
3428       /* Special case powers of two.  */
3429       if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3430 	{
3431 	  op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3432 	  return expand_shift (LSHIFT_EXPR, mode, op0,
3433 			       floor_log2 (coeff), target, unsignedp);
3434 	}
3435 
3436       /* Exclude cost of op0 from max_cost to match the cost
3437 	 calculation of the synth_mult.  */
3438       max_cost = mul_widen_cost (speed, mode);
3439       if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3440 			       max_cost))
3441 	{
3442 	  op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3443 	  return expand_mult_const (mode, op0, coeff, target,
3444 				    &algorithm, variant);
3445 	}
3446     }
3447   return expand_binop (mode, this_optab, op0, op1, target,
3448 		       unsignedp, OPTAB_LIB_WIDEN);
3449 }
3450 
3451 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3452    replace division by D, and put the least significant N bits of the result
3453    in *MULTIPLIER_PTR and return the most significant bit.
3454 
3455    The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT); the
3456    needed precision is in PRECISION (should be <= N).
3457 
3458    PRECISION should be as small as possible so this function can choose a
3459    multiplier more freely.
3460 
3461    The rounded-up logarithm of D is placed in *LGUP_PTR.  A shift count that
3462    is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3463 
3464    Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3465    where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier.  */
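/* As a worked illustration (hand-computed; these values are not asserted
   by the code below): with N == 32, PRECISION == 32 and D == 3 the chosen
   multiplier is 0xAAAAAAAB == ceil(2^33 / 3), the post shift is 1 and the
   returned most significant bit is 0, so that for every 32-bit unsigned x

       x / 3 == (x * 0xAAAAAAAB) >> 33		(computed in 64 bits)

   i.e. the high half of the double-width product shifted right by
   *POST_SHIFT_PTR.  */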
3466 
3467 unsigned HOST_WIDE_INT
3468 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3469 		   unsigned HOST_WIDE_INT *multiplier_ptr,
3470 		   int *post_shift_ptr, int *lgup_ptr)
3471 {
3472   int lgup, post_shift;
3473   int pow, pow2;
3474 
3475   /* lgup = ceil(log2(divisor)); */
3476   lgup = ceil_log2 (d);
3477 
3478   gcc_assert (lgup <= n);
3479 
3480   pow = n + lgup;
3481   pow2 = n + lgup - precision;
3482 
3483   /* mlow = 2^(N + lgup)/d */
3484   wide_int val = wi::set_bit_in_zero (pow, HOST_BITS_PER_DOUBLE_INT);
3485   wide_int mlow = wi::udiv_trunc (val, d);
3486 
3487   /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3488   val |= wi::set_bit_in_zero (pow2, HOST_BITS_PER_DOUBLE_INT);
3489   wide_int mhigh = wi::udiv_trunc (val, d);
3490 
3491   /* If precision == N, then mlow, mhigh exceed 2^N
3492      (but they do not exceed 2^(N+1)).  */
3493 
3494   /* Reduce to lowest terms.  */
3495   for (post_shift = lgup; post_shift > 0; post_shift--)
3496     {
3497       unsigned HOST_WIDE_INT ml_lo = wi::extract_uhwi (mlow, 1,
3498 						       HOST_BITS_PER_WIDE_INT);
3499       unsigned HOST_WIDE_INT mh_lo = wi::extract_uhwi (mhigh, 1,
3500 						       HOST_BITS_PER_WIDE_INT);
3501       if (ml_lo >= mh_lo)
3502 	break;
3503 
3504       mlow = wi::uhwi (ml_lo, HOST_BITS_PER_DOUBLE_INT);
3505       mhigh = wi::uhwi (mh_lo, HOST_BITS_PER_DOUBLE_INT);
3506     }
3507 
3508   *post_shift_ptr = post_shift;
3509   *lgup_ptr = lgup;
3510   if (n < HOST_BITS_PER_WIDE_INT)
3511     {
3512       unsigned HOST_WIDE_INT mask = (HOST_WIDE_INT_1U << n) - 1;
3513       *multiplier_ptr = mhigh.to_uhwi () & mask;
3514       return mhigh.to_uhwi () >= mask;
3515     }
3516   else
3517     {
3518       *multiplier_ptr = mhigh.to_uhwi ();
3519       return wi::extract_uhwi (mhigh, HOST_BITS_PER_WIDE_INT, 1);
3520     }
3521 }
3522 
3523 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3524    congruent to 1 (mod 2**N).  */
3525 
3526 static unsigned HOST_WIDE_INT
3527 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3528 {
3529   /* Solve x*y == 1 (mod 2^n), where x is odd.  Return y.  */
3530 
3531   /* The algorithm notes that the choice y = x satisfies
3532      x*y == 1 mod 2^3, since x is assumed odd.
3533      Each iteration doubles the number of bits of significance in y.  */
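  /* As a worked example (illustrative only): for x == 3 and n == 8 we
     start with y == 3, since 3*3 == 9 == 1 (mod 8).  The first iteration
     gives y == 3 * (2 - 3*3) == -21 == 235 (mod 256), which is correct to
     6 bits (3 * 235 == 705 == 1 (mod 64)); the second gives
     y == 235 * (2 - 3*235) == 171 (mod 256), and 3 * 171 == 513 == 1
     (mod 256), the desired inverse.  */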
3534 
3535   unsigned HOST_WIDE_INT mask;
3536   unsigned HOST_WIDE_INT y = x;
3537   int nbit = 3;
3538 
3539   mask = (n == HOST_BITS_PER_WIDE_INT
3540 	  ? HOST_WIDE_INT_M1U
3541 	  : (HOST_WIDE_INT_1U << n) - 1);
3542 
3543   while (nbit < n)
3544     {
3545       y = y * (2 - x*y) & mask;		/* Modulo 2^N */
3546       nbit *= 2;
3547     }
3548   return y;
3549 }
3550 
3551 /* Emit code to adjust ADJ_OPERAND after a multiplication of OP0 and OP1
3552    that was performed with the wrong signedness.  ADJ_OPERAND is already the
3553    high half of the product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the
3554    signed product to become unsigned; if UNSIGNEDP is zero, adjust the
3555    unsigned product to become signed.
3556 
3557    The result is put in TARGET if that is convenient.
3558 
3559    MODE is the mode of operation.  */
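/* The adjustment relies on the identity (sketched here for illustration,
   with N the mode bitsize and >> an arithmetic shift):

       high_u (a, b) == high_s (a, b) + (a < 0 ? b : 0) + (b < 0 ? a : 0)

   taken modulo 2^N, where high_u and high_s denote the upper N bits of the
   unsigned and signed 2N-bit products.  The two conditional terms are
   formed below as (a >> (N-1)) & b and (b >> (N-1)) & a, and are added
   when converting signed to unsigned (UNSIGNEDP nonzero) or subtracted
   when converting unsigned to signed.  */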
3560 
3561 rtx
3562 expand_mult_highpart_adjust (machine_mode mode, rtx adj_operand, rtx op0,
3563 			     rtx op1, rtx target, int unsignedp)
3564 {
3565   rtx tem;
3566   enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3567 
3568   tem = expand_shift (RSHIFT_EXPR, mode, op0,
3569 		      GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3570   tem = expand_and (mode, tem, op1, NULL_RTX);
3571   adj_operand
3572     = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3573 		     adj_operand);
3574 
3575   tem = expand_shift (RSHIFT_EXPR, mode, op1,
3576 		      GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3577   tem = expand_and (mode, tem, op0, NULL_RTX);
3578   target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3579 			  target);
3580 
3581   return target;
3582 }
3583 
3584 /* Subroutine of expmed_mult_highpart.  Return the MODE high part of OP.  */
3585 
3586 static rtx
3587 extract_high_half (machine_mode mode, rtx op)
3588 {
3589   machine_mode wider_mode;
3590 
3591   if (mode == word_mode)
3592     return gen_highpart (mode, op);
3593 
3594   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3595 
3596   wider_mode = GET_MODE_WIDER_MODE (mode);
3597   op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3598 		     GET_MODE_BITSIZE (mode), 0, 1);
3599   return convert_modes (mode, wider_mode, op, 0);
3600 }
3601 
3602 /* Like expmed_mult_highpart, but only consider using a multiplication
3603    optab.  OP1 is an rtx for the constant operand.  */
3604 
3605 static rtx
3606 expmed_mult_highpart_optab (machine_mode mode, rtx op0, rtx op1,
3607 			    rtx target, int unsignedp, int max_cost)
3608 {
3609   rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3610   machine_mode wider_mode;
3611   optab moptab;
3612   rtx tem;
3613   int size;
3614   bool speed = optimize_insn_for_speed_p ();
3615 
3616   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3617 
3618   wider_mode = GET_MODE_WIDER_MODE (mode);
3619   size = GET_MODE_BITSIZE (mode);
3620 
3621   /* First, try using a multiplication insn that generates only the needed
3622      high part of the product, with the signedness given by UNSIGNEDP.  */
3623   if (mul_highpart_cost (speed, mode) < max_cost)
3624     {
3625       moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3626       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3627 			  unsignedp, OPTAB_DIRECT);
3628       if (tem)
3629 	return tem;
3630     }
3631 
3632   /* Second, the same as above, but with the signedness opposite to
3633      UNSIGNEDP; the result then needs adjusting after the multiplication.  */
3634   if (size - 1 < BITS_PER_WORD
3635       && (mul_highpart_cost (speed, mode)
3636 	  + 2 * shift_cost (speed, mode, size-1)
3637 	  + 4 * add_cost (speed, mode) < max_cost))
3638     {
3639       moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3640       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3641 			  unsignedp, OPTAB_DIRECT);
3642       if (tem)
3643 	/* We used the wrong signedness.  Adjust the result.  */
3644 	return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3645 					    tem, unsignedp);
3646     }
3647 
3648   /* Try widening multiplication.  */
3649   moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3650   if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3651       && mul_widen_cost (speed, wider_mode) < max_cost)
3652     {
3653       tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3654 			  unsignedp, OPTAB_WIDEN);
3655       if (tem)
3656 	return extract_high_half (mode, tem);
3657     }
3658 
3659   /* Try widening the mode and performing a non-widening multiplication.  */
3660   if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3661       && size - 1 < BITS_PER_WORD
3662       && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3663 	  < max_cost))
3664     {
3665       rtx_insn *insns;
3666       rtx wop0, wop1;
3667 
3668       /* We need to widen the operands, for example to ensure the
3669 	 constant multiplier is correctly sign- or zero-extended.
3670 	 Use a sequence to clean up any instructions emitted by
3671 	 the conversions if things don't work out.  */
3672       start_sequence ();
3673       wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3674       wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3675       tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3676 			  unsignedp, OPTAB_WIDEN);
3677       insns = get_insns ();
3678       end_sequence ();
3679 
3680       if (tem)
3681 	{
3682 	  emit_insn (insns);
3683 	  return extract_high_half (mode, tem);
3684 	}
3685     }
3686 
3687   /* Try widening multiplication of opposite signedness, and adjust.  */
3688   moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3689   if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3690       && size - 1 < BITS_PER_WORD
3691       && (mul_widen_cost (speed, wider_mode)
3692 	  + 2 * shift_cost (speed, mode, size-1)
3693 	  + 4 * add_cost (speed, mode) < max_cost))
3694     {
3695       tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3696 			  NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3697       if (tem != 0)
3698 	{
3699 	  tem = extract_high_half (mode, tem);
3700 	  /* We used the wrong signedness.  Adjust the result.  */
3701 	  return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3702 					      target, unsignedp);
3703 	}
3704     }
3705 
3706   return 0;
3707 }
3708 
3709 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3710    putting the high half of the result in TARGET if that is convenient,
3711    and return where the result is.  If the operation cannot be performed,
3712    0 is returned.
3713 
3714    MODE is the mode of operation and result.
3715 
3716    UNSIGNEDP nonzero means unsigned multiply.
3717 
3718    MAX_COST is the total allowed cost for the expanded RTL.  */
3719 
3720 static rtx
3721 expmed_mult_highpart (machine_mode mode, rtx op0, rtx op1,
3722 		      rtx target, int unsignedp, int max_cost)
3723 {
3724   machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3725   unsigned HOST_WIDE_INT cnst1;
3726   int extra_cost;
3727   bool sign_adjust = false;
3728   enum mult_variant variant;
3729   struct algorithm alg;
3730   rtx tem;
3731   bool speed = optimize_insn_for_speed_p ();
3732 
3733   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3734   /* We can't support modes wider than HOST_BITS_PER_WIDE_INT.  */
3735   gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3736 
3737   cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3738 
3739   /* We can't optimize modes wider than BITS_PER_WORD.
3740      ??? We might be able to perform double-word arithmetic if
3741      mode == word_mode; however, all the cost calculations in
3742      synth_mult etc. assume single-word operations.  */
3743   if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3744     return expmed_mult_highpart_optab (mode, op0, op1, target,
3745 				       unsignedp, max_cost);
3746 
3747   extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3748 
3749   /* Check whether we are trying to multiply by a negative constant.  */
3750   if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3751     {
3752       sign_adjust = true;
3753       extra_cost += add_cost (speed, mode);
3754     }
3755 
3756   /* See whether shift/add multiplication is cheap enough.  */
3757   if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3758 			   max_cost - extra_cost))
3759     {
3760       /* See whether the specialized multiplication optabs are
3761 	 cheaper than the shift/add version.  */
3762       tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3763 					alg.cost.cost + extra_cost);
3764       if (tem)
3765 	return tem;
3766 
3767       tem = convert_to_mode (wider_mode, op0, unsignedp);
3768       tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3769       tem = extract_high_half (mode, tem);
3770 
3771       /* Adjust result for signedness.  */
3772       if (sign_adjust)
3773 	tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3774 
3775       return tem;
3776     }
3777   return expmed_mult_highpart_optab (mode, op0, op1, target,
3778 				     unsignedp, max_cost);
3779 }
3780 
3781 
3782 /* Expand signed modulus of OP0 by a power of two D in mode MODE.  */
3783 
3784 static rtx
3785 expand_smod_pow2 (machine_mode mode, rtx op0, HOST_WIDE_INT d)
3786 {
3787   rtx result, temp, shift;
3788   rtx_code_label *label;
3789   int logd;
3790   int prec = GET_MODE_PRECISION (mode);
3791 
3792   logd = floor_log2 (d);
3793   result = gen_reg_rtx (mode);
3794 
3795   /* Avoid conditional branches when they're expensive.  */
3796   if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3797       && optimize_insn_for_speed_p ())
3798     {
3799       rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3800 				      mode, 0, -1);
3801       if (signmask)
3802 	{
3803 	  HOST_WIDE_INT masklow = (HOST_WIDE_INT_1 << logd) - 1;
3804 	  signmask = force_reg (mode, signmask);
3805 	  shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3806 
3807 	  /* Use the rtx_cost of a LSHIFTRT instruction to determine
3808 	     which instruction sequence to use.  If logical right shifts
3809 	     are expensive then use 2 XORs, 2 SUBs and an AND, otherwise
3810 	     use a LSHIFTRT, 1 ADD, 1 SUB and an AND.  */
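	  /* As an illustrative check (with d == 4, hence masklow == 3, and
	     op0 == -7): the first sequence computes
	     ((((-7 ^ -1) - -1) & 3) ^ -1) - -1 == -3 and the second
	     computes ((-7 + 3) & 3) - 3 == -3, both matching -7 % 4.  */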
3811 
3812 	  temp = gen_rtx_LSHIFTRT (mode, result, shift);
3813 	  if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
3814 	      || (set_src_cost (temp, mode, optimize_insn_for_speed_p ())
3815 		  > COSTS_N_INSNS (2)))
3816 	    {
3817 	      temp = expand_binop (mode, xor_optab, op0, signmask,
3818 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
3819 	      temp = expand_binop (mode, sub_optab, temp, signmask,
3820 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
3821 	      temp = expand_binop (mode, and_optab, temp,
3822 				   gen_int_mode (masklow, mode),
3823 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
3824 	      temp = expand_binop (mode, xor_optab, temp, signmask,
3825 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
3826 	      temp = expand_binop (mode, sub_optab, temp, signmask,
3827 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
3828 	    }
3829 	  else
3830 	    {
3831 	      signmask = expand_binop (mode, lshr_optab, signmask, shift,
3832 				       NULL_RTX, 1, OPTAB_LIB_WIDEN);
3833 	      signmask = force_reg (mode, signmask);
3834 
3835 	      temp = expand_binop (mode, add_optab, op0, signmask,
3836 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
3837 	      temp = expand_binop (mode, and_optab, temp,
3838 				   gen_int_mode (masklow, mode),
3839 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
3840 	      temp = expand_binop (mode, sub_optab, temp, signmask,
3841 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
3842 	    }
3843 	  return temp;
3844 	}
3845     }
3846 
3847   /* Mask contains the mode's signbit and the significant bits of the
3848      modulus.  By including the signbit in the operation, many targets
3849      can avoid an explicit compare operation in the following comparison
3850      against zero.  */
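  /* For example (an illustration, not a requirement of the code): in a
     32-bit mode with d == 8, MASK is 0x80000007.  For op0 == -13 the AND
     below leaves 0x80000003, which is negative, so the fixup path computes
     ((0x80000003 - 1) | 0xfffffff8) + 1 == -5, matching -13 % 8.  */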
3851   wide_int mask = wi::mask (logd, false, prec);
3852   mask = wi::set_bit (mask, prec - 1);
3853 
3854   temp = expand_binop (mode, and_optab, op0,
3855 		       immed_wide_int_const (mask, mode),
3856 		       result, 1, OPTAB_LIB_WIDEN);
3857   if (temp != result)
3858     emit_move_insn (result, temp);
3859 
3860   label = gen_label_rtx ();
3861   do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3862 
3863   temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3864 		       0, OPTAB_LIB_WIDEN);
3865 
3866   mask = wi::mask (logd, true, prec);
3867   temp = expand_binop (mode, ior_optab, temp,
3868 		       immed_wide_int_const (mask, mode),
3869 		       result, 1, OPTAB_LIB_WIDEN);
3870   temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3871 		       0, OPTAB_LIB_WIDEN);
3872   if (temp != result)
3873     emit_move_insn (result, temp);
3874   emit_label (label);
3875   return result;
3876 }
3877 
3878 /* Expand signed division of OP0 by a power of two D in mode MODE.
3879    This routine is only called for positive values of D.  */
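/* The sequences below rely on the fact (illustrated here, not derived in
   the code) that truncating signed division by 2^LOGD equals adding D - 1
   to negative operands and then doing an arithmetic right shift by LOGD.
   For example, with D == 4, -7 / 4 == (-7 + 3) >> 2 == -1, while
   7 / 4 == 7 >> 2 == 1 needs no adjustment.  */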
3880 
3881 static rtx
3882 expand_sdiv_pow2 (machine_mode mode, rtx op0, HOST_WIDE_INT d)
3883 {
3884   rtx temp;
3885   rtx_code_label *label;
3886   int logd;
3887 
3888   logd = floor_log2 (d);
3889 
3890   if (d == 2
3891       && BRANCH_COST (optimize_insn_for_speed_p (),
3892 		      false) >= 1)
3893     {
3894       temp = gen_reg_rtx (mode);
3895       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3896       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3897 			   0, OPTAB_LIB_WIDEN);
3898       return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3899     }
3900 
3901   if (HAVE_conditional_move
3902       && BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2)
3903     {
3904       rtx temp2;
3905 
3906       start_sequence ();
3907       temp2 = copy_to_mode_reg (mode, op0);
3908       temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode),
3909 			   NULL_RTX, 0, OPTAB_LIB_WIDEN);
3910       temp = force_reg (mode, temp);
3911 
3912       /* Construct "temp2 = (temp2 < 0) ? temp : temp2".  */
3913       temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3914 				     mode, temp, temp2, mode, 0);
3915       if (temp2)
3916 	{
3917 	  rtx_insn *seq = get_insns ();
3918 	  end_sequence ();
3919 	  emit_insn (seq);
3920 	  return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
3921 	}
3922       end_sequence ();
3923     }
3924 
3925   if (BRANCH_COST (optimize_insn_for_speed_p (),
3926 		   false) >= 2)
3927     {
3928       int ushift = GET_MODE_BITSIZE (mode) - logd;
3929 
3930       temp = gen_reg_rtx (mode);
3931       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3932       if (GET_MODE_BITSIZE (mode) >= BITS_PER_WORD
3933 	  || shift_cost (optimize_insn_for_speed_p (), mode, ushift)
3934 	     > COSTS_N_INSNS (1))
3935 	temp = expand_binop (mode, and_optab, temp, gen_int_mode (d - 1, mode),
3936 			     NULL_RTX, 0, OPTAB_LIB_WIDEN);
3937       else
3938 	temp = expand_shift (RSHIFT_EXPR, mode, temp,
3939 			     ushift, NULL_RTX, 1);
3940       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3941 			   0, OPTAB_LIB_WIDEN);
3942       return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3943     }
3944 
3945   label = gen_label_rtx ();
3946   temp = copy_to_mode_reg (mode, op0);
3947   do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3948   expand_inc (temp, gen_int_mode (d - 1, mode));
3949   emit_label (label);
3950   return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3951 }
3952 
3953 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3954    if that is convenient, and returning where the result is.
3955    You may request either the quotient or the remainder as the result;
3956    specify REM_FLAG nonzero to get the remainder.
3957 
3958    CODE is the expression code for which kind of division this is;
3959    it controls how rounding is done.  MODE is the machine mode to use.
3960    UNSIGNEDP nonzero means do unsigned division.  */
3961 
3962 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3963    and then correct it by or'ing in missing high bits
3964    if result of ANDI is nonzero.
3965    For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3966    This could optimize to a bfexts instruction.
3967    But C doesn't use these operations, so their optimizations are
3968    left for later.  */
3969 /* ??? For modulo, we don't actually need the highpart of the first product,
3970    the low part will do nicely.  And for small divisors, the second multiply
3971    can also be a low-part only multiply or even be completely left out.
3972    E.g. to calculate the remainder of a division by 3 with a 32 bit
3973    multiply, multiply with 0x55555556 and extract the upper two bits;
3974    the result is exact for inputs up to 0x1fffffff.
3975    The input range can be reduced by using cross-sum rules.
3976    For odd divisors >= 3, the following table gives right shift counts
3977    so that if a number is shifted by an integer multiple of the given
3978    amount, the remainder stays the same:
3979    2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3980    14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3981    0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3982    20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3983    0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3984 
3985    Cross-sum rules for even numbers can be derived by leaving as many bits
3986    to the right alone as the divisor has zeros to the right.
3987    E.g. if x is an unsigned 32 bit number:
3988    (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3989    */
3990 
3991 rtx
3992 expand_divmod (int rem_flag, enum tree_code code, machine_mode mode,
3993 	       rtx op0, rtx op1, rtx target, int unsignedp)
3994 {
3995   machine_mode compute_mode;
3996   rtx tquotient;
3997   rtx quotient = 0, remainder = 0;
3998   rtx_insn *last;
3999   int size;
4000   rtx_insn *insn;
4001   optab optab1, optab2;
4002   int op1_is_constant, op1_is_pow2 = 0;
4003   int max_cost, extra_cost;
4004   static HOST_WIDE_INT last_div_const = 0;
4005   bool speed = optimize_insn_for_speed_p ();
4006 
4007   op1_is_constant = CONST_INT_P (op1);
4008   if (op1_is_constant)
4009     {
4010       wide_int ext_op1 = rtx_mode_t (op1, mode);
4011       op1_is_pow2 = (wi::popcount (ext_op1) == 1
4012 		     || (! unsignedp
4013 			 && wi::popcount (wi::neg (ext_op1)) == 1));
4014     }
4015 
4016   /*
4017      This is the structure of expand_divmod:
4018 
4019      First comes code to fix up the operands so we can perform the operations
4020      correctly and efficiently.
4021 
4022      Second comes a switch statement with code specific for each rounding mode.
4023      For some special operands this code emits all RTL for the desired
4024      operation, for other cases, it generates only a quotient and stores it in
4025      QUOTIENT.  The case for trunc division/remainder might leave quotient = 0,
4026      to indicate that it has not done anything.
4027 
4028      Last comes code that finishes the operation.  If QUOTIENT is set and
4029      REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1.  If
4030      QUOTIENT is not set, it is computed using trunc rounding.
4031 
4032      We try to generate special code for division and remainder when OP1 is a
4033      constant.  If |OP1| = 2**n we can use shifts and some other fast
4034      operations.  For other values of OP1, we compute a carefully selected
4035      fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
4036      by m.
4037 
4038      In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
4039      half of the product.  Different strategies for generating the product are
4040      implemented in expmed_mult_highpart.
4041 
4042      If what we actually want is the remainder, we generate that by another
4043      by-constant multiplication and a subtraction.  */
4044 
4045   /* We shouldn't be called with OP1 == const1_rtx, but some of the
4046      code below will malfunction if we are, so check here and handle
4047      the special case if so.  */
4048   if (op1 == const1_rtx)
4049     return rem_flag ? const0_rtx : op0;
4050 
4051   /* When dividing by -1, we could get an overflow.
4052      negv_optab can handle overflows.  */
4053   if (! unsignedp && op1 == constm1_rtx)
4054     {
4055       if (rem_flag)
4056 	return const0_rtx;
4057       return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT
4058 			  ? negv_optab : neg_optab, op0, target, 0);
4059     }
4060 
4061   if (target
4062       /* Don't use the function value register as a target
4063 	 since we have to read it as well as write it,
4064 	 and function-inlining gets confused by this.  */
4065       && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
4066 	  /* Don't clobber an operand while doing a multi-step calculation.  */
4067 	  || ((rem_flag || op1_is_constant)
4068 	      && (reg_mentioned_p (target, op0)
4069 		  || (MEM_P (op0) && MEM_P (target))))
4070 	  || reg_mentioned_p (target, op1)
4071 	  || (MEM_P (op1) && MEM_P (target))))
4072     target = 0;
4073 
4074   /* Get the mode in which to perform this computation.  Normally it will
4075      be MODE, but sometimes we can't do the desired operation in MODE.
4076      If so, pick a wider mode in which we can do the operation.  Convert
4077      to that mode at the start to avoid repeated conversions.
4078 
4079      First see what operations we need.  These depend on the expression
4080      we are evaluating.  (We assume that divxx3 insns exist under the
4081      same conditions as modxx3 insns and that these insns don't normally
4082      fail.  If these assumptions are not correct, we may generate less
4083      efficient code in some cases.)
4084 
4085      Then see if we find a mode in which we can open-code that operation
4086      (either a division, modulus, or shift).  Finally, check for the smallest
4087      mode for which we can do the operation with a library call.  */
4088 
4089   /* We might want to refine this now that we have division-by-constant
4090      optimization.  Since expmed_mult_highpart tries so many variants, it is
4091      not straightforward to generalize this.  Maybe we should make an array
4092      of possible modes in init_expmed?  Save this for GCC 2.7.  */
4093 
4094   optab1 = (op1_is_pow2
4095 	    ? (unsignedp ? lshr_optab : ashr_optab)
4096 	    : (unsignedp ? udiv_optab : sdiv_optab));
4097   optab2 = (op1_is_pow2 ? optab1
4098 	    : (unsignedp ? udivmod_optab : sdivmod_optab));
4099 
4100   for (compute_mode = mode; compute_mode != VOIDmode;
4101        compute_mode = GET_MODE_WIDER_MODE (compute_mode))
4102     if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
4103 	|| optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
4104       break;
4105 
4106   if (compute_mode == VOIDmode)
4107     for (compute_mode = mode; compute_mode != VOIDmode;
4108 	 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
4109       if (optab_libfunc (optab1, compute_mode)
4110 	  || optab_libfunc (optab2, compute_mode))
4111 	break;
4112 
4113   /* If we still couldn't find a mode, use MODE, but expand_binop will
4114      probably die.  */
4115   if (compute_mode == VOIDmode)
4116     compute_mode = mode;
4117 
4118   if (target && GET_MODE (target) == compute_mode)
4119     tquotient = target;
4120   else
4121     tquotient = gen_reg_rtx (compute_mode);
4122 
4123   size = GET_MODE_BITSIZE (compute_mode);
4124 #if 0
4125   /* It should be possible to restrict the precision to GET_MODE_BITSIZE
4126      (mode), and thereby get better code when OP1 is a constant.  Do that
4127      later.  It will require going over all usages of SIZE below.  */
4128   size = GET_MODE_BITSIZE (mode);
4129 #endif
4130 
4131   /* Only deduct something for a REM if the last divide done was
4132      for a different constant.   Then set the constant of the last
4133      divide.  */
4134   max_cost = (unsignedp
4135 	      ? udiv_cost (speed, compute_mode)
4136 	      : sdiv_cost (speed, compute_mode));
4137   if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4138 		     && INTVAL (op1) == last_div_const))
4139     max_cost -= (mul_cost (speed, compute_mode)
4140 		 + add_cost (speed, compute_mode));
4141 
4142   last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
4143 
4144   /* Now convert to the best mode to use.  */
4145   if (compute_mode != mode)
4146     {
4147       op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4148       op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4149 
4150       /* convert_modes may have placed op1 into a register, so we
4151 	 must recompute the following.  */
4152       op1_is_constant = CONST_INT_P (op1);
4153       if (op1_is_constant)
4154 	{
4155 	  wide_int ext_op1 = rtx_mode_t (op1, compute_mode);
4156 	  op1_is_pow2 = (wi::popcount (ext_op1) == 1
4157 			 || (! unsignedp
4158 			     && wi::popcount (wi::neg (ext_op1)) == 1));
4159 	}
4160       else
4161 	op1_is_pow2 = 0;
4162     }
4163 
4164   /* If one of the operands is a volatile MEM, copy it into a register.  */
4165 
4166   if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4167     op0 = force_reg (compute_mode, op0);
4168   if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4169     op1 = force_reg (compute_mode, op1);
4170 
4171   /* If we need the remainder or if OP1 is constant, we need to
4172      put OP0 in a register in case it has any queued subexpressions.  */
4173   if (rem_flag || op1_is_constant)
4174     op0 = force_reg (compute_mode, op0);
4175 
4176   last = get_last_insn ();
4177 
4178   /* Promote floor rounding to trunc rounding for unsigned operations.  */
4179   if (unsignedp)
4180     {
4181       if (code == FLOOR_DIV_EXPR)
4182 	code = TRUNC_DIV_EXPR;
4183       if (code == FLOOR_MOD_EXPR)
4184 	code = TRUNC_MOD_EXPR;
4185       if (code == EXACT_DIV_EXPR && op1_is_pow2)
4186 	code = TRUNC_DIV_EXPR;
4187     }
4188 
4189   if (op1 != const0_rtx)
4190     switch (code)
4191       {
4192       case TRUNC_MOD_EXPR:
4193       case TRUNC_DIV_EXPR:
4194 	if (op1_is_constant)
4195 	  {
4196 	    if (unsignedp)
4197 	      {
4198 		unsigned HOST_WIDE_INT mh, ml;
4199 		int pre_shift, post_shift;
4200 		int dummy;
4201 		wide_int wd = rtx_mode_t (op1, compute_mode);
4202 		unsigned HOST_WIDE_INT d = wd.to_uhwi ();
4203 
4204 		if (wi::popcount (wd) == 1)
4205 		  {
4206 		    pre_shift = floor_log2 (d);
4207 		    if (rem_flag)
4208 		      {
4209 			unsigned HOST_WIDE_INT mask
4210 			  = (HOST_WIDE_INT_1U << pre_shift) - 1;
4211 			remainder
4212 			  = expand_binop (compute_mode, and_optab, op0,
4213 					  gen_int_mode (mask, compute_mode),
4214 					  remainder, 1,
4215 					  OPTAB_LIB_WIDEN);
4216 			if (remainder)
4217 			  return gen_lowpart (mode, remainder);
4218 		      }
4219 		    quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4220 					     pre_shift, tquotient, 1);
4221 		  }
4222 		else if (size <= HOST_BITS_PER_WIDE_INT)
4223 		  {
4224 		    if (d >= (HOST_WIDE_INT_1U << (size - 1)))
4225 		      {
4226 			/* Most significant bit of divisor is set; emit an scc
4227 			   insn.  */
4228 			quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4229 							  compute_mode, 1, 1);
4230 		      }
4231 		    else
4232 		      {
4233 			/* Find a suitable multiplier and right shift count
4234 			   instead of multiplying with D.  */
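			/* As a worked instance (hand-computed for
			   illustration; the code derives these values via
			   choose_multiplier): for a 32-bit unsigned
			   division by 7 it yields ml == 0x24924925 with
			   mh != 0 and post_shift == 3, and the mh != 0 arm
			   below then computes
			       t1 = highpart (op0 * 0x24924925)
			       quotient = (t1 + ((op0 - t1) >> 1)) >> 2.  */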
4235 
4236 			mh = choose_multiplier (d, size, size,
4237 						&ml, &post_shift, &dummy);
4238 
4239 			/* If the suggested multiplier is more than SIZE bits,
4240 			   we can do better for even divisors, using an
4241 			   initial right shift.  */
4242 			if (mh != 0 && (d & 1) == 0)
4243 			  {
4244 			    pre_shift = ctz_or_zero (d);
4245 			    mh = choose_multiplier (d >> pre_shift, size,
4246 						    size - pre_shift,
4247 						    &ml, &post_shift, &dummy);
4248 			    gcc_assert (!mh);
4249 			  }
4250 			else
4251 			  pre_shift = 0;
4252 
4253 			if (mh != 0)
4254 			  {
4255 			    rtx t1, t2, t3, t4;
4256 
4257 			    if (post_shift - 1 >= BITS_PER_WORD)
4258 			      goto fail1;
4259 
4260 			    extra_cost
4261 			      = (shift_cost (speed, compute_mode, post_shift - 1)
4262 				 + shift_cost (speed, compute_mode, 1)
4263 				 + 2 * add_cost (speed, compute_mode));
4264 			    t1 = expmed_mult_highpart
4265 			      (compute_mode, op0,
4266 			       gen_int_mode (ml, compute_mode),
4267 			       NULL_RTX, 1, max_cost - extra_cost);
4268 			    if (t1 == 0)
4269 			      goto fail1;
4270 			    t2 = force_operand (gen_rtx_MINUS (compute_mode,
4271 							       op0, t1),
4272 						NULL_RTX);
4273 			    t3 = expand_shift (RSHIFT_EXPR, compute_mode,
4274 					       t2, 1, NULL_RTX, 1);
4275 			    t4 = force_operand (gen_rtx_PLUS (compute_mode,
4276 							      t1, t3),
4277 						NULL_RTX);
4278 			    quotient = expand_shift
4279 			      (RSHIFT_EXPR, compute_mode, t4,
4280 			       post_shift - 1, tquotient, 1);
4281 			  }
4282 			else
4283 			  {
4284 			    rtx t1, t2;
4285 
4286 			    if (pre_shift >= BITS_PER_WORD
4287 				|| post_shift >= BITS_PER_WORD)
4288 			      goto fail1;
4289 
4290 			    t1 = expand_shift
4291 			      (RSHIFT_EXPR, compute_mode, op0,
4292 			       pre_shift, NULL_RTX, 1);
4293 			    extra_cost
4294 			      = (shift_cost (speed, compute_mode, pre_shift)
4295 				 + shift_cost (speed, compute_mode, post_shift));
4296 			    t2 = expmed_mult_highpart
4297 			      (compute_mode, t1,
4298 			       gen_int_mode (ml, compute_mode),
4299 			       NULL_RTX, 1, max_cost - extra_cost);
4300 			    if (t2 == 0)
4301 			      goto fail1;
4302 			    quotient = expand_shift
4303 			      (RSHIFT_EXPR, compute_mode, t2,
4304 			       post_shift, tquotient, 1);
4305 			  }
4306 		      }
4307 		  }
4308 		else		/* Mode too wide to use tricky code */
4309 		  break;
4310 
4311 		insn = get_last_insn ();
4312 		if (insn != last)
4313 		  set_dst_reg_note (insn, REG_EQUAL,
4314 				    gen_rtx_UDIV (compute_mode, op0, op1),
4315 				    quotient);
4316 	      }
4317 	    else		/* TRUNC_DIV, signed */
4318 	      {
4319 		unsigned HOST_WIDE_INT ml;
4320 		int lgup, post_shift;
4321 		rtx mlr;
4322 		HOST_WIDE_INT d = INTVAL (op1);
4323 		unsigned HOST_WIDE_INT abs_d;
4324 
4325 		/* Not prepared to handle division/remainder by
4326 		   0xffffffffffffffff8000000000000000 etc.  */
4327 		if (d == HOST_WIDE_INT_MIN && size > HOST_BITS_PER_WIDE_INT)
4328 		  break;
4329 
4330 		/* Since d might be INT_MIN, we have to cast to
4331 		   unsigned HOST_WIDE_INT before negating to avoid
4332 		   undefined signed overflow.  */
4333 		abs_d = (d >= 0
4334 			 ? (unsigned HOST_WIDE_INT) d
4335 			 : - (unsigned HOST_WIDE_INT) d);
4336 
4337 		/* n rem d = n rem -d */
4338 		if (rem_flag && d < 0)
4339 		  {
4340 		    d = abs_d;
4341 		    op1 = gen_int_mode (abs_d, compute_mode);
4342 		  }
4343 
4344 		if (d == 1)
4345 		  quotient = op0;
4346 		else if (d == -1)
4347 		  quotient = expand_unop (compute_mode, neg_optab, op0,
4348 					  tquotient, 0);
4349 		else if (size <= HOST_BITS_PER_WIDE_INT
4350 			 && abs_d == HOST_WIDE_INT_1U << (size - 1))
4351 		  {
4352 		    /* This case is not handled correctly below.  */
4353 		    quotient = emit_store_flag (tquotient, EQ, op0, op1,
4354 						compute_mode, 1, 1);
4355 		    if (quotient == 0)
4356 		      goto fail1;
4357 		  }
4358 		else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4359 			 && (size <= HOST_BITS_PER_WIDE_INT || d >= 0)
4360 			 && (rem_flag
4361 			     ? smod_pow2_cheap (speed, compute_mode)
4362 			     : sdiv_pow2_cheap (speed, compute_mode))
4363 			 /* We assume that the cheap metric is true if the
4364 			    optab has an expander for this mode.  */
4365 			 && ((optab_handler ((rem_flag ? smod_optab
4366 					      : sdiv_optab),
4367 					     compute_mode)
4368 			      != CODE_FOR_nothing)
4369 			     || (optab_handler (sdivmod_optab,
4370 						compute_mode)
4371 				 != CODE_FOR_nothing)))
4372 		  ;
4373 		else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4374 		  {
4375 		    if (rem_flag)
4376 		      {
4377 			remainder = expand_smod_pow2 (compute_mode, op0, d);
4378 			if (remainder)
4379 			  return gen_lowpart (mode, remainder);
4380 		      }
4381 
4382 		    if (sdiv_pow2_cheap (speed, compute_mode)
4383 			&& ((optab_handler (sdiv_optab, compute_mode)
4384 			     != CODE_FOR_nothing)
4385 			    || (optab_handler (sdivmod_optab, compute_mode)
4386 				!= CODE_FOR_nothing)))
4387 		      quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4388 						compute_mode, op0,
4389 						gen_int_mode (abs_d,
4390 							      compute_mode),
4391 						NULL_RTX, 0);
4392 		    else
4393 		      quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4394 
4395 		    /* We have computed OP0 / abs(OP1).  If OP1 is negative,
4396 		       negate the quotient.  */
4397 		    if (d < 0)
4398 		      {
4399 			insn = get_last_insn ();
4400 			if (insn != last
4401 			    && abs_d < (HOST_WIDE_INT_1U
4402 					<< (HOST_BITS_PER_WIDE_INT - 1)))
4403 			  set_dst_reg_note (insn, REG_EQUAL,
4404 					    gen_rtx_DIV (compute_mode, op0,
4405 							 gen_int_mode
4406 							   (abs_d,
4407 							    compute_mode)),
4408 					    quotient);
4409 
4410 			quotient = expand_unop (compute_mode, neg_optab,
4411 						quotient, quotient, 0);
4412 		      }
4413 		  }
4414 		else if (size <= HOST_BITS_PER_WIDE_INT)
4415 		  {
4416 		    choose_multiplier (abs_d, size, size - 1,
4417 				       &ml, &post_shift, &lgup);
4418 		    if (ml < HOST_WIDE_INT_1U << (size - 1))
4419 		      {
4420 			rtx t1, t2, t3;
4421 
4422 			if (post_shift >= BITS_PER_WORD
4423 			    || size - 1 >= BITS_PER_WORD)
4424 			  goto fail1;
4425 
4426 			extra_cost = (shift_cost (speed, compute_mode, post_shift)
4427 				      + shift_cost (speed, compute_mode, size - 1)
4428 				      + add_cost (speed, compute_mode));
4429 			t1 = expmed_mult_highpart
4430 			  (compute_mode, op0, gen_int_mode (ml, compute_mode),
4431 			   NULL_RTX, 0, max_cost - extra_cost);
4432 			if (t1 == 0)
4433 			  goto fail1;
4434 			t2 = expand_shift
4435 			  (RSHIFT_EXPR, compute_mode, t1,
4436 			   post_shift, NULL_RTX, 0);
4437 			t3 = expand_shift
4438 			  (RSHIFT_EXPR, compute_mode, op0,
4439 			   size - 1, NULL_RTX, 0);
4440 			if (d < 0)
4441 			  quotient
4442 			    = force_operand (gen_rtx_MINUS (compute_mode,
4443 							    t3, t2),
4444 					     tquotient);
4445 			else
4446 			  quotient
4447 			    = force_operand (gen_rtx_MINUS (compute_mode,
4448 							    t2, t3),
4449 					     tquotient);
4450 		      }
4451 		    else
4452 		      {
4453 			rtx t1, t2, t3, t4;
4454 
4455 			if (post_shift >= BITS_PER_WORD
4456 			    || size - 1 >= BITS_PER_WORD)
4457 			  goto fail1;
4458 
4459 			ml |= HOST_WIDE_INT_M1U << (size - 1);
4460 			mlr = gen_int_mode (ml, compute_mode);
4461 			extra_cost = (shift_cost (speed, compute_mode, post_shift)
4462 				      + shift_cost (speed, compute_mode, size - 1)
4463 				      + 2 * add_cost (speed, compute_mode));
4464 			t1 = expmed_mult_highpart (compute_mode, op0, mlr,
4465 						   NULL_RTX, 0,
4466 						   max_cost - extra_cost);
4467 			if (t1 == 0)
4468 			  goto fail1;
4469 			t2 = force_operand (gen_rtx_PLUS (compute_mode,
4470 							  t1, op0),
4471 					    NULL_RTX);
4472 			t3 = expand_shift
4473 			  (RSHIFT_EXPR, compute_mode, t2,
4474 			   post_shift, NULL_RTX, 0);
4475 			t4 = expand_shift
4476 			  (RSHIFT_EXPR, compute_mode, op0,
4477 			   size - 1, NULL_RTX, 0);
4478 			if (d < 0)
4479 			  quotient
4480 			    = force_operand (gen_rtx_MINUS (compute_mode,
4481 							    t4, t3),
4482 					     tquotient);
4483 			else
4484 			  quotient
4485 			    = force_operand (gen_rtx_MINUS (compute_mode,
4486 							    t3, t4),
4487 					     tquotient);
4488 		      }
4489 		  }
4490 		else		/* Mode too wide to use tricky code */
4491 		  break;
4492 
4493 		insn = get_last_insn ();
4494 		if (insn != last)
4495 		  set_dst_reg_note (insn, REG_EQUAL,
4496 				    gen_rtx_DIV (compute_mode, op0, op1),
4497 				    quotient);
4498 	      }
4499 	    break;
4500 	  }
4501       fail1:
4502 	delete_insns_since (last);
4503 	break;
4504 
4505       case FLOOR_DIV_EXPR:
4506       case FLOOR_MOD_EXPR:
4507 	/* We will come here only for signed operations.  */
4508 	if (op1_is_constant && size <= HOST_BITS_PER_WIDE_INT)
4509 	  {
4510 	    unsigned HOST_WIDE_INT mh, ml;
4511 	    int pre_shift, lgup, post_shift;
4512 	    HOST_WIDE_INT d = INTVAL (op1);
4513 
4514 	    if (d > 0)
4515 	      {
4516 		/* We could just as easily deal with negative constants here,
4517 		   but it does not seem worth the trouble for GCC 2.6.  */
4518 		if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4519 		  {
4520 		    pre_shift = floor_log2 (d);
4521 		    if (rem_flag)
4522 		      {
4523 			unsigned HOST_WIDE_INT mask
4524 			  = (HOST_WIDE_INT_1U << pre_shift) - 1;
4525 			remainder = expand_binop
4526 			  (compute_mode, and_optab, op0,
4527 			   gen_int_mode (mask, compute_mode),
4528 			   remainder, 0, OPTAB_LIB_WIDEN);
4529 			if (remainder)
4530 			  return gen_lowpart (mode, remainder);
4531 		      }
4532 		    quotient = expand_shift
4533 		      (RSHIFT_EXPR, compute_mode, op0,
4534 		       pre_shift, tquotient, 0);
4535 		  }
4536 		else
4537 		  {
4538 		    rtx t1, t2, t3, t4;
4539 
4540 		    mh = choose_multiplier (d, size, size - 1,
4541 					    &ml, &post_shift, &lgup);
4542 		    gcc_assert (!mh);
4543 
4544 		    if (post_shift < BITS_PER_WORD
4545 			&& size - 1 < BITS_PER_WORD)
4546 		      {
4547 			t1 = expand_shift
4548 			  (RSHIFT_EXPR, compute_mode, op0,
4549 			   size - 1, NULL_RTX, 0);
4550 			t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4551 					   NULL_RTX, 0, OPTAB_WIDEN);
4552 			extra_cost = (shift_cost (speed, compute_mode, post_shift)
4553 				      + shift_cost (speed, compute_mode, size - 1)
4554 				      + 2 * add_cost (speed, compute_mode));
4555 			t3 = expmed_mult_highpart
4556 			  (compute_mode, t2, gen_int_mode (ml, compute_mode),
4557 			   NULL_RTX, 1, max_cost - extra_cost);
4558 			if (t3 != 0)
4559 			  {
4560 			    t4 = expand_shift
4561 			      (RSHIFT_EXPR, compute_mode, t3,
4562 			       post_shift, NULL_RTX, 1);
4563 			    quotient = expand_binop (compute_mode, xor_optab,
4564 						     t4, t1, tquotient, 0,
4565 						     OPTAB_WIDEN);
4566 			  }
4567 		      }
4568 		  }
4569 	      }
4570 	    else
4571 	      {
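		/* Illustrative note: for a negative divisor, NSIGN below is
		   -1 when OP0 <= 0 and 0 otherwise, so T3 is OP0 for
		   OP0 <= 0 and OP0 - 1 for OP0 > 0; the truncating quotient
		   T4 then only needs decrementing in the latter case.
		   E.g. OP0 = 7, OP1 = -3: trunc (6 / -3) - 1 = -3
		   = floor (7 / -3).  */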
4572 		rtx nsign, t1, t2, t3, t4;
4573 		t1 = force_operand (gen_rtx_PLUS (compute_mode,
4574 						  op0, constm1_rtx), NULL_RTX);
4575 		t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4576 				   0, OPTAB_WIDEN);
4577 		nsign = expand_shift (RSHIFT_EXPR, compute_mode, t2,
4578 				      size - 1, NULL_RTX, 0);
4579 		t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4580 				    NULL_RTX);
4581 		t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4582 				    NULL_RTX, 0);
4583 		if (t4)
4584 		  {
4585 		    rtx t5;
4586 		    t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4587 				      NULL_RTX, 0);
4588 		    quotient = force_operand (gen_rtx_PLUS (compute_mode,
4589 							    t4, t5),
4590 					      tquotient);
4591 		  }
4592 	      }
4593 	  }
4594 
4595 	if (quotient != 0)
4596 	  break;
4597 	delete_insns_since (last);
4598 
4599 	/* Try using an instruction that produces both the quotient and
4600 	   remainder, using truncation.  We can easily compensate the quotient
4601 	   or remainder to get floor rounding, once we have the remainder.
4602 	   Notice that we also compute the final remainder value here,
4603 	   and return the result right away.  */
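	/* E.g. OP0 = -7, OP1 = 3: the truncating divmod gives -2 rem -1;
	   the remainder is nonzero and the operands' signs differ, so the
	   quotient becomes -3 and the remainder 2, which is floor
	   rounding.  */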
4604 	if (target == 0 || GET_MODE (target) != compute_mode)
4605 	  target = gen_reg_rtx (compute_mode);
4606 
4607 	if (rem_flag)
4608 	  {
4609 	    remainder
4610 	      = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4611 	    quotient = gen_reg_rtx (compute_mode);
4612 	  }
4613 	else
4614 	  {
4615 	    quotient
4616 	      = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4617 	    remainder = gen_reg_rtx (compute_mode);
4618 	  }
4619 
4620 	if (expand_twoval_binop (sdivmod_optab, op0, op1,
4621 				 quotient, remainder, 0))
4622 	  {
4623 	    /* This could be computed with a branch-less sequence.
4624 	       Save that for later.  */
4625 	    rtx tem;
4626 	    rtx_code_label *label = gen_label_rtx ();
4627 	    do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4628 	    tem = expand_binop (compute_mode, xor_optab, op0, op1,
4629 				NULL_RTX, 0, OPTAB_WIDEN);
4630 	    do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4631 	    expand_dec (quotient, const1_rtx);
4632 	    expand_inc (remainder, op1);
4633 	    emit_label (label);
4634 	    return gen_lowpart (mode, rem_flag ? remainder : quotient);
4635 	  }
4636 
4637 	/* No luck with division elimination or divmod.  Have to do it
4638 	   by conditionally adjusting op0 *and* the result.  */
4639 	{
4640 	  rtx_code_label *label1, *label2, *label3, *label4, *label5;
4641 	  rtx adjusted_op0;
4642 	  rtx tem;
4643 
4644 	  quotient = gen_reg_rtx (compute_mode);
4645 	  adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4646 	  label1 = gen_label_rtx ();
4647 	  label2 = gen_label_rtx ();
4648 	  label3 = gen_label_rtx ();
4649 	  label4 = gen_label_rtx ();
4650 	  label5 = gen_label_rtx ();
4651 	  do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4652 	  do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4653 	  tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4654 			      quotient, 0, OPTAB_LIB_WIDEN);
4655 	  if (tem != quotient)
4656 	    emit_move_insn (quotient, tem);
4657 	  emit_jump_insn (targetm.gen_jump (label5));
4658 	  emit_barrier ();
4659 	  emit_label (label1);
4660 	  expand_inc (adjusted_op0, const1_rtx);
4661 	  emit_jump_insn (targetm.gen_jump (label4));
4662 	  emit_barrier ();
4663 	  emit_label (label2);
4664 	  do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4665 	  tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4666 			      quotient, 0, OPTAB_LIB_WIDEN);
4667 	  if (tem != quotient)
4668 	    emit_move_insn (quotient, tem);
4669 	  emit_jump_insn (targetm.gen_jump (label5));
4670 	  emit_barrier ();
4671 	  emit_label (label3);
4672 	  expand_dec (adjusted_op0, const1_rtx);
4673 	  emit_label (label4);
4674 	  tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4675 			      quotient, 0, OPTAB_LIB_WIDEN);
4676 	  if (tem != quotient)
4677 	    emit_move_insn (quotient, tem);
4678 	  expand_dec (quotient, const1_rtx);
4679 	  emit_label (label5);
4680 	}
4681 	break;
4682 
4683       case CEIL_DIV_EXPR:
4684       case CEIL_MOD_EXPR:
4685 	if (unsignedp)
4686 	  {
4687 	    if (op1_is_constant
4688 		&& EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4689 		&& (size <= HOST_BITS_PER_WIDE_INT
4690 		    || INTVAL (op1) >= 0))
4691 	      {
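		/* For a power-of-two divisor, the ceiling is the logical
		   right shift, plus one whenever any of the discarded low
		   bits are set; e.g. 7 / 4 gives (7 >> 2) + 1 = 2.  */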
4692 		rtx t1, t2, t3;
4693 		unsigned HOST_WIDE_INT d = INTVAL (op1);
4694 		t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4695 				   floor_log2 (d), tquotient, 1);
4696 		t2 = expand_binop (compute_mode, and_optab, op0,
4697 				   gen_int_mode (d - 1, compute_mode),
4698 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
4699 		t3 = gen_reg_rtx (compute_mode);
4700 		t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4701 				      compute_mode, 1, 1);
4702 		if (t3 == 0)
4703 		  {
4704 		    rtx_code_label *lab;
4705 		    lab = gen_label_rtx ();
4706 		    do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4707 		    expand_inc (t1, const1_rtx);
4708 		    emit_label (lab);
4709 		    quotient = t1;
4710 		  }
4711 		else
4712 		  quotient = force_operand (gen_rtx_PLUS (compute_mode,
4713 							  t1, t3),
4714 					    tquotient);
4715 		break;
4716 	      }
4717 
4718 	    /* Try using an instruction that produces both the quotient and
4719 	       remainder, using truncation.  We can easily compensate the
4720 	       quotient or remainder to get ceiling rounding, once we have the
4721 	       remainder.  Notice that we also compute the final remainder
4722 	       value here, and return the result right away.  */
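	    /* E.g. OP0 = 7, OP1 = 3: the truncating divmod gives 2 rem 1;
	       the remainder is nonzero, so the quotient becomes 3 and the
	       remainder -2, which is ceiling rounding.  */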
4723 	    if (target == 0 || GET_MODE (target) != compute_mode)
4724 	      target = gen_reg_rtx (compute_mode);
4725 
4726 	    if (rem_flag)
4727 	      {
4728 		remainder = (REG_P (target)
4729 			     ? target : gen_reg_rtx (compute_mode));
4730 		quotient = gen_reg_rtx (compute_mode);
4731 	      }
4732 	    else
4733 	      {
4734 		quotient = (REG_P (target)
4735 			    ? target : gen_reg_rtx (compute_mode));
4736 		remainder = gen_reg_rtx (compute_mode);
4737 	      }
4738 
4739 	    if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4740 				     remainder, 1))
4741 	      {
4742 		/* This could be computed with a branch-less sequence.
4743 		   Save that for later.  */
4744 		rtx_code_label *label = gen_label_rtx ();
4745 		do_cmp_and_jump (remainder, const0_rtx, EQ,
4746 				 compute_mode, label);
4747 		expand_inc (quotient, const1_rtx);
4748 		expand_dec (remainder, op1);
4749 		emit_label (label);
4750 		return gen_lowpart (mode, rem_flag ? remainder : quotient);
4751 	      }
4752 
4753 	    /* No luck with division elimination or divmod.  Have to do it
4754 	       by conditionally adjusting op0 *and* the result.  */
4755 	    {
4756 	      rtx_code_label *label1, *label2;
4757 	      rtx adjusted_op0, tem;
4758 
4759 	      quotient = gen_reg_rtx (compute_mode);
4760 	      adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4761 	      label1 = gen_label_rtx ();
4762 	      label2 = gen_label_rtx ();
4763 	      do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4764 			       compute_mode, label1);
4765 	      emit_move_insn (quotient, const0_rtx);
4766 	      emit_jump_insn (targetm.gen_jump (label2));
4767 	      emit_barrier ();
4768 	      emit_label (label1);
4769 	      expand_dec (adjusted_op0, const1_rtx);
4770 	      tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4771 				  quotient, 1, OPTAB_LIB_WIDEN);
4772 	      if (tem != quotient)
4773 		emit_move_insn (quotient, tem);
4774 	      expand_inc (quotient, const1_rtx);
4775 	      emit_label (label2);
4776 	    }
4777 	  }
4778 	else /* signed */
4779 	  {
4780 	    if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4781 		&& INTVAL (op1) >= 0)
4782 	      {
4783 		/* This is extremely similar to the code for the unsigned case
4784 		   above.  For 2.7 we should merge these variants, but for
4785 		   2.6.1 I don't want to touch the code for unsigned since that
4786 		   gets used in C.  The signed case will only be used by other
4787 		   languages (Ada).  */
4788 
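		/* As in the unsigned case, but the arithmetic shift yields
		   the floor of the signed quotient; e.g. OP0 = -7, D = 4:
		   (-7 >> 2) + 1 = -1 = ceil (-7 / 4).  */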
4789 		rtx t1, t2, t3;
4790 		unsigned HOST_WIDE_INT d = INTVAL (op1);
4791 		t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4792 				   floor_log2 (d), tquotient, 0);
4793 		t2 = expand_binop (compute_mode, and_optab, op0,
4794 				   gen_int_mode (d - 1, compute_mode),
4795 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
4796 		t3 = gen_reg_rtx (compute_mode);
4797 		t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4798 				      compute_mode, 1, 1);
4799 		if (t3 == 0)
4800 		  {
4801 		    rtx_code_label *lab;
4802 		    lab = gen_label_rtx ();
4803 		    do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4804 		    expand_inc (t1, const1_rtx);
4805 		    emit_label (lab);
4806 		    quotient = t1;
4807 		  }
4808 		else
4809 		  quotient = force_operand (gen_rtx_PLUS (compute_mode,
4810 							  t1, t3),
4811 					    tquotient);
4812 		break;
4813 	      }
4814 
4815 	    /* Try using an instruction that produces both the quotient and
4816 	       remainder, using truncation.  We can easily compensate the
4817 	       quotient or remainder to get ceiling rounding, once we have the
4818 		   remainder.  Notice that we also compute the final remainder
4819 	       value here, and return the result right away.  */
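	    /* E.g. OP0 = -7, OP1 = -3: the truncating divmod gives 2 rem -1;
	       the remainder is nonzero and the operands' signs agree, so the
	       quotient becomes 3 and the remainder 2, which is ceiling
	       rounding.  */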
4820 	    if (target == 0 || GET_MODE (target) != compute_mode)
4821 	      target = gen_reg_rtx (compute_mode);
4822 	    if (rem_flag)
4823 	      {
4824 		remainder = (REG_P (target)
4825 			    ? target : gen_reg_rtx (compute_mode));
4826 		quotient = gen_reg_rtx (compute_mode);
4827 	      }
4828 	    else
4829 	      {
4830 		quotient = (REG_P (target)
4831 			    ? target : gen_reg_rtx (compute_mode));
4832 		remainder = gen_reg_rtx (compute_mode);
4833 	      }
4834 
4835 	    if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4836 				     remainder, 0))
4837 	      {
4838 		/* This could be computed with a branch-less sequence.
4839 		   Save that for later.  */
4840 		rtx tem;
4841 		rtx_code_label *label = gen_label_rtx ();
4842 		do_cmp_and_jump (remainder, const0_rtx, EQ,
4843 				 compute_mode, label);
4844 		tem = expand_binop (compute_mode, xor_optab, op0, op1,
4845 				    NULL_RTX, 0, OPTAB_WIDEN);
4846 		do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4847 		expand_inc (quotient, const1_rtx);
4848 		expand_dec (remainder, op1);
4849 		emit_label (label);
4850 		return gen_lowpart (mode, rem_flag ? remainder : quotient);
4851 	      }
4852 
4853 	    /* No luck with division elimination or divmod.  Have to do it
4854 	       by conditionally adjusting op0 *and* the result.  */
4855 	    {
4856 	      rtx_code_label *label1, *label2, *label3, *label4, *label5;
4857 	      rtx adjusted_op0;
4858 	      rtx tem;
4859 
4860 	      quotient = gen_reg_rtx (compute_mode);
4861 	      adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4862 	      label1 = gen_label_rtx ();
4863 	      label2 = gen_label_rtx ();
4864 	      label3 = gen_label_rtx ();
4865 	      label4 = gen_label_rtx ();
4866 	      label5 = gen_label_rtx ();
4867 	      do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4868 	      do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4869 			       compute_mode, label1);
4870 	      tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4871 				  quotient, 0, OPTAB_LIB_WIDEN);
4872 	      if (tem != quotient)
4873 		emit_move_insn (quotient, tem);
4874 	      emit_jump_insn (targetm.gen_jump (label5));
4875 	      emit_barrier ();
4876 	      emit_label (label1);
4877 	      expand_dec (adjusted_op0, const1_rtx);
4878 	      emit_jump_insn (targetm.gen_jump (label4));
4879 	      emit_barrier ();
4880 	      emit_label (label2);
4881 	      do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4882 			       compute_mode, label3);
4883 	      tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4884 				  quotient, 0, OPTAB_LIB_WIDEN);
4885 	      if (tem != quotient)
4886 		emit_move_insn (quotient, tem);
4887 	      emit_jump_insn (targetm.gen_jump (label5));
4888 	      emit_barrier ();
4889 	      emit_label (label3);
4890 	      expand_inc (adjusted_op0, const1_rtx);
4891 	      emit_label (label4);
4892 	      tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4893 				  quotient, 0, OPTAB_LIB_WIDEN);
4894 	      if (tem != quotient)
4895 		emit_move_insn (quotient, tem);
4896 	      expand_inc (quotient, const1_rtx);
4897 	      emit_label (label5);
4898 	    }
4899 	  }
4900 	break;
4901 
4902       case EXACT_DIV_EXPR:
4903 	if (op1_is_constant && size <= HOST_BITS_PER_WIDE_INT)
4904 	  {
4905 	    HOST_WIDE_INT d = INTVAL (op1);
4906 	    unsigned HOST_WIDE_INT ml;
4907 	    int pre_shift;
4908 	    rtx t1;
4909 
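	    /* Illustrative note: an exact division by D can be done by
	       shifting out D's trailing zero bits and multiplying by the
	       multiplicative inverse of the remaining odd factor modulo
	       2**SIZE.  E.g. with SIZE == 8 and D == 3 the inverse is 171
	       (3 * 171 == 513 == 1 mod 256), so 9 / 3
	       == (9 * 171) mod 256 == 3.  */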
4910 	    pre_shift = ctz_or_zero (d);
4911 	    ml = invert_mod2n (d >> pre_shift, size);
4912 	    t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4913 			       pre_shift, NULL_RTX, unsignedp);
4914 	    quotient = expand_mult (compute_mode, t1,
4915 				    gen_int_mode (ml, compute_mode),
4916 				    NULL_RTX, 1);
4917 
4918 	    insn = get_last_insn ();
4919 	    set_dst_reg_note (insn, REG_EQUAL,
4920 			      gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4921 					      compute_mode, op0, op1),
4922 			      quotient);
4923 	  }
4924 	break;
4925 
4926       case ROUND_DIV_EXPR:
4927       case ROUND_MOD_EXPR:
4928 	if (unsignedp)
4929 	  {
4930 	    rtx tem;
4931 	    rtx_code_label *label;
4932 	    label = gen_label_rtx ();
4933 	    quotient = gen_reg_rtx (compute_mode);
4934 	    remainder = gen_reg_rtx (compute_mode);
4935 	    if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4936 	      {
4937 		rtx tem;
4938 		quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4939 					 quotient, 1, OPTAB_LIB_WIDEN);
4940 		tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4941 		remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4942 					  remainder, 1, OPTAB_LIB_WIDEN);
4943 	      }
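	    /* Round to nearest: bump the quotient when the remainder
	       exceeds (OP1 - 1) / 2.  E.g. 8 / 3 gives quotient 2 and
	       remainder 2; since 2 > 1 the result becomes 3.  */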
4944 	    tem = plus_constant (compute_mode, op1, -1);
4945 	    tem = expand_shift (RSHIFT_EXPR, compute_mode, tem, 1, NULL_RTX, 1);
4946 	    do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4947 	    expand_inc (quotient, const1_rtx);
4948 	    expand_dec (remainder, op1);
4949 	    emit_label (label);
4950 	  }
4951 	else
4952 	  {
4953 	    rtx abs_rem, abs_op1, tem, mask;
4954 	    rtx_code_label *label;
4955 	    label = gen_label_rtx ();
4956 	    quotient = gen_reg_rtx (compute_mode);
4957 	    remainder = gen_reg_rtx (compute_mode);
4958 	    if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4959 	      {
4960 		rtx tem;
4961 		quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4962 					 quotient, 0, OPTAB_LIB_WIDEN);
4963 		tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4964 		remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4965 					  remainder, 0, OPTAB_LIB_WIDEN);
4966 	      }
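	    /* When twice the remainder's magnitude reaches the divisor's,
	       adjust by one away from zero.  MASK below is -1 if OP0 and
	       OP1 have opposite signs and 0 otherwise, so (MASK ^ 1) - MASK
	       is +1 or -1 and (MASK ^ OP1) - MASK is OP1 or -OP1.  */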
4967 	    abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4968 	    abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4969 	    tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4970 				1, NULL_RTX, 1);
4971 	    do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4972 	    tem = expand_binop (compute_mode, xor_optab, op0, op1,
4973 				NULL_RTX, 0, OPTAB_WIDEN);
4974 	    mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4975 				 size - 1, NULL_RTX, 0);
4976 	    tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4977 				NULL_RTX, 0, OPTAB_WIDEN);
4978 	    tem = expand_binop (compute_mode, sub_optab, tem, mask,
4979 				NULL_RTX, 0, OPTAB_WIDEN);
4980 	    expand_inc (quotient, tem);
4981 	    tem = expand_binop (compute_mode, xor_optab, mask, op1,
4982 				NULL_RTX, 0, OPTAB_WIDEN);
4983 	    tem = expand_binop (compute_mode, sub_optab, tem, mask,
4984 				NULL_RTX, 0, OPTAB_WIDEN);
4985 	    expand_dec (remainder, tem);
4986 	    emit_label (label);
4987 	  }
4988 	return gen_lowpart (mode, rem_flag ? remainder : quotient);
4989 
4990       default:
4991 	gcc_unreachable ();
4992       }
4993 
4994   if (quotient == 0)
4995     {
4996       if (target && GET_MODE (target) != compute_mode)
4997 	target = 0;
4998 
4999       if (rem_flag)
5000 	{
5001 	  /* Try to produce the remainder without producing the quotient.
5002 	     If we seem to have a divmod pattern that does not require widening,
5003 	     don't try widening here.  We should really have a WIDEN argument
5004 	     to expand_twoval_binop, since what we'd really like to do here is
5005 	     1) try a mod insn in compute_mode
5006 	     2) try a divmod insn in compute_mode
5007 	     3) try a div insn in compute_mode and multiply-subtract to get
5008 	        remainder
5009 	     4) try the same things with widening allowed.  */
5010 	  remainder
5011 	    = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5012 				 op0, op1, target,
5013 				 unsignedp,
5014 				 ((optab_handler (optab2, compute_mode)
5015 				   != CODE_FOR_nothing)
5016 				  ? OPTAB_DIRECT : OPTAB_WIDEN));
5017 	  if (remainder == 0)
5018 	    {
5019 	      /* No luck there.  Can we do remainder and divide at once
5020 		 without a library call?  */
5021 	      remainder = gen_reg_rtx (compute_mode);
5022 	      if (! expand_twoval_binop ((unsignedp
5023 					  ? udivmod_optab
5024 					  : sdivmod_optab),
5025 					 op0, op1,
5026 					 NULL_RTX, remainder, unsignedp))
5027 		remainder = 0;
5028 	    }
5029 
5030 	  if (remainder)
5031 	    return gen_lowpart (mode, remainder);
5032 	}
5033 
5034       /* Produce the quotient.  Try a quotient insn, but not a library call.
5035 	 If we have a divmod in this mode, use it in preference to widening
5036 	 the div (for this test we assume it will not fail). Note that optab2
5037 	 is set to the one of the two optabs that the call below will use.  */
5038       quotient
5039 	= sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
5040 			     op0, op1, rem_flag ? NULL_RTX : target,
5041 			     unsignedp,
5042 			     ((optab_handler (optab2, compute_mode)
5043 			       != CODE_FOR_nothing)
5044 			      ? OPTAB_DIRECT : OPTAB_WIDEN));
5045 
5046       if (quotient == 0)
5047 	{
5048 	  /* No luck there.  Try a quotient-and-remainder insn,
5049 	     keeping the quotient alone.  */
5050 	  quotient = gen_reg_rtx (compute_mode);
5051 	  if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
5052 				     op0, op1,
5053 				     quotient, NULL_RTX, unsignedp))
5054 	    {
5055 	      quotient = 0;
5056 	      if (! rem_flag)
5057 		/* Still no luck.  If we are not computing the remainder,
5058 		   use a library call for the quotient.  */
5059 		quotient = sign_expand_binop (compute_mode,
5060 					      udiv_optab, sdiv_optab,
5061 					      op0, op1, target,
5062 					      unsignedp, OPTAB_LIB_WIDEN);
5063 	    }
5064 	}
5065     }
5066 
5067   if (rem_flag)
5068     {
5069       if (target && GET_MODE (target) != compute_mode)
5070 	target = 0;
5071 
5072       if (quotient == 0)
5073 	{
5074 	  /* No divide instruction either.  Use library for remainder.  */
5075 	  remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5076 					 op0, op1, target,
5077 					 unsignedp, OPTAB_LIB_WIDEN);
5078 	  /* No remainder function.  Try a quotient-and-remainder
5079 	     function, keeping the remainder.  */
5080 	  if (!remainder)
5081 	    {
5082 	      remainder = gen_reg_rtx (compute_mode);
5083 	      if (!expand_twoval_binop_libfunc
5084 		  (unsignedp ? udivmod_optab : sdivmod_optab,
5085 		   op0, op1,
5086 		   NULL_RTX, remainder,
5087 		   unsignedp ? UMOD : MOD))
5088 		remainder = NULL_RTX;
5089 	    }
5090 	}
5091       else
5092 	{
5093 	  /* We divided.  Now finish doing X - Y * (X / Y).  */
5094 	  remainder = expand_mult (compute_mode, quotient, op1,
5095 				   NULL_RTX, unsignedp);
5096 	  remainder = expand_binop (compute_mode, sub_optab, op0,
5097 				    remainder, target, unsignedp,
5098 				    OPTAB_LIB_WIDEN);
5099 	}
5100     }
5101 
5102   return gen_lowpart (mode, rem_flag ? remainder : quotient);
5103 }
5104 
5105 /* Return a tree node with data type TYPE, describing the value of X.
5106    Usually this is a VAR_DECL, if there is no obvious better choice.
5107    X may be an expression; however, we only support those expressions
5108    generated by loop.c.  */
5109 
5110 tree
5111 make_tree (tree type, rtx x)
5112 {
5113   tree t;
5114 
5115   switch (GET_CODE (x))
5116     {
5117     case CONST_INT:
5118     case CONST_WIDE_INT:
5119       t = wide_int_to_tree (type, rtx_mode_t (x, TYPE_MODE (type)));
5120       return t;
5121 
5122     case CONST_DOUBLE:
5123       STATIC_ASSERT (HOST_BITS_PER_WIDE_INT * 2 <= MAX_BITSIZE_MODE_ANY_INT);
5124       if (TARGET_SUPPORTS_WIDE_INT == 0 && GET_MODE (x) == VOIDmode)
5125 	t = wide_int_to_tree (type,
5126 			      wide_int::from_array (&CONST_DOUBLE_LOW (x), 2,
5127 						    HOST_BITS_PER_WIDE_INT * 2));
5128       else
5129 	t = build_real (type, *CONST_DOUBLE_REAL_VALUE (x));
5130 
5131       return t;
5132 
5133     case CONST_VECTOR:
5134       {
5135 	int units = CONST_VECTOR_NUNITS (x);
5136 	tree itype = TREE_TYPE (type);
5137 	tree *elts;
5138 	int i;
5139 
5140 	/* Build a tree with vector elements.  */
5141 	elts = XALLOCAVEC (tree, units);
5142 	for (i = units - 1; i >= 0; --i)
5143 	  {
5144 	    rtx elt = CONST_VECTOR_ELT (x, i);
5145 	    elts[i] = make_tree (itype, elt);
5146 	  }
5147 
5148 	return build_vector (type, elts);
5149       }
5150 
5151     case PLUS:
5152       return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5153 			  make_tree (type, XEXP (x, 1)));
5154 
5155     case MINUS:
5156       return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5157 			  make_tree (type, XEXP (x, 1)));
5158 
5159     case NEG:
5160       return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5161 
5162     case MULT:
5163       return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5164 			  make_tree (type, XEXP (x, 1)));
5165 
5166     case ASHIFT:
5167       return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5168 			  make_tree (type, XEXP (x, 1)));
5169 
5170     case LSHIFTRT:
5171       t = unsigned_type_for (type);
5172       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5173 			    		 make_tree (t, XEXP (x, 0)),
5174 				    	 make_tree (type, XEXP (x, 1))));
5175 
5176     case ASHIFTRT:
5177       t = signed_type_for (type);
5178       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5179 					 make_tree (t, XEXP (x, 0)),
5180 				    	 make_tree (type, XEXP (x, 1))));
5181 
5182     case DIV:
5183       if (TREE_CODE (type) != REAL_TYPE)
5184 	t = signed_type_for (type);
5185       else
5186 	t = type;
5187 
5188       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5189 				    	 make_tree (t, XEXP (x, 0)),
5190 				    	 make_tree (t, XEXP (x, 1))));
5191     case UDIV:
5192       t = unsigned_type_for (type);
5193       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5194 				    	 make_tree (t, XEXP (x, 0)),
5195 				    	 make_tree (t, XEXP (x, 1))));
5196 
5197     case SIGN_EXTEND:
5198     case ZERO_EXTEND:
5199       t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5200 					  GET_CODE (x) == ZERO_EXTEND);
5201       return fold_convert (type, make_tree (t, XEXP (x, 0)));
5202 
5203     case CONST:
5204       return make_tree (type, XEXP (x, 0));
5205 
5206     case SYMBOL_REF:
5207       t = SYMBOL_REF_DECL (x);
5208       if (t)
5209 	return fold_convert (type, build_fold_addr_expr (t));
5210       /* fall through.  */
5211 
5212     default:
5213       t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5214 
5215       /* If TYPE is a POINTER_TYPE, we might need to convert X from
5216 	 address mode to pointer mode.  */
5217       if (POINTER_TYPE_P (type))
5218 	x = convert_memory_address_addr_space
5219 	      (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5220 
5221       /* Note that we do *not* use SET_DECL_RTL here, because we do not
5222 	 want set_decl_rtl to go adjusting REG_ATTRS for this temporary.  */
5223       t->decl_with_rtl.rtl = x;
5224 
5225       return t;
5226     }
5227 }
5228 
5229 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5230    and returning TARGET.
5231 
5232    If TARGET is 0, a pseudo-register or constant is returned.  */
5233 
5234 rtx
5235 expand_and (machine_mode mode, rtx op0, rtx op1, rtx target)
5236 {
5237   rtx tem = 0;
5238 
5239   if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5240     tem = simplify_binary_operation (AND, mode, op0, op1);
5241   if (tem == 0)
5242     tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5243 
5244   if (target == 0)
5245     target = tem;
5246   else if (tem != target)
5247     emit_move_insn (target, tem);
5248   return target;
5249 }
5250 
5251 /* Helper function for emit_store_flag.  */
5252 rtx
5253 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5254 	     machine_mode mode, machine_mode compare_mode,
5255 	     int unsignedp, rtx x, rtx y, int normalizep,
5256 	     machine_mode target_mode)
5257 {
5258   struct expand_operand ops[4];
5259   rtx op0, comparison, subtarget;
5260   rtx_insn *last;
5261   machine_mode result_mode = targetm.cstore_mode (icode);
5262 
5263   last = get_last_insn ();
5264   x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5265   y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5266   if (!x || !y)
5267     {
5268       delete_insns_since (last);
5269       return NULL_RTX;
5270     }
5271 
5272   if (target_mode == VOIDmode)
5273     target_mode = result_mode;
5274   if (!target)
5275     target = gen_reg_rtx (target_mode);
5276 
5277   comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5278 
5279   create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5280   create_fixed_operand (&ops[1], comparison);
5281   create_fixed_operand (&ops[2], x);
5282   create_fixed_operand (&ops[3], y);
5283   if (!maybe_expand_insn (icode, 4, ops))
5284     {
5285       delete_insns_since (last);
5286       return NULL_RTX;
5287     }
5288   subtarget = ops[0].value;
5289 
5290   /* If we are converting to a wider mode, first convert to
5291      TARGET_MODE, then normalize.  This produces better combining
5292      opportunities on machines that have a SIGN_EXTRACT when we are
5293      testing a single bit.  This mostly benefits the 68k.
5294 
5295      If STORE_FLAG_VALUE does not have the sign bit set when
5296      interpreted in MODE, we can do this conversion as unsigned, which
5297      is usually more efficient.  */
5298   if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
5299     {
5300       convert_move (target, subtarget,
5301 		    val_signbit_known_clear_p (result_mode,
5302 					       STORE_FLAG_VALUE));
5303       op0 = target;
5304       result_mode = target_mode;
5305     }
5306   else
5307     op0 = subtarget;
5308 
5309   /* If we want to keep subexpressions around, don't reuse our last
5310      target.  */
5311   if (optimize)
5312     subtarget = 0;
5313 
5314   /* Now normalize to the proper value in MODE.  Sometimes we don't
5315      have to do anything.  */
5316   if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5317     ;
5318   /* STORE_FLAG_VALUE might be the most negative number, so write
5319      the comparison this way to avoid a compiler-time warning.  */
5320   else if (- normalizep == STORE_FLAG_VALUE)
5321     op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5322 
5323   /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5324      it hard to use a value of just the sign bit due to ANSI integer
5325      constant typing rules.  */
5326   else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5327     op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5328 			GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5329 			normalizep == 1);
5330   else
5331     {
5332       gcc_assert (STORE_FLAG_VALUE & 1);
5333 
5334       op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5335       if (normalizep == -1)
5336 	op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5337     }
5338 
5339   /* If we were converting to a smaller mode, do the conversion now.  */
5340   if (target_mode != result_mode)
5341     {
5342       convert_move (target, op0, 0);
5343       return target;
5344     }
5345   else
5346     return op0;
5347 }
5348 
5349 
5350 /* A subroutine of emit_store_flag only including "tricks" that do not
5351    need a recursive call.  These are kept separate to avoid infinite
5352    loops.  */
5353 
5354 static rtx
5355 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5356 		   machine_mode mode, int unsignedp, int normalizep,
5357 		   machine_mode target_mode)
5358 {
5359   rtx subtarget;
5360   enum insn_code icode;
5361   machine_mode compare_mode;
5362   enum mode_class mclass;
5363   enum rtx_code scode;
5364 
5365   if (unsignedp)
5366     code = unsigned_condition (code);
5367   scode = swap_condition (code);
5368 
5369   /* If one operand is constant, make it the second one.  Only do this
5370      if the other operand is not constant as well.  */
5371 
5372   if (swap_commutative_operands_p (op0, op1))
5373     {
5374       std::swap (op0, op1);
5375       code = swap_condition (code);
5376     }
5377 
5378   if (mode == VOIDmode)
5379     mode = GET_MODE (op0);
5380 
5381   /* For some comparisons with 1 and -1, we can convert this to
5382      comparisons with zero.  This will often produce more opportunities for
5383      store-flag insns.  */
5384 
5385   switch (code)
5386     {
5387     case LT:
5388       if (op1 == const1_rtx)
5389 	op1 = const0_rtx, code = LE;
5390       break;
5391     case LE:
5392       if (op1 == constm1_rtx)
5393 	op1 = const0_rtx, code = LT;
5394       break;
5395     case GE:
5396       if (op1 == const1_rtx)
5397 	op1 = const0_rtx, code = GT;
5398       break;
5399     case GT:
5400       if (op1 == constm1_rtx)
5401 	op1 = const0_rtx, code = GE;
5402       break;
5403     case GEU:
5404       if (op1 == const1_rtx)
5405 	op1 = const0_rtx, code = NE;
5406       break;
5407     case LTU:
5408       if (op1 == const1_rtx)
5409 	op1 = const0_rtx, code = EQ;
5410       break;
5411     default:
5412       break;
5413     }
5414 
5415   /* If we are comparing a double-word integer with zero or -1, we can
5416      convert the comparison into one involving a single word.  */
5417   if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5418       && GET_MODE_CLASS (mode) == MODE_INT
5419       && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5420     {
5421       rtx tem;
5422       if ((code == EQ || code == NE)
5423 	  && (op1 == const0_rtx || op1 == constm1_rtx))
5424 	{
5425 	  rtx op00, op01;
5426 
5427 	  /* Do a logical OR or AND of the two words and compare the
5428 	     result.  */
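	  /* The double-word value is zero iff both words are zero (their
	     IOR is zero) and is all ones iff both words are all ones (their
	     AND is all ones), so comparing the combined word against OP1
	     suffices.  */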
5429 	  op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5430 	  op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5431 	  tem = expand_binop (word_mode,
5432 			      op1 == const0_rtx ? ior_optab : and_optab,
5433 			      op00, op01, NULL_RTX, unsignedp,
5434 			      OPTAB_DIRECT);
5435 
5436 	  if (tem != 0)
5437 	    tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5438 				   unsignedp, normalizep);
5439 	}
5440       else if ((code == LT || code == GE) && op1 == const0_rtx)
5441 	{
5442 	  rtx op0h;
5443 
5444 	  /* If testing the sign bit, can just test on high word.  */
5445 	  op0h = simplify_gen_subreg (word_mode, op0, mode,
5446 				      subreg_highpart_offset (word_mode,
5447 							      mode));
5448 	  tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5449 				 unsignedp, normalizep);
5450 	}
5451       else
5452 	tem = NULL_RTX;
5453 
5454       if (tem)
5455 	{
5456 	  if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5457 	    return tem;
5458 	  if (!target)
5459 	    target = gen_reg_rtx (target_mode);
5460 
5461 	  convert_move (target, tem,
5462 			!val_signbit_known_set_p (word_mode,
5463 						  (normalizep ? normalizep
5464 						   : STORE_FLAG_VALUE)));
5465 	  return target;
5466 	}
5467     }
5468 
5469   /* If this is A < 0 or A >= 0, we can do this by taking the ones
5470      complement of A (for GE) and shifting the sign bit to the low bit.  */
5471   if (op1 == const0_rtx && (code == LT || code == GE)
5472       && GET_MODE_CLASS (mode) == MODE_INT
5473       && (normalizep || STORE_FLAG_VALUE == 1
5474 	  || val_signbit_p (mode, STORE_FLAG_VALUE)))
5475     {
5476       subtarget = target;
5477 
5478       if (!target)
5479 	target_mode = mode;
5480 
5481       /* If the result is to be wider than OP0, it is best to convert it
5482 	 first.  If it is to be narrower, it is *incorrect* to convert it
5483 	 first.  */
5484       else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5485 	{
5486 	  op0 = convert_modes (target_mode, mode, op0, 0);
5487 	  mode = target_mode;
5488 	}
5489 
5490       if (target_mode != mode)
5491 	subtarget = 0;
5492 
5493       if (code == GE)
5494 	op0 = expand_unop (mode, one_cmpl_optab, op0,
5495 			   ((STORE_FLAG_VALUE == 1 || normalizep)
5496 			    ? 0 : subtarget), 0);
5497 
5498       if (STORE_FLAG_VALUE == 1 || normalizep)
5499 	/* If we are supposed to produce a 0/1 value, we want to do
5500 	   a logical shift from the sign bit to the low-order bit; for
5501 	   a -1/0 value, we do an arithmetic shift.  */
5502 	op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5503 			    GET_MODE_BITSIZE (mode) - 1,
5504 			    subtarget, normalizep != -1);
5505 
5506       if (mode != target_mode)
5507 	op0 = convert_modes (target_mode, mode, op0, 0);
5508 
5509       return op0;
5510     }
5511 
5512   mclass = GET_MODE_CLASS (mode);
5513   for (compare_mode = mode; compare_mode != VOIDmode;
5514        compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5515     {
5516      machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5517      icode = optab_handler (cstore_optab, optab_mode);
5518      if (icode != CODE_FOR_nothing)
5519 	{
5520 	  do_pending_stack_adjust ();
5521 	  rtx tem = emit_cstore (target, icode, code, mode, compare_mode,
5522 				 unsignedp, op0, op1, normalizep, target_mode);
5523 	  if (tem)
5524 	    return tem;
5525 
5526 	  if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5527 	    {
5528 	      tem = emit_cstore (target, icode, scode, mode, compare_mode,
5529 				 unsignedp, op1, op0, normalizep, target_mode);
5530 	      if (tem)
5531 	        return tem;
5532 	    }
5533 	  break;
5534 	}
5535     }
5536 
5537   return 0;
5538 }
5539 
5540 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5541    and storing in TARGET.  Normally return TARGET.
5542    Return 0 if that cannot be done.
5543 
5544    MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
5545    it is VOIDmode, they cannot both be CONST_INT.
5546 
5547    UNSIGNEDP is for the case where we have to widen the operands
5548    to perform the operation.  It says to use zero-extension.
5549 
5550    NORMALIZEP is 1 if we should convert the result to be either zero
5551    or one.  NORMALIZEP is -1 if we should convert the result to be
5552    either zero or -1.  If NORMALIZEP is zero, the result will be left
5553    "raw" out of the scc insn.  */
5554 
5555 rtx
5556 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5557 		 machine_mode mode, int unsignedp, int normalizep)
5558 {
5559   machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5560   enum rtx_code rcode;
5561   rtx subtarget;
5562   rtx tem, trueval;
5563   rtx_insn *last;
5564 
5565   /* If we compare constants, we shouldn't use a store-flag operation,
5566      but a constant load.  We can get there via the vanilla route that
5567      usually generates a compare-branch sequence, but will in this case
5568      fold the comparison to a constant, and thus elide the branch.  */
5569   if (CONSTANT_P (op0) && CONSTANT_P (op1))
5570     return NULL_RTX;
5571 
5572   tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5573 			   target_mode);
5574   if (tem)
5575     return tem;
5576 
5577   /* If we reached here, we can't do this with a scc insn, however there
5578      are some comparisons that can be done in other ways.  Don't do any
5579      of these cases if branches are very cheap.  */
5580   if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5581     return 0;
5582 
5583   /* See what we need to return.  We can only return a 1, -1, or the
5584      sign bit.  */
5585 
5586   if (normalizep == 0)
5587     {
5588       if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5589 	normalizep = STORE_FLAG_VALUE;
5590 
5591       else if (val_signbit_p (mode, STORE_FLAG_VALUE))
5592 	;
5593       else
5594 	return 0;
5595     }
5596 
5597   last = get_last_insn ();
5598 
5599   /* If optimizing, use different pseudo registers for each insn, instead
5600      of reusing the same pseudo.  This leads to better CSE, but slows
5601      down the compiler, since there are more pseudos.  */
5602   subtarget = (!optimize
5603 	       && (target_mode == mode)) ? target : NULL_RTX;
5604   trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5605 
5606   /* For floating-point comparisons, try the reverse comparison or try
5607      changing the "orderedness" of the comparison.  */
5608   if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5609     {
5610       enum rtx_code first_code;
5611       bool and_them;
5612 
5613       rcode = reverse_condition_maybe_unordered (code);
5614       if (can_compare_p (rcode, mode, ccp_store_flag)
5615           && (code == ORDERED || code == UNORDERED
5616 	      || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5617 	      || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5618 	{
5619           int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5620 		          || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5621 
5622 	  /* For the reverse comparison, use either an addition or a XOR.  */
5623           if (want_add
5624 	      && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
5625 			   optimize_insn_for_speed_p ()) == 0)
5626 	    {
5627 	      tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5628 				       STORE_FLAG_VALUE, target_mode);
5629 	      if (tem)
5630                 return expand_binop (target_mode, add_optab, tem,
5631 				     gen_int_mode (normalizep, target_mode),
5632 				     target, 0, OPTAB_WIDEN);
5633 	    }
5634           else if (!want_add
5635 	           && rtx_cost (trueval, mode, XOR, 1,
5636 			        optimize_insn_for_speed_p ()) == 0)
5637 	    {
5638 	      tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5639 				       normalizep, target_mode);
5640 	      if (tem)
5641                 return expand_binop (target_mode, xor_optab, tem, trueval,
5642 				     target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
5643 	    }
5644 	}
5645 
5646       delete_insns_since (last);
5647 
5648       /* Cannot split ORDERED and UNORDERED; only try the above trick.  */
5649       if (code == ORDERED || code == UNORDERED)
5650 	return 0;
5651 
5652       and_them = split_comparison (code, mode, &first_code, &code);
5653 
5654       /* If there are no NaNs, the first comparison should always fall through.
5655          Effectively change the comparison to the other one.  */
5656       if (!HONOR_NANS (mode))
5657 	{
5658           gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5659 	  return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5660 				    target_mode);
5661 	}
5662 
5663       if (!HAVE_conditional_move)
5664 	return 0;
5665 
5666       /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5667 	 conditional move.  */
5668       tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5669 			       normalizep, target_mode);
5670       if (tem == 0)
5671 	return 0;
5672 
5673       if (and_them)
5674         tem = emit_conditional_move (target, code, op0, op1, mode,
5675 				     tem, const0_rtx, GET_MODE (tem), 0);
5676       else
5677         tem = emit_conditional_move (target, code, op0, op1, mode,
5678 				     trueval, tem, GET_MODE (tem), 0);
5679 
5680       if (tem == 0)
5681         delete_insns_since (last);
5682       return tem;
5683     }
5684 
5685   /* The remaining tricks only apply to integer comparisons.  */
5686 
5687   if (GET_MODE_CLASS (mode) != MODE_INT)
5688     return 0;
5689 
5690   /* If this is an equality comparison of integers, we can try to exclusive-or
5691      (or subtract) the two operands and use a recursive call to try the
5692      comparison with zero.  Don't do any of these cases if branches are
5693      very cheap.  */
5694 
5695   if ((code == EQ || code == NE) && op1 != const0_rtx)
5696     {
5697       tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5698 			  OPTAB_WIDEN);
5699 
5700       if (tem == 0)
5701 	tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5702 			    OPTAB_WIDEN);
5703       if (tem != 0)
5704 	tem = emit_store_flag (target, code, tem, const0_rtx,
5705 			       mode, unsignedp, normalizep);
5706       if (tem != 0)
5707 	return tem;
5708 
5709       delete_insns_since (last);
5710     }
5711 
5712   /* For integer comparisons, try the reverse comparison.  However, for
5713      small X, and if we'd have to extend anyway, implementing "X != 0"
5714      as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0".  */
5715   rcode = reverse_condition (code);
5716   if (can_compare_p (rcode, mode, ccp_store_flag)
5717       && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5718 	    && code == NE
5719 	    && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5720 	    && op1 == const0_rtx))
5721     {
5722       int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5723 		      || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5724 
5725       /* Again, for the reverse comparison, use either an addition or a XOR.  */
5726       if (want_add
5727 	  && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
5728 		       optimize_insn_for_speed_p ()) == 0)
5729 	{
5730 	  tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5731 				   STORE_FLAG_VALUE, target_mode);
5732 	  if (tem != 0)
5733             tem = expand_binop (target_mode, add_optab, tem,
5734 				gen_int_mode (normalizep, target_mode),
5735 				target, 0, OPTAB_WIDEN);
5736 	}
5737       else if (!want_add
5738 	       && rtx_cost (trueval, mode, XOR, 1,
5739 			    optimize_insn_for_speed_p ()) == 0)
5740 	{
5741 	  tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5742 				   normalizep, target_mode);
5743 	  if (tem != 0)
5744             tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5745 				INTVAL (trueval) >= 0, OPTAB_WIDEN);
5746 	}
5747 
5748       if (tem != 0)
5749 	return tem;
5750       delete_insns_since (last);
5751     }
5752 
5753   /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5754      the constant zero.  Reject all other comparisons at this point.  Only
5755      do LE and GT if branches are expensive since they are expensive on
5756      2-operand machines.  */
5757 
5758   if (op1 != const0_rtx
5759       || (code != EQ && code != NE
5760 	  && (BRANCH_COST (optimize_insn_for_speed_p (),
5761 			   false) <= 1 || (code != LE && code != GT))))
5762     return 0;
5763 
5764   /* Try to put the result of the comparison in the sign bit.  Assume we can't
5765      do the necessary operation below.  */
5766 
5767   tem = 0;
5768 
5769   /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
5770      the sign bit set.  */
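  /* If A is zero, A - 1 is all ones; if A is negative, its own sign bit is
     set; and if A is positive, both A and A - 1 are nonnegative, so the
     IOR's sign bit is clear.  */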
5771 
5772   if (code == LE)
5773     {
5774       /* This is destructive, so SUBTARGET can't be OP0.  */
5775       if (rtx_equal_p (subtarget, op0))
5776 	subtarget = 0;
5777 
5778       tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5779 			  OPTAB_WIDEN);
5780       if (tem)
5781 	tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5782 			    OPTAB_WIDEN);
5783     }
5784 
5785   /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5786      number of bits in the mode of OP0, minus one.  */
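  /* The shift yields -1 for negative A and 0 otherwise, so the difference
     is -A (negative) when A > 0, zero when A == 0, and -1 - A (nonnegative)
     when A < 0; the sign bit is set exactly when A > 0.  */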
5787 
5788   if (code == GT)
5789     {
5790       if (rtx_equal_p (subtarget, op0))
5791 	subtarget = 0;
5792 
5793       tem = maybe_expand_shift (RSHIFT_EXPR, mode, op0,
5794 				GET_MODE_BITSIZE (mode) - 1,
5795 				subtarget, 0);
5796       if (tem)
5797 	tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5798 			    OPTAB_WIDEN);
5799     }
5800 
5801   if (code == EQ || code == NE)
5802     {
5803       /* For EQ or NE, one way to do the comparison is to apply an operation
5804 	 that converts the operand into a positive number if it is nonzero
5805 	 or zero if it was originally zero.  Then, for EQ, we subtract 1 and
5806 	 for NE we negate.  This puts the result in the sign bit.  Then we
5807 	 normalize with a shift, if needed.
5808 
5809 	 Two operations that can do the above actions are ABS and FFS, so try
5810 	 them.  If that doesn't work, and MODE is smaller than a full word,
5811 	 we can use zero-extension to the wider mode (an unsigned conversion)
5812 	 as the operation.  */
5813 
5814       /* Note that ABS doesn't yield a positive number for INT_MIN, but
5815 	 that is compensated by the subsequent overflow when subtracting
5816 	 one / negating.  */
5817 
5818       if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5819 	tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5820       else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5821 	tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5822       else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5823 	{
5824 	  tem = convert_modes (word_mode, mode, op0, 1);
5825 	  mode = word_mode;
5826 	}
5827 
5828       if (tem != 0)
5829 	{
5830 	  if (code == EQ)
5831 	    tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5832 				0, OPTAB_WIDEN);
5833 	  else
5834 	    tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5835 	}
5836 
5837       /* If we couldn't do it that way, for NE we can "or" the two's complement
5838 	 of the value with itself.  For EQ, we take the one's complement of
5839 	 that "or", which is an extra insn, so we only handle EQ if branches
5840 	 are expensive.  */
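      /* For nonzero A at least one of A and -A is negative (both are when A
	 is the most negative value), so the IOR's sign bit is set exactly
	 when A != 0.  */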
5841 
5842       if (tem == 0
5843 	  && (code == NE
5844 	      || BRANCH_COST (optimize_insn_for_speed_p (),
5845 		      	      false) > 1))
5846 	{
5847 	  if (rtx_equal_p (subtarget, op0))
5848 	    subtarget = 0;
5849 
5850 	  tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5851 	  tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5852 			      OPTAB_WIDEN);
5853 
5854 	  if (tem && code == EQ)
5855 	    tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5856 	}
5857     }
5858 
5859   if (tem && normalizep)
5860     tem = maybe_expand_shift (RSHIFT_EXPR, mode, tem,
5861 			      GET_MODE_BITSIZE (mode) - 1,
5862 			      subtarget, normalizep == 1);
5863 
5864   if (tem)
5865     {
5866       if (!target)
5867         ;
5868       else if (GET_MODE (tem) != target_mode)
5869 	{
5870 	  convert_move (target, tem, 0);
5871 	  tem = target;
5872 	}
5873       else if (!subtarget)
5874 	{
5875 	  emit_move_insn (target, tem);
5876 	  tem = target;
5877 	}
5878     }
5879   else
5880     delete_insns_since (last);
5881 
5882   return tem;
5883 }
5884 
5885 /* Like emit_store_flag, but always succeeds.  */
5886 
5887 rtx
5888 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5889 		       machine_mode mode, int unsignedp, int normalizep)
5890 {
5891   rtx tem;
5892   rtx_code_label *label;
5893   rtx trueval, falseval;
5894 
5895   /* First see if emit_store_flag can do the job.  */
5896   tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5897   if (tem != 0)
5898     return tem;
5899 
5900   /* If one operand is constant, make it the second one.  Only do this
5901      if the other operand is not constant as well.  */
5902 
5903   if (swap_commutative_operands_p (op0, op1))
5904     {
5905       std::swap (op0, op1);
5906       code = swap_condition (code);
5907     }
5908 
5909   if (mode == VOIDmode)
5910     mode = GET_MODE (op0);
5911 
5912   if (!target)
5913     target = gen_reg_rtx (word_mode);
5914 
5915   /* If this failed, we have to do this with set/compare/jump/set code.
5916      For foo != 0, if foo is in OP0, just replace it with 1 if nonzero.  */
5917   trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
5918   if (code == NE
5919       && GET_MODE_CLASS (mode) == MODE_INT
5920       && REG_P (target)
5921       && op0 == target
5922       && op1 == const0_rtx)
5923     {
5924       label = gen_label_rtx ();
5925       do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp, mode,
5926 			       NULL_RTX, NULL, label, -1);
5927       emit_move_insn (target, trueval);
5928       emit_label (label);
5929       return target;
5930     }
5931 
5932   if (!REG_P (target)
5933       || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5934     target = gen_reg_rtx (GET_MODE (target));
5935 
5936   /* Jump in the right direction if the target cannot implement CODE
5937      but can jump on its reverse condition.  */
5938   falseval = const0_rtx;
5939   if (! can_compare_p (code, mode, ccp_jump)
5940       && (! FLOAT_MODE_P (mode)
5941           || code == ORDERED || code == UNORDERED
5942           || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5943           || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5944     {
5945       enum rtx_code rcode;
5946       if (FLOAT_MODE_P (mode))
5947         rcode = reverse_condition_maybe_unordered (code);
5948       else
5949         rcode = reverse_condition (code);
5950 
5951       /* Canonicalize to UNORDERED for the libcall.  */
5952       if (can_compare_p (rcode, mode, ccp_jump)
5953           || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
5954 	{
5955 	  falseval = trueval;
5956 	  trueval = const0_rtx;
5957 	  code = rcode;
5958 	}
5959     }
5960 
5961   emit_move_insn (target, trueval);
5962   label = gen_label_rtx ();
5963   do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX, NULL,
5964 			   label, -1);
5965 
5966   emit_move_insn (target, falseval);
5967   emit_label (label);
5968 
5969   return target;
5970 }
5971 
5972 /* Perform possibly multi-word comparison and conditional jump to LABEL
5973    if ARG1 OP ARG2 is true, where ARG1 and ARG2 are of mode MODE.  This is
5974    now a thin wrapper around do_compare_rtx_and_jump.  */
5975 
5976 static void
5977 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, machine_mode mode,
5978 		 rtx_code_label *label)
5979 {
5980   int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
5981   do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode, NULL_RTX,
5982 			   NULL, label, -1);
5983 }
5984