xref: /netbsd-src/external/gpl3/gcc.old/dist/gcc/expmed.c (revision d90047b5d07facf36e6c01dcc0bded8997ce9cc2)
1 /* Medium-level subroutines: convert bit-field store and extract
2    and shifts, multiplies and divides to rtl instructions.
3    Copyright (C) 1987-2017 Free Software Foundation, Inc.
4 
5 This file is part of GCC.
6 
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11 
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3.  If not see
19 <http://www.gnu.org/licenses/>.  */
20 
21 
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "predict.h"
30 #include "memmodel.h"
31 #include "tm_p.h"
32 #include "expmed.h"
33 #include "optabs.h"
34 #include "emit-rtl.h"
35 #include "diagnostic-core.h"
36 #include "fold-const.h"
37 #include "stor-layout.h"
38 #include "dojump.h"
39 #include "explow.h"
40 #include "expr.h"
41 #include "langhooks.h"
42 
43 struct target_expmed default_target_expmed;
44 #if SWITCHABLE_TARGET
45 struct target_expmed *this_target_expmed = &default_target_expmed;
46 #endif
47 
48 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
49 				   unsigned HOST_WIDE_INT,
50 				   unsigned HOST_WIDE_INT,
51 				   unsigned HOST_WIDE_INT,
52 				   rtx, bool);
53 static void store_fixed_bit_field_1 (rtx, unsigned HOST_WIDE_INT,
54 				     unsigned HOST_WIDE_INT,
55 				     rtx, bool);
56 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
57 				   unsigned HOST_WIDE_INT,
58 				   unsigned HOST_WIDE_INT,
59 				   unsigned HOST_WIDE_INT,
60 				   rtx, bool);
61 static rtx extract_fixed_bit_field (machine_mode, rtx,
62 				    unsigned HOST_WIDE_INT,
63 				    unsigned HOST_WIDE_INT, rtx, int, bool);
64 static rtx extract_fixed_bit_field_1 (machine_mode, rtx,
65 				      unsigned HOST_WIDE_INT,
66 				      unsigned HOST_WIDE_INT, rtx, int, bool);
67 static rtx lshift_value (machine_mode, unsigned HOST_WIDE_INT, int);
68 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
69 				    unsigned HOST_WIDE_INT, int, bool);
70 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, machine_mode, rtx_code_label *);
71 static rtx expand_smod_pow2 (machine_mode, rtx, HOST_WIDE_INT);
72 static rtx expand_sdiv_pow2 (machine_mode, rtx, HOST_WIDE_INT);
73 
74 /* Return a constant integer mask value of mode MODE with BITSIZE ones
75    followed by BITPOS zeros, or the complement of that if COMPLEMENT.
76    The mask is truncated if necessary to the width of mode MODE.  The
77    mask is zero-extended if BITSIZE+BITPOS is too small for MODE.  */
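/* Illustrative example (values assumed for exposition): with an 8-bit
   MODE, BITPOS = 2 and BITSIZE = 3 the mask is 0b00011100 (0x1c), and
   with COMPLEMENT it is 0b11100011 (0xe3).  */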
78 
79 static inline rtx
80 mask_rtx (machine_mode mode, int bitpos, int bitsize, bool complement)
81 {
82   return immed_wide_int_const
83     (wi::shifted_mask (bitpos, bitsize, complement,
84 		       GET_MODE_PRECISION (mode)), mode);
85 }
86 
87 /* Test whether a value is zero or a power of two.  */
88 #define EXACT_POWER_OF_2_OR_ZERO_P(x) \
89   (((x) & ((x) - HOST_WIDE_INT_1U)) == 0)
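/* For example (illustrative values): x = 8 gives 8 & 7 == 0 and passes;
   x = 6 gives 6 & 5 == 4 and fails; x = 0 passes trivially.  */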
90 
91 struct init_expmed_rtl
92 {
93   rtx reg;
94   rtx plus;
95   rtx neg;
96   rtx mult;
97   rtx sdiv;
98   rtx udiv;
99   rtx sdiv_32;
100   rtx smod_32;
101   rtx wide_mult;
102   rtx wide_lshr;
103   rtx wide_trunc;
104   rtx shift;
105   rtx shift_mult;
106   rtx shift_add;
107   rtx shift_sub0;
108   rtx shift_sub1;
109   rtx zext;
110   rtx trunc;
111 
112   rtx pow2[MAX_BITS_PER_WORD];
113   rtx cint[MAX_BITS_PER_WORD];
114 };
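/* These skeleton rtxes are built once in init_expmed and then re-targeted
   to each mode of interest with PUT_MODE, so that set_src_cost can be
   queried on the resulting expression without building fresh RTL.  */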
115 
116 static void
117 init_expmed_one_conv (struct init_expmed_rtl *all, machine_mode to_mode,
118 		      machine_mode from_mode, bool speed)
119 {
120   int to_size, from_size;
121   rtx which;
122 
123   to_size = GET_MODE_PRECISION (to_mode);
124   from_size = GET_MODE_PRECISION (from_mode);
125 
126   /* Most partial integers have a precision less than that of the
127      smallest "full" integer mode able to store them.  If one
128      doesn't, reduce its bit size by one for the comparison made
129      here.  */
130   if (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT
131       && pow2p_hwi (to_size))
132     to_size --;
133   if (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT
134       && pow2p_hwi (from_size))
135     from_size --;
136 
137   /* Assume the cost of zero-extend and sign-extend is the same.  */
138   which = (to_size < from_size ? all->trunc : all->zext);
139 
140   PUT_MODE (all->reg, from_mode);
141   set_convert_cost (to_mode, from_mode, speed,
142 		    set_src_cost (which, to_mode, speed));
143 }
144 
145 static void
146 init_expmed_one_mode (struct init_expmed_rtl *all,
147 		      machine_mode mode, int speed)
148 {
149   int m, n, mode_bitsize;
150   machine_mode mode_from;
151 
152   mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
153 
154   PUT_MODE (all->reg, mode);
155   PUT_MODE (all->plus, mode);
156   PUT_MODE (all->neg, mode);
157   PUT_MODE (all->mult, mode);
158   PUT_MODE (all->sdiv, mode);
159   PUT_MODE (all->udiv, mode);
160   PUT_MODE (all->sdiv_32, mode);
161   PUT_MODE (all->smod_32, mode);
162   PUT_MODE (all->wide_trunc, mode);
163   PUT_MODE (all->shift, mode);
164   PUT_MODE (all->shift_mult, mode);
165   PUT_MODE (all->shift_add, mode);
166   PUT_MODE (all->shift_sub0, mode);
167   PUT_MODE (all->shift_sub1, mode);
168   PUT_MODE (all->zext, mode);
169   PUT_MODE (all->trunc, mode);
170 
171   set_add_cost (speed, mode, set_src_cost (all->plus, mode, speed));
172   set_neg_cost (speed, mode, set_src_cost (all->neg, mode, speed));
173   set_mul_cost (speed, mode, set_src_cost (all->mult, mode, speed));
174   set_sdiv_cost (speed, mode, set_src_cost (all->sdiv, mode, speed));
175   set_udiv_cost (speed, mode, set_src_cost (all->udiv, mode, speed));
176 
177   set_sdiv_pow2_cheap (speed, mode, (set_src_cost (all->sdiv_32, mode, speed)
178 				     <= 2 * add_cost (speed, mode)));
179   set_smod_pow2_cheap (speed, mode, (set_src_cost (all->smod_32, mode, speed)
180 				     <= 4 * add_cost (speed, mode)));
181 
182   set_shift_cost (speed, mode, 0, 0);
183   {
184     int cost = add_cost (speed, mode);
185     set_shiftadd_cost (speed, mode, 0, cost);
186     set_shiftsub0_cost (speed, mode, 0, cost);
187     set_shiftsub1_cost (speed, mode, 0, cost);
188   }
189 
190   n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
191   for (m = 1; m < n; m++)
192     {
193       XEXP (all->shift, 1) = all->cint[m];
194       XEXP (all->shift_mult, 1) = all->pow2[m];
195 
196       set_shift_cost (speed, mode, m, set_src_cost (all->shift, mode, speed));
197       set_shiftadd_cost (speed, mode, m, set_src_cost (all->shift_add, mode,
198 						       speed));
199       set_shiftsub0_cost (speed, mode, m, set_src_cost (all->shift_sub0, mode,
200 							speed));
201       set_shiftsub1_cost (speed, mode, m, set_src_cost (all->shift_sub1, mode,
202 							speed));
203     }
204 
205   if (SCALAR_INT_MODE_P (mode))
206     {
207       for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
208 	   mode_from = (machine_mode)(mode_from + 1))
209 	init_expmed_one_conv (all, mode, mode_from, speed);
210     }
211   if (GET_MODE_CLASS (mode) == MODE_INT)
212     {
213       machine_mode  wider_mode = GET_MODE_WIDER_MODE (mode);
214       if (wider_mode != VOIDmode)
215 	{
216 	  PUT_MODE (all->zext, wider_mode);
217 	  PUT_MODE (all->wide_mult, wider_mode);
218 	  PUT_MODE (all->wide_lshr, wider_mode);
219 	  XEXP (all->wide_lshr, 1) = GEN_INT (mode_bitsize);
220 
221 	  set_mul_widen_cost (speed, wider_mode,
222 			      set_src_cost (all->wide_mult, wider_mode, speed));
223 	  set_mul_highpart_cost (speed, mode,
224 				 set_src_cost (all->wide_trunc, mode, speed));
225 	}
226     }
227 }
228 
229 void
230 init_expmed (void)
231 {
232   struct init_expmed_rtl all;
233   machine_mode mode = QImode;
234   int m, speed;
235 
236   memset (&all, 0, sizeof all);
237   for (m = 1; m < MAX_BITS_PER_WORD; m++)
238     {
239       all.pow2[m] = GEN_INT (HOST_WIDE_INT_1 << m);
240       all.cint[m] = GEN_INT (m);
241     }
242 
243   /* Avoid using hard regs in ways which may be unsupported.  */
244   all.reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
245   all.plus = gen_rtx_PLUS (mode, all.reg, all.reg);
246   all.neg = gen_rtx_NEG (mode, all.reg);
247   all.mult = gen_rtx_MULT (mode, all.reg, all.reg);
248   all.sdiv = gen_rtx_DIV (mode, all.reg, all.reg);
249   all.udiv = gen_rtx_UDIV (mode, all.reg, all.reg);
250   all.sdiv_32 = gen_rtx_DIV (mode, all.reg, all.pow2[5]);
251   all.smod_32 = gen_rtx_MOD (mode, all.reg, all.pow2[5]);
252   all.zext = gen_rtx_ZERO_EXTEND (mode, all.reg);
253   all.wide_mult = gen_rtx_MULT (mode, all.zext, all.zext);
254   all.wide_lshr = gen_rtx_LSHIFTRT (mode, all.wide_mult, all.reg);
255   all.wide_trunc = gen_rtx_TRUNCATE (mode, all.wide_lshr);
256   all.shift = gen_rtx_ASHIFT (mode, all.reg, all.reg);
257   all.shift_mult = gen_rtx_MULT (mode, all.reg, all.reg);
258   all.shift_add = gen_rtx_PLUS (mode, all.shift_mult, all.reg);
259   all.shift_sub0 = gen_rtx_MINUS (mode, all.shift_mult, all.reg);
260   all.shift_sub1 = gen_rtx_MINUS (mode, all.reg, all.shift_mult);
261   all.trunc = gen_rtx_TRUNCATE (mode, all.reg);
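  /* At this point all.wide_trunc has the shape
     (truncate ((zero_extend (a) * zero_extend (b)) >> n)), the usual
     "high part of a widening multiply" pattern; init_expmed_one_mode
     records its cost via set_mul_highpart_cost.  */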
262 
263   for (speed = 0; speed < 2; speed++)
264     {
265       crtl->maybe_hot_insn_p = speed;
266       set_zero_cost (speed, set_src_cost (const0_rtx, mode, speed));
267 
268       for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
269 	   mode = (machine_mode)(mode + 1))
270 	init_expmed_one_mode (&all, mode, speed);
271 
272       if (MIN_MODE_PARTIAL_INT != VOIDmode)
273 	for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
274 	     mode = (machine_mode)(mode + 1))
275 	  init_expmed_one_mode (&all, mode, speed);
276 
277       if (MIN_MODE_VECTOR_INT != VOIDmode)
278 	for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
279 	     mode = (machine_mode)(mode + 1))
280 	  init_expmed_one_mode (&all, mode, speed);
281     }
282 
283   if (alg_hash_used_p ())
284     {
285       struct alg_hash_entry *p = alg_hash_entry_ptr (0);
286       memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
287     }
288   else
289     set_alg_hash_used_p (true);
290   default_rtl_profile ();
291 
292   ggc_free (all.trunc);
293   ggc_free (all.shift_sub1);
294   ggc_free (all.shift_sub0);
295   ggc_free (all.shift_add);
296   ggc_free (all.shift_mult);
297   ggc_free (all.shift);
298   ggc_free (all.wide_trunc);
299   ggc_free (all.wide_lshr);
300   ggc_free (all.wide_mult);
301   ggc_free (all.zext);
302   ggc_free (all.smod_32);
303   ggc_free (all.sdiv_32);
304   ggc_free (all.udiv);
305   ggc_free (all.sdiv);
306   ggc_free (all.mult);
307   ggc_free (all.neg);
308   ggc_free (all.plus);
309   ggc_free (all.reg);
310 }
311 
312 /* Return an rtx representing minus the value of X.
313    MODE is the intended mode of the result,
314    useful if X is a CONST_INT.  */
315 
316 rtx
317 negate_rtx (machine_mode mode, rtx x)
318 {
319   rtx result = simplify_unary_operation (NEG, mode, x, mode);
320 
321   if (result == 0)
322     result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
323 
324   return result;
325 }
326 
327 /* Whether reverse storage order is supported on the target.  */
328 static int reverse_storage_order_supported = -1;
329 
330 /* Check whether reverse storage order is supported on the target.  */
331 
332 static void
333 check_reverse_storage_order_support (void)
334 {
335   if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
336     {
337       reverse_storage_order_supported = 0;
338       sorry ("reverse scalar storage order");
339     }
340   else
341     reverse_storage_order_supported = 1;
342 }
343 
344 /* Whether reverse FP storage order is supported on the target.  */
345 static int reverse_float_storage_order_supported = -1;
346 
347 /* Check whether reverse FP storage order is supported on the target.  */
348 
349 static void
350 check_reverse_float_storage_order_support (void)
351 {
352   if (FLOAT_WORDS_BIG_ENDIAN != WORDS_BIG_ENDIAN)
353     {
354       reverse_float_storage_order_supported = 0;
355       sorry ("reverse floating-point scalar storage order");
356     }
357   else
358     reverse_float_storage_order_supported = 1;
359 }
360 
361 /* Return an rtx representing value of X with reverse storage order.
362    MODE is the intended mode of the result,
363    useful if X is a CONST_INT.  */
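/* For a scalar integer mode this amounts to a byte swap; e.g. (purely
   illustrative) an SImode value 0x11223344 becomes 0x44332211.  */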
364 
365 rtx
366 flip_storage_order (enum machine_mode mode, rtx x)
367 {
368   enum machine_mode int_mode;
369   rtx result;
370 
371   if (mode == QImode)
372     return x;
373 
374   if (COMPLEX_MODE_P (mode))
375     {
376       rtx real = read_complex_part (x, false);
377       rtx imag = read_complex_part (x, true);
378 
379       real = flip_storage_order (GET_MODE_INNER (mode), real);
380       imag = flip_storage_order (GET_MODE_INNER (mode), imag);
381 
382       return gen_rtx_CONCAT (mode, real, imag);
383     }
384 
385   if (__builtin_expect (reverse_storage_order_supported < 0, 0))
386     check_reverse_storage_order_support ();
387 
388   if (SCALAR_INT_MODE_P (mode))
389     int_mode = mode;
390   else
391     {
392       if (FLOAT_MODE_P (mode)
393 	  && __builtin_expect (reverse_float_storage_order_supported < 0, 0))
394 	check_reverse_float_storage_order_support ();
395 
396       int_mode = mode_for_size (GET_MODE_PRECISION (mode), MODE_INT, 0);
397       if (int_mode == BLKmode)
398 	{
399 	  sorry ("reverse storage order for %smode", GET_MODE_NAME (mode));
400 	  return x;
401 	}
402       x = gen_lowpart (int_mode, x);
403     }
404 
405   result = simplify_unary_operation (BSWAP, int_mode, x, int_mode);
406   if (result == 0)
407     result = expand_unop (int_mode, bswap_optab, x, NULL_RTX, 1);
408 
409   if (int_mode != mode)
410     result = gen_lowpart (mode, result);
411 
412   return result;
413 }
414 
415 /* Adjust bitfield memory MEM so that it points to the first unit of mode
416    MODE that contains a bitfield of size BITSIZE at bit position BITNUM.
417    If MODE is BLKmode, return a reference to every byte in the bitfield.
418    Set *NEW_BITNUM to the bit position of the field within the new memory.  */
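/* Worked example (assumed values, 8-bit bytes): with MODE = SImode
   (32 bits) and BITNUM = 70, *NEW_BITNUM becomes 70 % 32 = 6 and the
   address is advanced by (70 - 6) / 8 = 8 bytes.  */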
419 
420 static rtx
421 narrow_bit_field_mem (rtx mem, machine_mode mode,
422 		      unsigned HOST_WIDE_INT bitsize,
423 		      unsigned HOST_WIDE_INT bitnum,
424 		      unsigned HOST_WIDE_INT *new_bitnum)
425 {
426   if (mode == BLKmode)
427     {
428       *new_bitnum = bitnum % BITS_PER_UNIT;
429       HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
430       HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
431 			    / BITS_PER_UNIT);
432       return adjust_bitfield_address_size (mem, mode, offset, size);
433     }
434   else
435     {
436       unsigned int unit = GET_MODE_BITSIZE (mode);
437       *new_bitnum = bitnum % unit;
438       HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
439       return adjust_bitfield_address (mem, mode, offset);
440     }
441 }
442 
443 /* The caller wants to perform insertion or extraction PATTERN on a
444    bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
445    BITREGION_START and BITREGION_END are as for store_bit_field
446    and FIELDMODE is the natural mode of the field.
447 
448    Search for a mode that is compatible with the memory access
449    restrictions and (where applicable) with a register insertion or
450    extraction.  Return the new memory on success, storing the adjusted
451    bit position in *NEW_BITNUM.  Return null otherwise.  */
452 
453 static rtx
454 adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
455 			      rtx op0, HOST_WIDE_INT bitsize,
456 			      HOST_WIDE_INT bitnum,
457 			      unsigned HOST_WIDE_INT bitregion_start,
458 			      unsigned HOST_WIDE_INT bitregion_end,
459 			      machine_mode fieldmode,
460 			      unsigned HOST_WIDE_INT *new_bitnum)
461 {
462   bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
463 				bitregion_end, MEM_ALIGN (op0),
464 				MEM_VOLATILE_P (op0));
465   machine_mode best_mode;
466   if (iter.next_mode (&best_mode))
467     {
468       /* We can use a memory in BEST_MODE.  See whether this is true for
469 	 any wider modes.  All other things being equal, we prefer to
470 	 use the widest mode possible because it tends to expose more
471 	 CSE opportunities.  */
472       if (!iter.prefer_smaller_modes ())
473 	{
474 	  /* Limit the search to the mode required by the corresponding
475 	     register insertion or extraction instruction, if any.  */
476 	  machine_mode limit_mode = word_mode;
477 	  extraction_insn insn;
478 	  if (get_best_reg_extraction_insn (&insn, pattern,
479 					    GET_MODE_BITSIZE (best_mode),
480 					    fieldmode))
481 	    limit_mode = insn.field_mode;
482 
483 	  machine_mode wider_mode;
484 	  while (iter.next_mode (&wider_mode)
485 		 && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
486 	    best_mode = wider_mode;
487 	}
488       return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
489 				   new_bitnum);
490     }
491   return NULL_RTX;
492 }
493 
494 /* Return true if a bitfield of size BITSIZE at bit number BITNUM within
495    a structure of mode STRUCT_MODE represents a lowpart subreg.   The subreg
496    offset is then BITNUM / BITS_PER_UNIT.  */
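/* For instance (illustrative, little-endian with 32-bit words): a field
   with BITNUM = 32 and BITSIZE = 16 qualifies, and the subreg offset is
   32 / 8 = 4 bytes.  */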
497 
498 static bool
499 lowpart_bit_field_p (unsigned HOST_WIDE_INT bitnum,
500 		     unsigned HOST_WIDE_INT bitsize,
501 		     machine_mode struct_mode)
502 {
503   if (BYTES_BIG_ENDIAN)
504     return (bitnum % BITS_PER_UNIT == 0
505 	    && (bitnum + bitsize == GET_MODE_BITSIZE (struct_mode)
506 		|| (bitnum + bitsize) % BITS_PER_WORD == 0));
507   else
508     return bitnum % BITS_PER_WORD == 0;
509 }
510 
511 /* Return true if -fstrict-volatile-bitfields applies to an access of OP0
512    containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE.
513    Return false if the access would touch memory outside the range
514    BITREGION_START to BITREGION_END for conformance to the C++ memory
515    model.  */
516 
517 static bool
518 strict_volatile_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
519 			    unsigned HOST_WIDE_INT bitnum,
520 			    machine_mode fieldmode,
521 			    unsigned HOST_WIDE_INT bitregion_start,
522 			    unsigned HOST_WIDE_INT bitregion_end)
523 {
524   unsigned HOST_WIDE_INT modesize = GET_MODE_BITSIZE (fieldmode);
525 
526   /* -fstrict-volatile-bitfields must be enabled and we must have a
527      volatile MEM.  */
528   if (!MEM_P (op0)
529       || !MEM_VOLATILE_P (op0)
530       || flag_strict_volatile_bitfields <= 0)
531     return false;
532 
533   /* Non-integral modes likely only happen with packed structures.
534      Punt.  */
535   if (!SCALAR_INT_MODE_P (fieldmode))
536     return false;
537 
538   /* The bit size must not be larger than the field mode, and
539      the field mode must not be larger than a word.  */
540   if (bitsize > modesize || modesize > BITS_PER_WORD)
541     return false;
542 
543   /* Check for cases of unaligned fields that must be split.  */
544   if (bitnum % modesize + bitsize > modesize)
545     return false;
546 
547   /* The memory must be sufficiently aligned for a MODESIZE access.
548      This condition guarantees that the memory access will not
549      touch anything after the end of the structure.  */
550   if (MEM_ALIGN (op0) < modesize)
551     return false;
552 
553   /* Check for cases where the C++ memory model applies.  */
554   if (bitregion_end != 0
555       && (bitnum - bitnum % modesize < bitregion_start
556 	  || bitnum - bitnum % modesize + modesize - 1 > bitregion_end))
557     return false;
558 
559   return true;
560 }
561 
562 /* Return true if OP is a memory and if a bitfield of size BITSIZE at
563    bit number BITNUM can be treated as a simple value of mode MODE.  */
564 
565 static bool
566 simple_mem_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
567 		       unsigned HOST_WIDE_INT bitnum, machine_mode mode)
568 {
569   return (MEM_P (op0)
570 	  && bitnum % BITS_PER_UNIT == 0
571 	  && bitsize == GET_MODE_BITSIZE (mode)
572 	  && (!SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
573 	      || (bitnum % GET_MODE_ALIGNMENT (mode) == 0
574 		  && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode))));
575 }
576 
577 /* Try to use instruction INSV to store VALUE into a field of OP0.
578    BITSIZE and BITNUM are as for store_bit_field.  */
579 
580 static bool
581 store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
582 			    unsigned HOST_WIDE_INT bitsize,
583 			    unsigned HOST_WIDE_INT bitnum,
584 			    rtx value)
585 {
586   struct expand_operand ops[4];
587   rtx value1;
588   rtx xop0 = op0;
589   rtx_insn *last = get_last_insn ();
590   bool copy_back = false;
591 
592   machine_mode op_mode = insv->field_mode;
593   unsigned int unit = GET_MODE_BITSIZE (op_mode);
594   if (bitsize == 0 || bitsize > unit)
595     return false;
596 
597   if (MEM_P (xop0))
598     /* Get a reference to the first byte of the field.  */
599     xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
600 				 &bitnum);
601   else
602     {
603       /* Convert from counting within OP0 to counting in OP_MODE.  */
604       if (BYTES_BIG_ENDIAN)
605 	bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
606 
607       /* If xop0 is a register, we need it in OP_MODE
608 	 to make it acceptable to the format of insv.  */
609       if (GET_CODE (xop0) == SUBREG)
610 	/* We can't just change the mode, because this might clobber op0,
611 	   and we will need the original value of op0 if insv fails.  */
612 	xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
613       if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
614 	xop0 = gen_lowpart_SUBREG (op_mode, xop0);
615     }
616 
617   /* If the destination is a paradoxical subreg such that we need a
618      truncate to the inner mode, perform the insertion on a temporary and
619      truncate the result to the original destination.  Note that we can't
620      just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
621      X) 0)) is (reg:N X).  */
622   if (GET_CODE (xop0) == SUBREG
623       && REG_P (SUBREG_REG (xop0))
624       && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
625 					 op_mode))
626     {
627       rtx tem = gen_reg_rtx (op_mode);
628       emit_move_insn (tem, xop0);
629       xop0 = tem;
630       copy_back = true;
631     }
632 
633   /* There is a similar overflow check at the start of store_bit_field_1,
634      but that only checks the case where the field lies completely
635      outside the register.  There are also cases where the field lies
636      partially in the register, in which case we need to adjust BITSIZE
637      for the partial overflow.  Without this fix, pr48335-2.c would be
638      broken on big-endian architectures that have a bit-insert
639      instruction, such as arm and aarch64.  */
640   if (bitsize + bitnum > unit && bitnum < unit)
641     {
642       warning (OPT_Wextra, "write of %wu-bit data outside the bound of "
643 	       "destination object, data truncated into %wu-bit",
644 	       bitsize, unit - bitnum);
645       bitsize = unit - bitnum;
646     }
647 
648   /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
649      "backwards" from the size of the unit we are inserting into.
650      Otherwise, we count bits from the most significant on a
651      BYTES/BITS_BIG_ENDIAN machine.  */
652 
653   if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
654     bitnum = unit - bitsize - bitnum;
655 
656   /* Convert VALUE to op_mode (which insv insn wants) in VALUE1.  */
657   value1 = value;
658   if (GET_MODE (value) != op_mode)
659     {
660       if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
661 	{
662 	  rtx tmp;
663 	  /* Optimization: Don't bother really extending VALUE
664 	     if it has all the bits we will actually use.  However,
665 	     if we must narrow it, be sure we do it correctly.  */
666 
667 	  if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
668 	    {
669 	      tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
670 	      if (! tmp)
671 		tmp = simplify_gen_subreg (op_mode,
672 					   force_reg (GET_MODE (value),
673 						      value1),
674 					   GET_MODE (value), 0);
675 	    }
676 	  else
677 	    {
678 	      tmp = gen_lowpart_if_possible (op_mode, value1);
679 	      if (! tmp)
680 		tmp = gen_lowpart (op_mode, force_reg (GET_MODE (value),
681 						       value1));
682 	    }
683 	  value1 = tmp;
684 	}
685       else if (CONST_INT_P (value))
686 	value1 = gen_int_mode (INTVAL (value), op_mode);
687       else
688 	/* Parse phase is supposed to make VALUE's data type
689 	   match that of the component reference, which is a type
690 	   at least as wide as the field; so VALUE should have
691 	   a mode that corresponds to that type.  */
692 	gcc_assert (CONSTANT_P (value));
693     }
694 
695   create_fixed_operand (&ops[0], xop0);
696   create_integer_operand (&ops[1], bitsize);
697   create_integer_operand (&ops[2], bitnum);
698   create_input_operand (&ops[3], value1, op_mode);
699   if (maybe_expand_insn (insv->icode, 4, ops))
700     {
701       if (copy_back)
702 	convert_move (op0, xop0, true);
703       return true;
704     }
705   delete_insns_since (last);
706   return false;
707 }
708 
709 /* A subroutine of store_bit_field, with the same arguments.  Return true
710    if the operation could be implemented.
711 
712    If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
713    no other way of implementing the operation.  If FALLBACK_P is false,
714    return false instead.  */
715 
716 static bool
717 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
718 		   unsigned HOST_WIDE_INT bitnum,
719 		   unsigned HOST_WIDE_INT bitregion_start,
720 		   unsigned HOST_WIDE_INT bitregion_end,
721 		   machine_mode fieldmode,
722 		   rtx value, bool reverse, bool fallback_p)
723 {
724   rtx op0 = str_rtx;
725   rtx orig_value;
726 
727   while (GET_CODE (op0) == SUBREG)
728     {
729       /* The following line once was done only if WORDS_BIG_ENDIAN,
730 	 but I think that is a mistake.  WORDS_BIG_ENDIAN is
731 	 meaningful at a much higher level; when structures are copied
732 	 between memory and regs, the higher-numbered regs
733 	 always get higher addresses.  */
734       int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
735       int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
736       int byte_offset = 0;
737 
738       /* Paradoxical subregs need special handling on big-endian machines.  */
739       if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
740 	{
741 	  int difference = inner_mode_size - outer_mode_size;
742 
743 	  if (WORDS_BIG_ENDIAN)
744 	    byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
745 	  if (BYTES_BIG_ENDIAN)
746 	    byte_offset += difference % UNITS_PER_WORD;
747 	}
748       else
749 	byte_offset = SUBREG_BYTE (op0);
750 
751       bitnum += byte_offset * BITS_PER_UNIT;
752       op0 = SUBREG_REG (op0);
753     }
754 
755   /* No action is needed if the target is a register and if the field
756      lies completely outside that register.  This can occur if the source
757      code contains an out-of-bounds access to a small array.  */
758   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
759     return true;
760 
761   /* Use vec_set patterns for inserting parts of vectors whenever
762      available.  */
763   if (VECTOR_MODE_P (GET_MODE (op0))
764       && !MEM_P (op0)
765       && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing
766       && fieldmode == GET_MODE_INNER (GET_MODE (op0))
767       && bitsize == GET_MODE_UNIT_BITSIZE (GET_MODE (op0))
768       && !(bitnum % GET_MODE_UNIT_BITSIZE (GET_MODE (op0))))
769     {
770       struct expand_operand ops[3];
771       machine_mode outermode = GET_MODE (op0);
772       machine_mode innermode = GET_MODE_INNER (outermode);
773       enum insn_code icode = optab_handler (vec_set_optab, outermode);
774       int pos = bitnum / GET_MODE_BITSIZE (innermode);
775 
776       create_fixed_operand (&ops[0], op0);
777       create_input_operand (&ops[1], value, innermode);
778       create_integer_operand (&ops[2], pos);
779       if (maybe_expand_insn (icode, 3, ops))
780 	return true;
781     }
782 
783   /* If the target is a register, overwriting the entire object, or storing
784      a full-word or multi-word field can be done with just a SUBREG.  */
785   if (!MEM_P (op0)
786       && bitsize == GET_MODE_BITSIZE (fieldmode)
787       && ((bitsize == GET_MODE_BITSIZE (GET_MODE (op0)) && bitnum == 0)
788 	  || (bitsize % BITS_PER_WORD == 0 && bitnum % BITS_PER_WORD == 0)))
789     {
790       /* Use the subreg machinery either to narrow OP0 to the required
791 	 words or to cope with mode punning between equal-sized modes.
792 	 In the latter case, use subreg on the rhs side, not lhs.  */
793       rtx sub;
794 
795       if (bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
796 	{
797 	  sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0);
798 	  if (sub)
799 	    {
800 	      if (reverse)
801 		sub = flip_storage_order (GET_MODE (op0), sub);
802 	      emit_move_insn (op0, sub);
803 	      return true;
804 	    }
805 	}
806       else
807 	{
808 	  sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
809 				     bitnum / BITS_PER_UNIT);
810 	  if (sub)
811 	    {
812 	      if (reverse)
813 		value = flip_storage_order (fieldmode, value);
814 	      emit_move_insn (sub, value);
815 	      return true;
816 	    }
817 	}
818     }
819 
820   /* If the target is memory, storing any naturally aligned field can be
821      done with a simple store.  For targets that support fast unaligned
822      memory, any naturally sized, unit aligned field can be done directly.  */
823   if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode))
824     {
825       op0 = adjust_bitfield_address (op0, fieldmode, bitnum / BITS_PER_UNIT);
826       if (reverse)
827 	value = flip_storage_order (fieldmode, value);
828       emit_move_insn (op0, value);
829       return true;
830     }
831 
832   /* Make sure we are playing with integral modes.  Pun with subregs
833      if we aren't.  This must come after the entire register case above,
834      since that case is valid for any mode.  The following cases are only
835      valid for integral modes.  */
836   {
837     machine_mode imode = int_mode_for_mode (GET_MODE (op0));
838     if (imode != GET_MODE (op0))
839       {
840 	if (MEM_P (op0))
841 	  op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
842 	else
843 	  {
844 	    gcc_assert (imode != BLKmode);
845 	    op0 = gen_lowpart (imode, op0);
846 	  }
847       }
848   }
849 
850   /* Storing an lsb-aligned field in a register
851      can be done with a movstrict instruction.  */
852 
853   if (!MEM_P (op0)
854       && !reverse
855       && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
856       && bitsize == GET_MODE_BITSIZE (fieldmode)
857       && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
858     {
859       struct expand_operand ops[2];
860       enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
861       rtx arg0 = op0;
862       unsigned HOST_WIDE_INT subreg_off;
863 
864       if (GET_CODE (arg0) == SUBREG)
865 	{
866 	  /* Else we've got some float mode source being extracted into
867 	     a different float mode destination -- this combination of
868 	     subregs results in Severe Tire Damage.  */
869 	  gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
870 		      || GET_MODE_CLASS (fieldmode) == MODE_INT
871 		      || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
872 	  arg0 = SUBREG_REG (arg0);
873 	}
874 
875       subreg_off = bitnum / BITS_PER_UNIT;
876       if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
877 	{
878 	  arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
879 
880 	  create_fixed_operand (&ops[0], arg0);
881 	  /* Shrink the source operand to FIELDMODE.  */
882 	  create_convert_operand_to (&ops[1], value, fieldmode, false);
883 	  if (maybe_expand_insn (icode, 2, ops))
884 	    return true;
885 	}
886     }
887 
888   /* Handle fields bigger than a word.  */
889 
890   if (bitsize > BITS_PER_WORD)
891     {
892       /* Here we transfer the words of the field
893 	 in the order least significant first.
894 	 This is because the most significant word is the one which may
895 	 be less than full.
896 	 However, only do that if the value is not BLKmode.  */
897 
898       const bool backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
899       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
900       unsigned int i;
901       rtx_insn *last;
902 
903       /* This is the mode we must force value to, so that there will be enough
904 	 subwords to extract.  Note that fieldmode will often (always?) be
905 	 VOIDmode, because that is what store_field uses to indicate that this
906 	 is a bit field, but passing VOIDmode to operand_subword_force
907 	 is not allowed.  */
908       fieldmode = GET_MODE (value);
909       if (fieldmode == VOIDmode)
910 	fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
911 
912       last = get_last_insn ();
913       for (i = 0; i < nwords; i++)
914 	{
915 	  /* If I is 0, use the low-order word in both field and target;
916 	     if I is 1, use the next to lowest word; and so on.  */
917 	  unsigned int wordnum = (backwards
918 				  ? GET_MODE_SIZE (fieldmode) / UNITS_PER_WORD
919 				  - i - 1
920 				  : i);
921 	  unsigned int bit_offset = (backwards ^ reverse
922 				     ? MAX ((int) bitsize - ((int) i + 1)
923 					    * BITS_PER_WORD,
924 					    0)
925 				     : (int) i * BITS_PER_WORD);
926 	  rtx value_word = operand_subword_force (value, wordnum, fieldmode);
927 	  unsigned HOST_WIDE_INT new_bitsize =
928 	    MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
929 
930 	  /* If the remaining chunk doesn't have full wordsize we have
931 	     to make sure that for big-endian machines the higher order
932 	     bits are used.  */
933 	  if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
934 	    value_word = simplify_expand_binop (word_mode, lshr_optab,
935 						value_word,
936 						GEN_INT (BITS_PER_WORD
937 							 - new_bitsize),
938 						NULL_RTX, true,
939 						OPTAB_LIB_WIDEN);
940 
941 	  if (!store_bit_field_1 (op0, new_bitsize,
942 				  bitnum + bit_offset,
943 				  bitregion_start, bitregion_end,
944 				  word_mode,
945 				  value_word, reverse, fallback_p))
946 	    {
947 	      delete_insns_since (last);
948 	      return false;
949 	    }
950 	}
951       return true;
952     }
953 
954   /* If VALUE has a floating-point or complex mode, access it as an
955      integer of the corresponding size.  This can occur on a machine
956      with 64 bit registers that uses SFmode for float.  It can also
957      occur for unaligned float or complex fields.  */
958   orig_value = value;
959   if (GET_MODE (value) != VOIDmode
960       && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
961       && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
962     {
963       value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
964       emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
965     }
966 
967   /* If OP0 is a multi-word register, narrow it to the affected word.
968      If the region spans two words, defer to store_split_bit_field.
969      Don't do this if op0 is a single hard register wider than word
970      such as a float or vector register.  */
971   if (!MEM_P (op0)
972       && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD
973       && (!REG_P (op0)
974 	  || !HARD_REGISTER_P (op0)
975 	  || HARD_REGNO_NREGS (REGNO (op0), GET_MODE (op0)) != 1))
976     {
977       if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
978 	{
979 	  if (!fallback_p)
980 	    return false;
981 
982 	  store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
983 				 bitregion_end, value, reverse);
984 	  return true;
985 	}
986       op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
987 				 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
988       gcc_assert (op0);
989       bitnum %= BITS_PER_WORD;
990     }
991 
992   /* From here on we can assume that the field to be stored in fits
993      within a word.  If the destination is a register, it too fits
994      in a word.  */
995 
996   extraction_insn insv;
997   if (!MEM_P (op0)
998       && !reverse
999       && get_best_reg_extraction_insn (&insv, EP_insv,
1000 				       GET_MODE_BITSIZE (GET_MODE (op0)),
1001 				       fieldmode)
1002       && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
1003     return true;
1004 
1005   /* If OP0 is a memory, try copying it to a register and seeing if a
1006      cheap register alternative is available.  */
1007   if (MEM_P (op0) && !reverse)
1008     {
1009       if (get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
1010 					fieldmode)
1011 	  && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
1012 	return true;
1013 
1014       rtx_insn *last = get_last_insn ();
1015 
1016       /* Try loading part of OP0 into a register, inserting the bitfield
1017 	 into that, and then copying the result back to OP0.  */
1018       unsigned HOST_WIDE_INT bitpos;
1019       rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
1020 					       bitregion_start, bitregion_end,
1021 					       fieldmode, &bitpos);
1022       if (xop0)
1023 	{
1024 	  rtx tempreg = copy_to_reg (xop0);
1025 	  if (store_bit_field_1 (tempreg, bitsize, bitpos,
1026 				 bitregion_start, bitregion_end,
1027 				 fieldmode, orig_value, reverse, false))
1028 	    {
1029 	      emit_move_insn (xop0, tempreg);
1030 	      return true;
1031 	    }
1032 	  delete_insns_since (last);
1033 	}
1034     }
1035 
1036   if (!fallback_p)
1037     return false;
1038 
1039   store_fixed_bit_field (op0, bitsize, bitnum, bitregion_start,
1040 			 bitregion_end, value, reverse);
1041   return true;
1042 }
1043 
1044 /* Generate code to store value from rtx VALUE
1045    into a bit-field within structure STR_RTX
1046    containing BITSIZE bits starting at bit BITNUM.
1047 
1048    BITREGION_START is the bit position of the first bitfield in this region.
1049    BITREGION_END is the bit position of the last bitfield in this region.
1050    These two fields are 0 if the C++ memory model does not apply,
1051    or if we are not interested in keeping track of bitfield regions.
1052 
1053    FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
1054 
1055    If REVERSE is true, the store is to be done in reverse order.  */
1056 
1057 void
1058 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1059 		 unsigned HOST_WIDE_INT bitnum,
1060 		 unsigned HOST_WIDE_INT bitregion_start,
1061 		 unsigned HOST_WIDE_INT bitregion_end,
1062 		 machine_mode fieldmode,
1063 		 rtx value, bool reverse)
1064 {
1065   /* Handle -fstrict-volatile-bitfields in the cases where it applies.  */
1066   if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, fieldmode,
1067 				  bitregion_start, bitregion_end))
1068     {
1069       /* Storing a full word can be done with a simple store.
1070 	 We know here that the field can be accessed with one single
1071 	 instruction.  For targets that support unaligned memory,
1072 	 an unaligned access may be necessary.  */
1073       if (bitsize == GET_MODE_BITSIZE (fieldmode))
1074 	{
1075 	  str_rtx = adjust_bitfield_address (str_rtx, fieldmode,
1076 					     bitnum / BITS_PER_UNIT);
1077 	  if (reverse)
1078 	    value = flip_storage_order (fieldmode, value);
1079 	  gcc_assert (bitnum % BITS_PER_UNIT == 0);
1080 	  emit_move_insn (str_rtx, value);
1081 	}
1082       else
1083 	{
1084 	  rtx temp;
1085 
1086 	  str_rtx = narrow_bit_field_mem (str_rtx, fieldmode, bitsize, bitnum,
1087 					  &bitnum);
1088 	  gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (fieldmode));
1089 	  temp = copy_to_reg (str_rtx);
1090 	  if (!store_bit_field_1 (temp, bitsize, bitnum, 0, 0,
1091 				  fieldmode, value, reverse, true))
1092 	    gcc_unreachable ();
1093 
1094 	  emit_move_insn (str_rtx, temp);
1095 	}
1096 
1097       return;
1098     }
1099 
1100   /* Under the C++0x memory model, we must not touch bits outside the
1101      bit region.  Adjust the address to start at the beginning of the
1102      bit region.  */
1103   if (MEM_P (str_rtx) && bitregion_start > 0)
1104     {
1105       machine_mode bestmode;
1106       HOST_WIDE_INT offset, size;
1107 
1108       gcc_assert ((bitregion_start % BITS_PER_UNIT) == 0);
1109 
1110       offset = bitregion_start / BITS_PER_UNIT;
1111       bitnum -= bitregion_start;
1112       size = (bitnum + bitsize + BITS_PER_UNIT - 1) / BITS_PER_UNIT;
1113       bitregion_end -= bitregion_start;
1114       bitregion_start = 0;
1115       bestmode = get_best_mode (bitsize, bitnum,
1116 				bitregion_start, bitregion_end,
1117 				MEM_ALIGN (str_rtx), VOIDmode,
1118 				MEM_VOLATILE_P (str_rtx));
1119       str_rtx = adjust_bitfield_address_size (str_rtx, bestmode, offset, size);
1120     }
1121 
1122   if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
1123 			  bitregion_start, bitregion_end,
1124 			  fieldmode, value, reverse, true))
1125     gcc_unreachable ();
1126 }
1127 
1128 /* Use shifts and boolean operations to store VALUE into a bit field of
1129    width BITSIZE in OP0, starting at bit BITNUM.
1130 
1131    If REVERSE is true, the store is to be done in reverse order.  */
1132 
1133 static void
1134 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1135 		       unsigned HOST_WIDE_INT bitnum,
1136 		       unsigned HOST_WIDE_INT bitregion_start,
1137 		       unsigned HOST_WIDE_INT bitregion_end,
1138 		       rtx value, bool reverse)
1139 {
1140   /* There is a case not handled here:
1141      a structure with a known alignment of just a halfword
1142      and a field split across two aligned halfwords within the structure.
1143      Or likewise a structure with a known alignment of just a byte
1144      and a field split across two bytes.
1145      Such cases are not supposed to be able to occur.  */
1146 
1147   if (MEM_P (op0))
1148     {
1149       machine_mode mode = GET_MODE (op0);
1150       if (GET_MODE_BITSIZE (mode) == 0
1151 	  || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
1152 	mode = word_mode;
1153       mode = get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
1154 			    MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
1155 
1156       if (mode == VOIDmode)
1157 	{
1158 	  /* The only way this should occur is if the field spans word
1159 	     boundaries.  */
1160 	  store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
1161 				 bitregion_end, value, reverse);
1162 	  return;
1163 	}
1164 
1165       op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
1166     }
1167 
1168   store_fixed_bit_field_1 (op0, bitsize, bitnum, value, reverse);
1169 }
1170 
1171 /* Helper function for store_fixed_bit_field, stores
1172    the bit field always using the MODE of OP0.  */
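/* Sketch with illustrative values, assuming little-endian bit numbering:
   storing VALUE = 5 into a 3-bit field at bit 4 of an 8-bit word W
   amounts to W = (W & ~0x70) | (5 << 4).  */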
1173 
1174 static void
1175 store_fixed_bit_field_1 (rtx op0, unsigned HOST_WIDE_INT bitsize,
1176 			 unsigned HOST_WIDE_INT bitnum,
1177 			 rtx value, bool reverse)
1178 {
1179   machine_mode mode;
1180   rtx temp;
1181   int all_zero = 0;
1182   int all_one = 0;
1183 
1184   mode = GET_MODE (op0);
1185   gcc_assert (SCALAR_INT_MODE_P (mode));
1186 
1187   /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1188      for invalid input, such as f5 from gcc.dg/pr48335-2.c.  */
1189 
1190   if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1191     /* BITNUM is the distance between our msb
1192        and that of the containing datum.
1193        Convert it to the distance from the lsb.  */
1194     bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1195 
1196   /* Now BITNUM is always the distance between our lsb
1197      and that of OP0.  */
1198 
1199   /* Shift VALUE left by BITNUM bits.  If VALUE is not constant,
1200      we must first convert its mode to MODE.  */
1201 
1202   if (CONST_INT_P (value))
1203     {
1204       unsigned HOST_WIDE_INT v = UINTVAL (value);
1205 
1206       if (bitsize < HOST_BITS_PER_WIDE_INT)
1207 	v &= (HOST_WIDE_INT_1U << bitsize) - 1;
1208 
1209       if (v == 0)
1210 	all_zero = 1;
1211       else if ((bitsize < HOST_BITS_PER_WIDE_INT
1212 		&& v == (HOST_WIDE_INT_1U << bitsize) - 1)
1213 	       || (bitsize == HOST_BITS_PER_WIDE_INT
1214 		   && v == HOST_WIDE_INT_M1U))
1215 	all_one = 1;
1216 
1217       value = lshift_value (mode, v, bitnum);
1218     }
1219   else
1220     {
1221       int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
1222 		      && bitnum + bitsize != GET_MODE_BITSIZE (mode));
1223 
1224       if (GET_MODE (value) != mode)
1225 	value = convert_to_mode (mode, value, 1);
1226 
1227       if (must_and)
1228 	value = expand_binop (mode, and_optab, value,
1229 			      mask_rtx (mode, 0, bitsize, 0),
1230 			      NULL_RTX, 1, OPTAB_LIB_WIDEN);
1231       if (bitnum > 0)
1232 	value = expand_shift (LSHIFT_EXPR, mode, value,
1233 			      bitnum, NULL_RTX, 1);
1234     }
1235 
1236   if (reverse)
1237     value = flip_storage_order (mode, value);
1238 
1239   /* Now clear the chosen bits in OP0,
1240      except that if VALUE is -1 we need not bother.  */
1241   /* We keep the intermediates in registers to allow CSE to combine
1242      consecutive bitfield assignments.  */
1243 
1244   temp = force_reg (mode, op0);
1245 
1246   if (! all_one)
1247     {
1248       rtx mask = mask_rtx (mode, bitnum, bitsize, 1);
1249       if (reverse)
1250 	mask = flip_storage_order (mode, mask);
1251       temp = expand_binop (mode, and_optab, temp, mask,
1252 			   NULL_RTX, 1, OPTAB_LIB_WIDEN);
1253       temp = force_reg (mode, temp);
1254     }
1255 
1256   /* Now logical-or VALUE into OP0, unless it is zero.  */
1257 
1258   if (! all_zero)
1259     {
1260       temp = expand_binop (mode, ior_optab, temp, value,
1261 			   NULL_RTX, 1, OPTAB_LIB_WIDEN);
1262       temp = force_reg (mode, temp);
1263     }
1264 
1265   if (op0 != temp)
1266     {
1267       op0 = copy_rtx (op0);
1268       emit_move_insn (op0, temp);
1269     }
1270 }
1271 
1272 /* Store a bit field that is split across multiple accessible memory objects.
1273 
1274    OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1275    BITSIZE is the field width; BITPOS the position of its first bit
1276    (within the word).
1277    VALUE is the value to store.
1278 
1279    If REVERSE is true, the store is to be done in reverse order.
1280 
1281    This does not yet handle fields wider than BITS_PER_WORD.  */
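/* Illustrative example (assuming 32-bit words): a 10-bit field starting
   at BITPOS = 28 is stored in two pieces, 4 bits in the first word and
   the remaining 6 bits in the next one.  */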
1282 
1283 static void
1284 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1285 		       unsigned HOST_WIDE_INT bitpos,
1286 		       unsigned HOST_WIDE_INT bitregion_start,
1287 		       unsigned HOST_WIDE_INT bitregion_end,
1288 		       rtx value, bool reverse)
1289 {
1290   unsigned int unit, total_bits, bitsdone = 0;
1291 
1292   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1293      much at a time.  */
1294   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1295     unit = BITS_PER_WORD;
1296   else
1297     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1298 
1299   /* If OP0 is a memory with a mode, then UNIT must not be larger than
1300      OP0's mode as well.  Otherwise, store_fixed_bit_field will call us
1301      again, and we will mutually recurse forever.  */
1302   if (MEM_P (op0) && GET_MODE_BITSIZE (GET_MODE (op0)) > 0)
1303     unit = MIN (unit, GET_MODE_BITSIZE (GET_MODE (op0)));
1304 
1305   /* If VALUE is a constant other than a CONST_INT, get it into a register in
1306      WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
1307      that VALUE might be a floating-point constant.  */
1308   if (CONSTANT_P (value) && !CONST_INT_P (value))
1309     {
1310       rtx word = gen_lowpart_common (word_mode, value);
1311 
1312       if (word && (value != word))
1313 	value = word;
1314       else
1315 	value = gen_lowpart_common (word_mode,
1316 				    force_reg (GET_MODE (value) != VOIDmode
1317 					       ? GET_MODE (value)
1318 					       : word_mode, value));
1319     }
1320 
1321   total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1322 
1323   while (bitsdone < bitsize)
1324     {
1325       unsigned HOST_WIDE_INT thissize;
1326       unsigned HOST_WIDE_INT thispos;
1327       unsigned HOST_WIDE_INT offset;
1328       rtx part, word;
1329 
1330       offset = (bitpos + bitsdone) / unit;
1331       thispos = (bitpos + bitsdone) % unit;
1332 
1333       /* When the region of bytes we can touch is restricted, decrease
1334 	 UNIT close to the end of the region as needed.  If op0 is a REG
1335 	 or SUBREG of REG, don't do this, as there can't be data races
1336 	 on a register and we can expand shorter code in some cases.  */
1337       if (bitregion_end
1338 	  && unit > BITS_PER_UNIT
1339 	  && bitpos + bitsdone - thispos + unit > bitregion_end + 1
1340 	  && !REG_P (op0)
1341 	  && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0))))
1342 	{
1343 	  unit = unit / 2;
1344 	  continue;
1345 	}
1346 
1347       /* THISSIZE must not overrun a word boundary.  Otherwise,
1348 	 store_fixed_bit_field will call us again, and we will mutually
1349 	 recurse forever.  */
1350       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1351       thissize = MIN (thissize, unit - thispos);
1352 
1353       if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1354 	{
1355 	  /* Fetch successively less significant portions.  */
1356 	  if (CONST_INT_P (value))
1357 	    part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1358 			     >> (bitsize - bitsdone - thissize))
1359 			    & ((HOST_WIDE_INT_1 << thissize) - 1));
1360           /* Likewise, but the source is little-endian.  */
1361           else if (reverse)
1362 	    part = extract_fixed_bit_field (word_mode, value, thissize,
1363 					    bitsize - bitsdone - thissize,
1364 					    NULL_RTX, 1, false);
1365 	  else
1366 	    {
1367 	      int total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1368 	      /* The args are chosen so that the last part includes the
1369 		 lsb.  Give extract_bit_field the value it needs (with
1370 		 endianness compensation) to fetch the piece we want.  */
1371 	      part = extract_fixed_bit_field (word_mode, value, thissize,
1372 					      total_bits - bitsize + bitsdone,
1373 					      NULL_RTX, 1, false);
1374 	    }
1375 	}
1376       else
1377 	{
1378 	  /* Fetch successively more significant portions.  */
1379 	  if (CONST_INT_P (value))
1380 	    part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1381 			     >> bitsdone)
1382 			    & ((HOST_WIDE_INT_1 << thissize) - 1));
1383 	  /* Likewise, but the source is big-endian.  */
1384           else if (reverse)
1385 	    part = extract_fixed_bit_field (word_mode, value, thissize,
1386 					    total_bits - bitsdone - thissize,
1387 					    NULL_RTX, 1, false);
1388 	  else
1389 	    part = extract_fixed_bit_field (word_mode, value, thissize,
1390 					    bitsdone, NULL_RTX, 1, false);
1391 	}
1392 
1393       /* If OP0 is a register, then handle OFFSET here.  */
1394       if (SUBREG_P (op0) || REG_P (op0))
1395 	{
1396 	  machine_mode op0_mode = GET_MODE (op0);
1397 	  if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
1398 	    word = offset ? const0_rtx : op0;
1399 	  else
1400 	    word = operand_subword_force (op0, offset * unit / BITS_PER_WORD,
1401 					  GET_MODE (op0));
1402 	  offset &= BITS_PER_WORD / unit - 1;
1403 	}
1404       else
1405 	word = op0;
1406 
1407       /* OFFSET is in UNITs, and UNIT is in bits.  If WORD is const0_rtx,
1408 	 it is just an out-of-bounds access.  Ignore it.  */
1409       if (word != const0_rtx)
1410 	store_fixed_bit_field (word, thissize, offset * unit + thispos,
1411 			       bitregion_start, bitregion_end, part,
1412 			       reverse);
1413       bitsdone += thissize;
1414     }
1415 }
1416 
1417 /* A subroutine of extract_bit_field_1 that converts return value X
1418    to either MODE or TMODE.  MODE, TMODE and UNSIGNEDP are arguments
1419    to extract_bit_field.  */
1420 
1421 static rtx
1422 convert_extracted_bit_field (rtx x, machine_mode mode,
1423 			     machine_mode tmode, bool unsignedp)
1424 {
1425   if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1426     return x;
1427 
1428   /* If TMODE is not a scalar integer mode, first convert X to an
1429      integer mode of the same size and then access it as TMODE
1430      (e.g. a floating-point value) via a SUBREG.  */
1431   if (!SCALAR_INT_MODE_P (tmode))
1432     {
1433       machine_mode smode;
1434 
1435       smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1436       x = convert_to_mode (smode, x, unsignedp);
1437       x = force_reg (smode, x);
1438       return gen_lowpart (tmode, x);
1439     }
1440 
1441   return convert_to_mode (tmode, x, unsignedp);
1442 }
1443 
1444 /* Try to use an ext(z)v pattern to extract a field from OP0.
1445    Return the extracted value on success, otherwise return null.
1446    EXT_MODE is the mode of the extraction and the other arguments
1447    are as for extract_bit_field.  */
1448 
1449 static rtx
1450 extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
1451 			      unsigned HOST_WIDE_INT bitsize,
1452 			      unsigned HOST_WIDE_INT bitnum,
1453 			      int unsignedp, rtx target,
1454 			      machine_mode mode, machine_mode tmode)
1455 {
1456   struct expand_operand ops[4];
1457   rtx spec_target = target;
1458   rtx spec_target_subreg = 0;
1459   machine_mode ext_mode = extv->field_mode;
1460   unsigned unit = GET_MODE_BITSIZE (ext_mode);
1461 
1462   if (bitsize == 0 || unit < bitsize)
1463     return NULL_RTX;
1464 
1465   if (MEM_P (op0))
1466     /* Get a reference to the first byte of the field.  */
1467     op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
1468 				&bitnum);
1469   else
1470     {
1471       /* Convert from counting within OP0 to counting in EXT_MODE.  */
1472       if (BYTES_BIG_ENDIAN)
1473 	bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1474 
1475       /* If op0 is a register, we need it in EXT_MODE to make it
1476 	 acceptable to the format of ext(z)v.  */
1477       if (GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
1478 	return NULL_RTX;
1479       if (REG_P (op0) && GET_MODE (op0) != ext_mode)
1480 	op0 = gen_lowpart_SUBREG (ext_mode, op0);
1481     }
1482 
1483   /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1484      "backwards" from the size of the unit we are extracting from.
1485      Otherwise, we count bits from the most significant on a
1486      BYTES/BITS_BIG_ENDIAN machine.  */
1487 
1488   if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1489     bitnum = unit - bitsize - bitnum;
1490 
1491   if (target == 0)
1492     target = spec_target = gen_reg_rtx (tmode);
1493 
1494   if (GET_MODE (target) != ext_mode)
1495     {
1496       /* Don't use LHS paradoxical subreg if explicit truncation is needed
1497 	 between the mode of the extraction (word_mode) and the target
1498 	 mode.  Instead, create a temporary and use convert_move to set
1499 	 the target.  */
1500       if (REG_P (target)
1501 	  && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode))
1502 	{
1503 	  target = gen_lowpart (ext_mode, target);
1504 	  if (GET_MODE_PRECISION (ext_mode)
1505 	      > GET_MODE_PRECISION (GET_MODE (spec_target)))
1506 	    spec_target_subreg = target;
1507 	}
1508       else
1509 	target = gen_reg_rtx (ext_mode);
1510     }
1511 
1512   create_output_operand (&ops[0], target, ext_mode);
1513   create_fixed_operand (&ops[1], op0);
1514   create_integer_operand (&ops[2], bitsize);
1515   create_integer_operand (&ops[3], bitnum);
1516   if (maybe_expand_insn (extv->icode, 4, ops))
1517     {
1518       target = ops[0].value;
1519       if (target == spec_target)
1520 	return target;
1521       if (target == spec_target_subreg)
1522 	return spec_target;
1523       return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1524     }
1525   return NULL_RTX;
1526 }
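/* Editorial illustration (not part of the original source): for a target
   providing an extzv pattern whose FIELD_MODE is SImode, extracting an
   8-bit unsigned field at bit 8 of an SImode register hands the expander
   the operand set (target, op0, bitsize = 8, bitnum = 8), which corresponds
   roughly to the RTL

       (set (reg:SI target)
            (zero_extract:SI (reg:SI op0) (const_int 8) (const_int 8)))

   with BITNUM already adjusted for endianness as done above.  The concrete
   modes and numbers are assumptions made for the sake of the example.  */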
1527 
1528 /* A subroutine of extract_bit_field, with the same arguments.
1529    If FALLBACK_P is true, fall back to extract_fixed_bit_field
1530    if we can find no other means of implementing the operation.
1531    If FALLBACK_P is false, return NULL instead.  */
1532 
1533 static rtx
1534 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1535 		     unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1536 		     machine_mode mode, machine_mode tmode,
1537 		     bool reverse, bool fallback_p)
1538 {
1539   rtx op0 = str_rtx;
1540   machine_mode int_mode;
1541   machine_mode mode1;
1542 
1543   if (tmode == VOIDmode)
1544     tmode = mode;
1545 
1546   while (GET_CODE (op0) == SUBREG)
1547     {
1548       bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1549       op0 = SUBREG_REG (op0);
1550     }
1551 
1552   /* If we have an out-of-bounds access to a register, just return an
1553      uninitialized register of the required mode.  This can occur if the
1554      source code contains an out-of-bounds access to a small array.  */
1555   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1556     return gen_reg_rtx (tmode);
1557 
1558   if (REG_P (op0)
1559       && mode == GET_MODE (op0)
1560       && bitnum == 0
1561       && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1562     {
1563       if (reverse)
1564 	op0 = flip_storage_order (mode, op0);
1565       /* We're trying to extract a full register from itself.  */
1566       return op0;
1567     }
1568 
1569   /* See if we can get a better vector mode before extracting.  */
1570   if (VECTOR_MODE_P (GET_MODE (op0))
1571       && !MEM_P (op0)
1572       && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1573     {
1574       machine_mode new_mode;
1575 
1576       if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1577 	new_mode = MIN_MODE_VECTOR_FLOAT;
1578       else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1579 	new_mode = MIN_MODE_VECTOR_FRACT;
1580       else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1581 	new_mode = MIN_MODE_VECTOR_UFRACT;
1582       else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1583 	new_mode = MIN_MODE_VECTOR_ACCUM;
1584       else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1585 	new_mode = MIN_MODE_VECTOR_UACCUM;
1586       else
1587 	new_mode = MIN_MODE_VECTOR_INT;
1588 
1589       for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
1590 	if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1591 	    && GET_MODE_UNIT_SIZE (new_mode) == GET_MODE_SIZE (tmode)
1592 	    && targetm.vector_mode_supported_p (new_mode))
1593 	  break;
1594       if (new_mode != VOIDmode)
1595 	op0 = gen_lowpart (new_mode, op0);
1596     }
1597 
1598   /* Use vec_extract patterns for extracting parts of vectors whenever
1599      available.  */
1600   if (VECTOR_MODE_P (GET_MODE (op0))
1601       && !MEM_P (op0)
1602       && optab_handler (vec_extract_optab, GET_MODE (op0)) != CODE_FOR_nothing
1603       && ((bitnum + bitsize - 1) / GET_MODE_UNIT_BITSIZE (GET_MODE (op0))
1604 	  == bitnum / GET_MODE_UNIT_BITSIZE (GET_MODE (op0))))
1605     {
1606       struct expand_operand ops[3];
1607       machine_mode outermode = GET_MODE (op0);
1608       machine_mode innermode = GET_MODE_INNER (outermode);
1609       enum insn_code icode = optab_handler (vec_extract_optab, outermode);
1610       unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1611 
1612       create_output_operand (&ops[0], target, innermode);
1613       create_input_operand (&ops[1], op0, outermode);
1614       create_integer_operand (&ops[2], pos);
1615       if (maybe_expand_insn (icode, 3, ops))
1616 	{
1617 	  target = ops[0].value;
1618 	  if (GET_MODE (target) != mode)
1619 	    return gen_lowpart (tmode, target);
1620 	  return target;
1621 	}
1622     }
1623 
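  /* Editorial illustration (not part of the original source): with OP0 in
     V4SImode, TMODE SImode, BITNUM 64 and BITSIZE 32, the containment check
     above holds ((64 + 31) / 32 == 64 / 32), POS becomes 2, and the
     vec_extract pattern is asked to copy element 2 of the vector into the
     target.  The particular vector mode is only an assumed example.  */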
1624   /* Make sure we are playing with integral modes.  Pun with subregs
1625      if we aren't.  */
1626   {
1627     machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1628     if (imode != GET_MODE (op0))
1629       {
1630 	if (MEM_P (op0))
1631 	  op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
1632 	else if (imode != BLKmode)
1633 	  {
1634 	    op0 = gen_lowpart (imode, op0);
1635 
1636 	    /* If we got a SUBREG, force it into a register since we
1637 	       aren't going to be able to do another SUBREG on it.  */
1638 	    if (GET_CODE (op0) == SUBREG)
1639 	      op0 = force_reg (imode, op0);
1640 	  }
1641 	else
1642 	  {
1643 	    HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (op0));
1644 	    rtx mem = assign_stack_temp (GET_MODE (op0), size);
1645 	    emit_move_insn (mem, op0);
1646 	    op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size);
1647 	  }
1648       }
1649   }
1650 
1651   /* ??? We currently assume TARGET is at least as big as BITSIZE.
1652      If that's wrong, the solution is to test for it and set TARGET to 0
1653      if needed.  */
1654 
1655   /* Get the mode of the field to use for atomic access or subreg
1656      conversion.  */
1657   mode1 = mode;
1658   if (SCALAR_INT_MODE_P (tmode))
1659     {
1660       machine_mode try_mode = mode_for_size (bitsize,
1661 						  GET_MODE_CLASS (tmode), 0);
1662       if (try_mode != BLKmode)
1663 	mode1 = try_mode;
1664     }
1665   gcc_assert (mode1 != BLKmode);
1666 
1667   /* Extraction of a full MODE1 value can be done with a subreg as long
1668      as the least significant bit of the value is the least significant
1669      bit of either OP0 or a word of OP0.  */
1670   if (!MEM_P (op0)
1671       && !reverse
1672       && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
1673       && bitsize == GET_MODE_BITSIZE (mode1)
1674       && TRULY_NOOP_TRUNCATION_MODES_P (mode1, GET_MODE (op0)))
1675     {
1676       rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1677 				     bitnum / BITS_PER_UNIT);
1678       if (sub)
1679 	return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1680     }
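  /* Editorial illustration (not part of the original source): on a
     little-endian target, extracting the low 16 bits of (reg:SI 100) with
     MODE1 == HImode satisfies the conditions above and simply yields

         (subreg:HI (reg:SI 100) 0)

     which is then converted to MODE/TMODE; no shift or mask instructions
     are generated.  Register number and modes are assumptions made for
     the example.  */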
1681 
1682   /* Extraction of a full MODE1 value can be done with a load as long as
1683      the field is on a byte boundary and is sufficiently aligned.  */
1684   if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1))
1685     {
1686       op0 = adjust_bitfield_address (op0, mode1, bitnum / BITS_PER_UNIT);
1687       if (reverse)
1688 	op0 = flip_storage_order (mode1, op0);
1689       return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1690     }
1691 
1692   /* Handle fields bigger than a word.  */
1693 
1694   if (bitsize > BITS_PER_WORD)
1695     {
1696       /* Here we transfer the words of the field
1697 	 in the order least significant first.
1698 	 This is because the most significant word is the one which may
1699 	 be less than full.  */
1700 
1701       const bool backwards = WORDS_BIG_ENDIAN;
1702       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1703       unsigned int i;
1704       rtx_insn *last;
1705 
1706       if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1707 	target = gen_reg_rtx (mode);
1708 
1709       /* In case we're about to clobber a base register or something
1710 	 (see gcc.c-torture/execute/20040625-1.c).   */
1711       if (reg_mentioned_p (target, str_rtx))
1712 	target = gen_reg_rtx (mode);
1713 
1714       /* Indicate for flow that the entire target reg is being set.  */
1715       emit_clobber (target);
1716 
1717       last = get_last_insn ();
1718       for (i = 0; i < nwords; i++)
1719 	{
1720 	  /* If I is 0, use the low-order word in both field and target;
1721 	     if I is 1, use the next to lowest word; and so on.  */
1722 	  /* Word number in TARGET to use.  */
1723 	  unsigned int wordnum
1724 	    = (backwards
1725 	       ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1726 	       : i);
1727 	  /* Offset from start of field in OP0.  */
1728 	  unsigned int bit_offset = (backwards ^ reverse
1729 				     ? MAX ((int) bitsize - ((int) i + 1)
1730 					    * BITS_PER_WORD,
1731 					    0)
1732 				     : (int) i * BITS_PER_WORD);
1733 	  rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1734 	  rtx result_part
1735 	    = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1736 					     bitsize - i * BITS_PER_WORD),
1737 				   bitnum + bit_offset, 1, target_part,
1738 				   mode, word_mode, reverse, fallback_p);
1739 
1740 	  gcc_assert (target_part);
1741 	  if (!result_part)
1742 	    {
1743 	      delete_insns_since (last);
1744 	      return NULL;
1745 	    }
1746 
1747 	  if (result_part != target_part)
1748 	    emit_move_insn (target_part, result_part);
1749 	}
1750 
1751       if (unsignedp)
1752 	{
1753 	  /* Unless we've filled TARGET, the upper regs in a multi-reg value
1754 	     need to be zeroed out.  */
1755 	  if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1756 	    {
1757 	      unsigned int i, total_words;
1758 
1759 	      total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1760 	      for (i = nwords; i < total_words; i++)
1761 		emit_move_insn
1762 		  (operand_subword (target,
1763 				    backwards ? total_words - i - 1 : i,
1764 				    1, VOIDmode),
1765 		   const0_rtx);
1766 	    }
1767 	  return target;
1768 	}
1769 
1770       /* Signed bit field: sign-extend with two arithmetic shifts.  */
1771       target = expand_shift (LSHIFT_EXPR, mode, target,
1772 			     GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1773       return expand_shift (RSHIFT_EXPR, mode, target,
1774 			   GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1775     }
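  /* Editorial illustration (not part of the original source): assuming
     BITS_PER_WORD == 32 and a 48-bit signed field whose natural MODE is
     DImode, the loop above performs two recursive extractions of at most
     32 bits each into the two words of TARGET; the final pair of shifts
     then sign-extends the result by shifting left and arithmetically
     right by 64 - 48 = 16 bits.  The concrete sizes are assumptions made
     for the example.  */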
1776 
1777   /* If OP0 is a multi-word register, narrow it to the affected word.
1778      If the region spans two words, defer to extract_split_bit_field.  */
1779   if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1780     {
1781       if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
1782 	{
1783 	  if (!fallback_p)
1784 	    return NULL_RTX;
1785 	  target = extract_split_bit_field (op0, bitsize, bitnum, unsignedp,
1786 					    reverse);
1787 	  return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1788 	}
1789       op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
1790 				 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1791       bitnum %= BITS_PER_WORD;
1792     }
1793 
1794   /* From here on we know the desired field is smaller than a word.
1795      If OP0 is a register, it too fits within a word.  */
1796   enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv;
1797   extraction_insn extv;
1798   if (!MEM_P (op0)
1799       && !reverse
1800       /* ??? We could limit the structure size to the part of OP0 that
1801 	 contains the field, with appropriate checks for endianness
1802 	 and TRULY_NOOP_TRUNCATION.  */
1803       && get_best_reg_extraction_insn (&extv, pattern,
1804 				       GET_MODE_BITSIZE (GET_MODE (op0)),
1805 				       tmode))
1806     {
1807       rtx result = extract_bit_field_using_extv (&extv, op0, bitsize, bitnum,
1808 						 unsignedp, target, mode,
1809 						 tmode);
1810       if (result)
1811 	return result;
1812     }
1813 
1814   /* If OP0 is a memory, try copying it to a register and seeing if a
1815      cheap register alternative is available.  */
1816   if (MEM_P (op0) && !reverse)
1817     {
1818       if (get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum,
1819 					tmode))
1820 	{
1821 	  rtx result = extract_bit_field_using_extv (&extv, op0, bitsize,
1822 						     bitnum, unsignedp,
1823 						     target, mode,
1824 						     tmode);
1825 	  if (result)
1826 	    return result;
1827 	}
1828 
1829       rtx_insn *last = get_last_insn ();
1830 
1831       /* Try loading part of OP0 into a register and extracting the
1832 	 bitfield from that.  */
1833       unsigned HOST_WIDE_INT bitpos;
1834       rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum,
1835 					       0, 0, tmode, &bitpos);
1836       if (xop0)
1837 	{
1838 	  xop0 = copy_to_reg (xop0);
1839 	  rtx result = extract_bit_field_1 (xop0, bitsize, bitpos,
1840 					    unsignedp, target,
1841 					    mode, tmode, reverse, false);
1842 	  if (result)
1843 	    return result;
1844 	  delete_insns_since (last);
1845 	}
1846     }
1847 
1848   if (!fallback_p)
1849     return NULL;
1850 
1851   /* Find a correspondingly-sized integer field, so we can apply
1852      shifts and masks to it.  */
1853   int_mode = int_mode_for_mode (tmode);
1854   if (int_mode == BLKmode)
1855     int_mode = int_mode_for_mode (mode);
1856   /* Should probably push op0 out to memory and then do a load.  */
1857   gcc_assert (int_mode != BLKmode);
1858 
1859   target = extract_fixed_bit_field (int_mode, op0, bitsize, bitnum, target,
1860 				    unsignedp, reverse);
1861 
1862   /* Complex values must be reversed piecewise, so we need to undo the global
1863      reversal, convert to the complex mode and reverse again.  */
1864   if (reverse && COMPLEX_MODE_P (tmode))
1865     {
1866       target = flip_storage_order (int_mode, target);
1867       target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
1868       target = flip_storage_order (tmode, target);
1869     }
1870   else
1871     target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
1872 
1873   return target;
1874 }
1875 
1876 /* Generate code to extract a bit-field from STR_RTX
1877    containing BITSIZE bits, starting at BITNUM,
1878    and put it in TARGET if possible (if TARGET is nonzero).
1879    Regardless of TARGET, we return the rtx for where the value is placed.
1880 
1881    STR_RTX is the structure containing the byte (a REG or MEM).
1882    UNSIGNEDP is nonzero if this is an unsigned bit field.
1883    MODE is the natural mode of the field value once extracted.
1884    TMODE is the mode the caller would like the value to have;
1885    but the value may be returned with type MODE instead.
1886 
1887    If REVERSE is true, the extraction is to be done in reverse order.
1888 
1889    If a TARGET is specified and we can store in it at no extra cost,
1890    we do so, and return TARGET.
1891    Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1892    if they are equally easy.  */
1893 
1894 rtx
1895 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1896 		   unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1897 		   machine_mode mode, machine_mode tmode, bool reverse)
1898 {
1899   machine_mode mode1;
1900 
1901   /* Handle -fstrict-volatile-bitfields in the cases where it applies.  */
1902   if (GET_MODE_BITSIZE (GET_MODE (str_rtx)) > 0)
1903     mode1 = GET_MODE (str_rtx);
1904   else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
1905     mode1 = GET_MODE (target);
1906   else
1907     mode1 = tmode;
1908 
1909   if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, mode1, 0, 0))
1910     {
1911       /* Extraction of a full MODE1 value can be done with a simple load.
1912 	 We know here that the field can be accessed with one single
1913 	 instruction.  For targets that support unaligned memory,
1914 	 an unaligned access may be necessary.  */
1915       if (bitsize == GET_MODE_BITSIZE (mode1))
1916 	{
1917 	  rtx result = adjust_bitfield_address (str_rtx, mode1,
1918 						bitnum / BITS_PER_UNIT);
1919 	  if (reverse)
1920 	    result = flip_storage_order (mode1, result);
1921 	  gcc_assert (bitnum % BITS_PER_UNIT == 0);
1922 	  return convert_extracted_bit_field (result, mode, tmode, unsignedp);
1923 	}
1924 
1925       str_rtx = narrow_bit_field_mem (str_rtx, mode1, bitsize, bitnum,
1926 				      &bitnum);
1927       gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (mode1));
1928       str_rtx = copy_to_reg (str_rtx);
1929     }
1930 
1931   return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
1932 			      target, mode, tmode, reverse, true);
1933 }
1934 
1935 /* Use shifts and boolean operations to extract a field of BITSIZE bits
1936    from bit BITNUM of OP0.
1937 
1938    UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1939    If REVERSE is true, the extraction is to be done in reverse order.
1940 
1941    If TARGET is nonzero, attempts to store the value there
1942    and return TARGET, but this is not guaranteed.
1943    If TARGET is not used, create a pseudo-reg of mode TMODE for the value.  */
1944 
1945 static rtx
1946 extract_fixed_bit_field (machine_mode tmode, rtx op0,
1947 			 unsigned HOST_WIDE_INT bitsize,
1948 			 unsigned HOST_WIDE_INT bitnum, rtx target,
1949 			 int unsignedp, bool reverse)
1950 {
1951   if (MEM_P (op0))
1952     {
1953       machine_mode mode
1954 	= get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0), word_mode,
1955 			 MEM_VOLATILE_P (op0));
1956 
1957       if (mode == VOIDmode)
1958 	/* The only way this should occur is if the field spans word
1959 	   boundaries.  */
1960 	return extract_split_bit_field (op0, bitsize, bitnum, unsignedp,
1961 					reverse);
1962 
1963       op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
1964     }
1965 
1966   return extract_fixed_bit_field_1 (tmode, op0, bitsize, bitnum,
1967 				    target, unsignedp, reverse);
1968 }
1969 
1970 /* Helper function for extract_fixed_bit_field, extracts
1971    the bit field always using the MODE of OP0.  */
1972 
1973 static rtx
1974 extract_fixed_bit_field_1 (machine_mode tmode, rtx op0,
1975 			   unsigned HOST_WIDE_INT bitsize,
1976 			   unsigned HOST_WIDE_INT bitnum, rtx target,
1977 			   int unsignedp, bool reverse)
1978 {
1979   machine_mode mode = GET_MODE (op0);
1980   gcc_assert (SCALAR_INT_MODE_P (mode));
1981 
1982   /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1983      for invalid input, such as extract equivalent of f5 from
1984      gcc.dg/pr48335-2.c.  */
1985 
1986   if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1987     /* BITNUM is the distance between our msb and that of OP0.
1988        Convert it to the distance from the lsb.  */
1989     bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1990 
1991   /* Now BITNUM is always the distance between the field's lsb and that of OP0.
1992      We have reduced the big-endian case to the little-endian case.  */
1993   if (reverse)
1994     op0 = flip_storage_order (mode, op0);
1995 
1996   if (unsignedp)
1997     {
1998       if (bitnum)
1999 	{
2000 	  /* If the field does not already start at the lsb,
2001 	     shift it so it does.  */
2002 	  /* Maybe propagate the target for the shift.  */
2003 	  rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2004 	  if (tmode != mode)
2005 	    subtarget = 0;
2006 	  op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
2007 	}
2008       /* Convert the value to the desired mode.  */
2009       if (mode != tmode)
2010 	op0 = convert_to_mode (tmode, op0, 1);
2011 
2012       /* Unless the msb of the field used to be the msb when we shifted,
2013 	 mask out the upper bits.  */
2014 
2015       if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
2016 	return expand_binop (GET_MODE (op0), and_optab, op0,
2017 			     mask_rtx (GET_MODE (op0), 0, bitsize, 0),
2018 			     target, 1, OPTAB_LIB_WIDEN);
2019       return op0;
2020     }
2021 
2022   /* To extract a signed bit-field, first shift its msb to the msb of the word,
2023      then arithmetic-shift its lsb to the lsb of the word.  */
2024   op0 = force_reg (mode, op0);
2025 
2026   /* Find the narrowest integer mode that contains the field.  */
2027 
2028   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
2029        mode = GET_MODE_WIDER_MODE (mode))
2030     if (GET_MODE_BITSIZE (mode) >= bitsize + bitnum)
2031       {
2032 	op0 = convert_to_mode (mode, op0, 0);
2033 	break;
2034       }
2035 
2036   if (mode != tmode)
2037     target = 0;
2038 
2039   if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
2040     {
2041       int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
2042       /* Maybe propagate the target for the shift.  */
2043       rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2044       op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
2045     }
2046 
2047   return expand_shift (RSHIFT_EXPR, mode, op0,
2048 		       GET_MODE_BITSIZE (mode) - bitsize, target, 0);
2049 }
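/* Editorial illustration (not part of the original source): a plain-C
   analogue of the shift-and-mask sequences generated above, assuming a
   32-bit word, bitnum + bitsize <= 32, bitsize < 32 for the unsigned mask,
   and an implementation whose signed right shift is arithmetic:

       static unsigned
       extract_u (unsigned w, int bitnum, int bitsize)
       {
         return (w >> bitnum) & ((1u << bitsize) - 1u);
       }

       static int
       extract_s (unsigned w, int bitnum, int bitsize)
       {
         int t = (int) (w << (32 - bitnum - bitsize));
         return t >> (32 - bitsize);
       }

   For a 3-bit field at bit 5 this is (w >> 5) & 7 in the unsigned case and
   ((int) (w << 24)) >> 29 in the signed case.  */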
2050 
2051 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
2052    VALUE << BITPOS.  */
2053 
2054 static rtx
2055 lshift_value (machine_mode mode, unsigned HOST_WIDE_INT value,
2056 	      int bitpos)
2057 {
2058   return immed_wide_int_const (wi::lshift (value, bitpos), mode);
2059 }
2060 
2061 /* Extract a bit field that is split across two words
2062    and return an RTX for the result.
2063 
2064    OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
2065    BITSIZE is the field width; BITPOS is the position of its first bit within the word.
2066    UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.
2067 
2068    If REVERSE is true, the extraction is to be done in reverse order.  */
2069 
2070 static rtx
2071 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
2072 			 unsigned HOST_WIDE_INT bitpos, int unsignedp,
2073 			 bool reverse)
2074 {
2075   unsigned int unit;
2076   unsigned int bitsdone = 0;
2077   rtx result = NULL_RTX;
2078   int first = 1;
2079 
2080   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
2081      much at a time.  */
2082   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
2083     unit = BITS_PER_WORD;
2084   else
2085     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
2086 
2087   while (bitsdone < bitsize)
2088     {
2089       unsigned HOST_WIDE_INT thissize;
2090       rtx part, word;
2091       unsigned HOST_WIDE_INT thispos;
2092       unsigned HOST_WIDE_INT offset;
2093 
2094       offset = (bitpos + bitsdone) / unit;
2095       thispos = (bitpos + bitsdone) % unit;
2096 
2097       /* THISSIZE must not overrun a word boundary.  Otherwise,
2098 	 extract_fixed_bit_field will call us again, and we will mutually
2099 	 recurse forever.  */
2100       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
2101       thissize = MIN (thissize, unit - thispos);
2102 
2103       /* If OP0 is a register, then handle OFFSET here.  */
2104       if (SUBREG_P (op0) || REG_P (op0))
2105 	{
2106 	  word = operand_subword_force (op0, offset, GET_MODE (op0));
2107 	  offset = 0;
2108 	}
2109       else
2110 	word = op0;
2111 
2112       /* Extract the parts in bit-counting order,
2113 	 whose meaning is determined by BYTES_PER_UNIT.
2114 	 OFFSET is in UNITs, and UNIT is in bits.  */
2115       part = extract_fixed_bit_field (word_mode, word, thissize,
2116 				      offset * unit + thispos, 0, 1, reverse);
2117       bitsdone += thissize;
2118 
2119       /* Shift this part into place for the result.  */
2120       if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2121 	{
2122 	  if (bitsize != bitsdone)
2123 	    part = expand_shift (LSHIFT_EXPR, word_mode, part,
2124 				 bitsize - bitsdone, 0, 1);
2125 	}
2126       else
2127 	{
2128 	  if (bitsdone != thissize)
2129 	    part = expand_shift (LSHIFT_EXPR, word_mode, part,
2130 				 bitsdone - thissize, 0, 1);
2131 	}
2132 
2133       if (first)
2134 	result = part;
2135       else
2136 	/* Combine the parts with bitwise or.  This works
2137 	   because we extracted each part as an unsigned bit field.  */
2138 	result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2139 			       OPTAB_LIB_WIDEN);
2140 
2141       first = 0;
2142     }
2143 
2144   /* Unsigned bit field: we are done.  */
2145   if (unsignedp)
2146     return result;
2147   /* Signed bit field: sign-extend with two arithmetic shifts.  */
2148   result = expand_shift (LSHIFT_EXPR, word_mode, result,
2149 			 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2150   return expand_shift (RSHIFT_EXPR, word_mode, result,
2151 		       BITS_PER_WORD - bitsize, NULL_RTX, 0);
2152 }
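/* Editorial illustration (not part of the original source): assuming
   BITS_PER_WORD == 32, a register source (so UNIT == 32), !BYTES_BIG_ENDIAN
   and no reversal, a 10-bit field at BITPOS 28 is assembled as follows:
   the first iteration extracts the 4 bits at bit 28 of word 0, the second
   extracts the 6 bits at bit 0 of word 1 and shifts them left by 4, and the
   two parts are IORed together; for a signed field the result is then
   sign-extended by the final shifts (left and arithmetic right by
   32 - 10 = 22).  The concrete numbers are assumptions made for the
   example.  */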
2153 
2154 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2155    the bit pattern.  SRC_MODE is the mode of SRC; if this is smaller than
2156    MODE, fill the upper bits with zeros.  Fail if the layout of either
2157    mode is unknown (as for CC modes) or if the extraction would involve
2158    unprofitable mode punning.  Return the value on success, otherwise
2159    return null.
2160 
2161    This is different from gen_lowpart* in these respects:
2162 
2163      - the returned value must always be considered an rvalue
2164 
2165      - when MODE is wider than SRC_MODE, the extraction involves
2166        a zero extension
2167 
2168      - when MODE is smaller than SRC_MODE, the extraction involves
2169        a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
2170 
2171    In other words, this routine performs a computation, whereas the
2172    gen_lowpart* routines are conceptually lvalue or rvalue subreg
2173    operations.  */
2174 
2175 rtx
2176 extract_low_bits (machine_mode mode, machine_mode src_mode, rtx src)
2177 {
2178   machine_mode int_mode, src_int_mode;
2179 
2180   if (mode == src_mode)
2181     return src;
2182 
2183   if (CONSTANT_P (src))
2184     {
2185       /* simplify_gen_subreg can't be used here, as if simplify_subreg
2186 	 fails, it will happily create (subreg (symbol_ref)) or similar
2187 	 invalid SUBREGs.  */
2188       unsigned int byte = subreg_lowpart_offset (mode, src_mode);
2189       rtx ret = simplify_subreg (mode, src, src_mode, byte);
2190       if (ret)
2191 	return ret;
2192 
2193       if (GET_MODE (src) == VOIDmode
2194 	  || !validate_subreg (mode, src_mode, src, byte))
2195 	return NULL_RTX;
2196 
2197       src = force_reg (GET_MODE (src), src);
2198       return gen_rtx_SUBREG (mode, src, byte);
2199     }
2200 
2201   if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2202     return NULL_RTX;
2203 
2204   if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2205       && MODES_TIEABLE_P (mode, src_mode))
2206     {
2207       rtx x = gen_lowpart_common (mode, src);
2208       if (x)
2209         return x;
2210     }
2211 
2212   src_int_mode = int_mode_for_mode (src_mode);
2213   int_mode = int_mode_for_mode (mode);
2214   if (src_int_mode == BLKmode || int_mode == BLKmode)
2215     return NULL_RTX;
2216 
2217   if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2218     return NULL_RTX;
2219   if (!MODES_TIEABLE_P (int_mode, mode))
2220     return NULL_RTX;
2221 
2222   src = gen_lowpart (src_int_mode, src);
2223   src = convert_modes (int_mode, src_int_mode, src, true);
2224   src = gen_lowpart (mode, src);
2225   return src;
2226 }
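/* Editorial illustration (not part of the original source): assuming a
   target where the modes involved are tieable, reading the low 32 bits of
   a 64-bit DFmode register as SImode goes through the integer-mode path
   above: the value is viewed as DImode via gen_lowpart, truncated to
   SImode with convert_modes, and returned as an SImode rvalue, with no
   memory traffic.  */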
2227 
2228 /* Add INC into TARGET.  */
2229 
2230 void
2231 expand_inc (rtx target, rtx inc)
2232 {
2233   rtx value = expand_binop (GET_MODE (target), add_optab,
2234 			    target, inc,
2235 			    target, 0, OPTAB_LIB_WIDEN);
2236   if (value != target)
2237     emit_move_insn (target, value);
2238 }
2239 
2240 /* Subtract DEC from TARGET.  */
2241 
2242 void
2243 expand_dec (rtx target, rtx dec)
2244 {
2245   rtx value = expand_binop (GET_MODE (target), sub_optab,
2246 			    target, dec,
2247 			    target, 0, OPTAB_LIB_WIDEN);
2248   if (value != target)
2249     emit_move_insn (target, value);
2250 }
2251 
2252 /* Output a shift instruction for expression code CODE,
2253    with SHIFTED being the rtx for the value to shift,
2254    and AMOUNT the rtx for the amount to shift by.
2255    Store the result in the rtx TARGET, if that is convenient.
2256    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2257    Return the rtx for where the value is.
2258    If that cannot be done, abort the compilation unless MAY_FAIL is true,
2259    in which case 0 is returned.  */
2260 
2261 static rtx
2262 expand_shift_1 (enum tree_code code, machine_mode mode, rtx shifted,
2263 		rtx amount, rtx target, int unsignedp, bool may_fail = false)
2264 {
2265   rtx op1, temp = 0;
2266   int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2267   int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2268   optab lshift_optab = ashl_optab;
2269   optab rshift_arith_optab = ashr_optab;
2270   optab rshift_uns_optab = lshr_optab;
2271   optab lrotate_optab = rotl_optab;
2272   optab rrotate_optab = rotr_optab;
2273   machine_mode op1_mode;
2274   machine_mode scalar_mode = mode;
2275   int attempt;
2276   bool speed = optimize_insn_for_speed_p ();
2277 
2278   if (VECTOR_MODE_P (mode))
2279     scalar_mode = GET_MODE_INNER (mode);
2280   op1 = amount;
2281   op1_mode = GET_MODE (op1);
2282 
2283   /* Determine whether the shift/rotate amount is a vector or a scalar.  If the
2284      shift amount is a vector, use the vector/vector shift patterns.  */
2285   if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2286     {
2287       lshift_optab = vashl_optab;
2288       rshift_arith_optab = vashr_optab;
2289       rshift_uns_optab = vlshr_optab;
2290       lrotate_optab = vrotl_optab;
2291       rrotate_optab = vrotr_optab;
2292     }
2293 
2294   /* Shift counts computed by NEGATE_EXPR used to be detected here
2295      and shifted in the other direction, but that does not work
2296      on all machines.  */
2297 
2298   if (SHIFT_COUNT_TRUNCATED)
2299     {
2300       if (CONST_INT_P (op1)
2301 	  && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2302 	      (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (scalar_mode)))
2303 	op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2304 		       % GET_MODE_BITSIZE (scalar_mode));
2305       else if (GET_CODE (op1) == SUBREG
2306 	       && subreg_lowpart_p (op1)
2307 	       && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1)))
2308 	       && SCALAR_INT_MODE_P (GET_MODE (op1)))
2309 	op1 = SUBREG_REG (op1);
2310     }
2311 
2312   /* Canonicalize rotates by constant amount.  If op1 is bitsize / 2,
2313      prefer left rotation; if op1 is from bitsize / 2 + 1 to
2314      bitsize - 1, use the other rotate direction with an amount of
2315      1 .. bitsize / 2 - 1 instead.  */
2316   if (rotate
2317       && CONST_INT_P (op1)
2318       && IN_RANGE (INTVAL (op1), GET_MODE_BITSIZE (scalar_mode) / 2 + left,
2319 		   GET_MODE_BITSIZE (scalar_mode) - 1))
2320     {
2321       op1 = GEN_INT (GET_MODE_BITSIZE (scalar_mode) - INTVAL (op1));
2322       left = !left;
2323       code = left ? LROTATE_EXPR : RROTATE_EXPR;
2324     }
2325 
2326   /* Rotation of 16-bit values by 8 bits is effectively equivalent to a bswaphi.
2327      Note that this is not the case for bigger values.  For instance a rotation
2328      of 0x01020304 by 16 bits gives 0x03040102 which is different from
2329      0x04030201 (bswapsi).  */
2330   if (rotate
2331       && CONST_INT_P (op1)
2332       && INTVAL (op1) == BITS_PER_UNIT
2333       && GET_MODE_SIZE (scalar_mode) == 2
2334       && optab_handler (bswap_optab, mode) != CODE_FOR_nothing)
2335     return expand_unop (mode, bswap_optab, shifted, NULL_RTX, unsignedp);
2336 
2337   if (op1 == const0_rtx)
2338     return shifted;
2339 
2340   /* Check whether it's cheaper to implement a left shift by a constant
2341      bit count as a sequence of additions.  */
2342   if (code == LSHIFT_EXPR
2343       && CONST_INT_P (op1)
2344       && INTVAL (op1) > 0
2345       && INTVAL (op1) < GET_MODE_PRECISION (scalar_mode)
2346       && INTVAL (op1) < MAX_BITS_PER_WORD
2347       && (shift_cost (speed, mode, INTVAL (op1))
2348 	  > INTVAL (op1) * add_cost (speed, mode))
2349       && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2350     {
2351       int i;
2352       for (i = 0; i < INTVAL (op1); i++)
2353 	{
2354 	  temp = force_reg (mode, shifted);
2355 	  shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2356 				  unsignedp, OPTAB_LIB_WIDEN);
2357 	}
2358       return shifted;
2359     }
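  /* Editorial illustration (not part of the original source): if shifting
     by 2 costs more than two additions on the current target, the loop
     above lowers x << 2 to repeated doubling, roughly

         t = x + x;
         result = t + t;

     where t holds x * 2 and result holds x * 4, i.e. x << 2; one addition
     is emitted per bit of the shift count.  */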
2360 
2361   for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2362     {
2363       enum optab_methods methods;
2364 
2365       if (attempt == 0)
2366 	methods = OPTAB_DIRECT;
2367       else if (attempt == 1)
2368 	methods = OPTAB_WIDEN;
2369       else
2370 	methods = OPTAB_LIB_WIDEN;
2371 
2372       if (rotate)
2373 	{
2374 	  /* Widening does not work for rotation.  */
2375 	  if (methods == OPTAB_WIDEN)
2376 	    continue;
2377 	  else if (methods == OPTAB_LIB_WIDEN)
2378 	    {
2379 	      /* If we have been unable to open-code this by a rotation,
2380 		 do it as the IOR of two shifts.  I.e., to rotate A
2381 		 by N bits, compute
2382 		 (A << N) | ((unsigned) A >> ((-N) & (C - 1)))
2383 		 where C is the bitsize of A.
2384 
2385 		 It is theoretically possible that the target machine might
2386 		 not be able to perform either shift and hence we would
2387 		 be making two libcalls rather than just the one for the
2388 		 shift (similarly if IOR could not be done).  We will allow
2389 		 this extremely unlikely lossage to avoid complicating the
2390 		 code below.  */
2391 
2392 	      rtx subtarget = target == shifted ? 0 : target;
2393 	      rtx new_amount, other_amount;
2394 	      rtx temp1;
2395 
2396 	      new_amount = op1;
2397 	      if (op1 == const0_rtx)
2398 		return shifted;
2399 	      else if (CONST_INT_P (op1))
2400 		other_amount = GEN_INT (GET_MODE_BITSIZE (scalar_mode)
2401 					- INTVAL (op1));
2402 	      else
2403 		{
2404 		  other_amount
2405 		    = simplify_gen_unary (NEG, GET_MODE (op1),
2406 					  op1, GET_MODE (op1));
2407 		  HOST_WIDE_INT mask = GET_MODE_PRECISION (scalar_mode) - 1;
2408 		  other_amount
2409 		    = simplify_gen_binary (AND, GET_MODE (op1), other_amount,
2410 					   gen_int_mode (mask, GET_MODE (op1)));
2411 		}
2412 
2413 	      shifted = force_reg (mode, shifted);
2414 
2415 	      temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2416 				     mode, shifted, new_amount, 0, 1);
2417 	      temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2418 				      mode, shifted, other_amount,
2419 				      subtarget, 1);
2420 	      return expand_binop (mode, ior_optab, temp, temp1, target,
2421 				   unsignedp, methods);
2422 	    }
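	  /* Editorial illustration (not part of the original source): the
	     IOR-of-two-shifts fallback above corresponds, for a 32-bit left
	     rotation by a nonzero variable count (the count-zero case was
	     returned earlier), to the plain-C expression

	         (a << n) | (a >> (-n & 31))

	     where -n & 31 equals 32 - n for n in 1 .. 31, so neither shift
	     count can reach the full width.  */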
2423 
2424 	  temp = expand_binop (mode,
2425 			       left ? lrotate_optab : rrotate_optab,
2426 			       shifted, op1, target, unsignedp, methods);
2427 	}
2428       else if (unsignedp)
2429 	temp = expand_binop (mode,
2430 			     left ? lshift_optab : rshift_uns_optab,
2431 			     shifted, op1, target, unsignedp, methods);
2432 
2433       /* Do arithmetic shifts.
2434 	 Also, if we are going to widen the operand, we can just as well
2435 	 use an arithmetic right-shift instead of a logical one.  */
2436       if (temp == 0 && ! rotate
2437 	  && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2438 	{
2439 	  enum optab_methods methods1 = methods;
2440 
2441 	  /* If trying to widen a log shift to an arithmetic shift,
2442 	     don't accept an arithmetic shift of the same size.  */
2443 	  if (unsignedp)
2444 	    methods1 = OPTAB_MUST_WIDEN;
2445 
2446 	  /* Arithmetic shift */
2447 
2448 	  temp = expand_binop (mode,
2449 			       left ? lshift_optab : rshift_arith_optab,
2450 			       shifted, op1, target, unsignedp, methods1);
2451 	}
2452 
2453       /* We used to try extzv here for logical right shifts, but that was
2454 	 only useful for one machine, the VAX, and caused poor code
2455 	 generation there for lshrdi3, so the code was deleted and a
2456 	 define_expand for lshrsi3 was added to vax.md.  */
2457     }
2458 
2459   gcc_assert (temp != NULL_RTX || may_fail);
2460   return temp;
2461 }
2462 
2463 /* Output a shift instruction for expression code CODE,
2464    with SHIFTED being the rtx for the value to shift,
2465    and AMOUNT the amount to shift by.
2466    Store the result in the rtx TARGET, if that is convenient.
2467    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2468    Return the rtx for where the value is.  */
2469 
2470 rtx
2471 expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2472 	      int amount, rtx target, int unsignedp)
2473 {
2474   return expand_shift_1 (code, mode,
2475 			 shifted, GEN_INT (amount), target, unsignedp);
2476 }
2477 
2478 /* Likewise, but return 0 if that cannot be done.  */
2479 
2480 static rtx
2481 maybe_expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2482 		    int amount, rtx target, int unsignedp)
2483 {
2484   return expand_shift_1 (code, mode,
2485 			 shifted, GEN_INT (amount), target, unsignedp, true);
2486 }
2487 
2488 /* Output a shift instruction for expression code CODE,
2489    with SHIFTED being the rtx for the value to shift,
2490    and AMOUNT the tree for the amount to shift by.
2491    Store the result in the rtx TARGET, if that is convenient.
2492    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2493    Return the rtx for where the value is.  */
2494 
2495 rtx
2496 expand_variable_shift (enum tree_code code, machine_mode mode, rtx shifted,
2497 		       tree amount, rtx target, int unsignedp)
2498 {
2499   return expand_shift_1 (code, mode,
2500 			 shifted, expand_normal (amount), target, unsignedp);
2501 }
2502 
2503 
2504 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2505 			const struct mult_cost *, machine_mode mode);
2506 static rtx expand_mult_const (machine_mode, rtx, HOST_WIDE_INT, rtx,
2507 			      const struct algorithm *, enum mult_variant);
2508 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2509 static rtx extract_high_half (machine_mode, rtx);
2510 static rtx expmed_mult_highpart (machine_mode, rtx, rtx, rtx, int, int);
2511 static rtx expmed_mult_highpart_optab (machine_mode, rtx, rtx, rtx,
2512 				       int, int);
2513 /* Compute and return the best algorithm for multiplying by T.
2514    The algorithm must cost less than COST_LIMIT.
2515    If retval.cost >= COST_LIMIT, no algorithm was found and all
2516    other fields of the returned struct are undefined.
2517    MODE is the machine mode of the multiplication.  */
2518 
2519 static void
2520 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2521 	    const struct mult_cost *cost_limit, machine_mode mode)
2522 {
2523   int m;
2524   struct algorithm *alg_in, *best_alg;
2525   struct mult_cost best_cost;
2526   struct mult_cost new_limit;
2527   int op_cost, op_latency;
2528   unsigned HOST_WIDE_INT orig_t = t;
2529   unsigned HOST_WIDE_INT q;
2530   int maxm, hash_index;
2531   bool cache_hit = false;
2532   enum alg_code cache_alg = alg_zero;
2533   bool speed = optimize_insn_for_speed_p ();
2534   machine_mode imode;
2535   struct alg_hash_entry *entry_ptr;
2536 
2537   /* Indicate that no algorithm is yet found.  If no algorithm
2538      is found, this value will be returned and indicate failure.  */
2539   alg_out->cost.cost = cost_limit->cost + 1;
2540   alg_out->cost.latency = cost_limit->latency + 1;
2541 
2542   if (cost_limit->cost < 0
2543       || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2544     return;
2545 
2546   /* Be prepared for vector modes.  */
2547   imode = GET_MODE_INNER (mode);
2548 
2549   maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2550 
2551   /* Restrict the bits of "t" to the multiplication's mode.  */
2552   t &= GET_MODE_MASK (imode);
2553 
2554   /* t == 1 can be done in zero cost.  */
2555   if (t == 1)
2556     {
2557       alg_out->ops = 1;
2558       alg_out->cost.cost = 0;
2559       alg_out->cost.latency = 0;
2560       alg_out->op[0] = alg_m;
2561       return;
2562     }
2563 
2564   /* t == 0 sometimes has a cost.  If it does and it exceeds our limit,
2565      fail now.  */
2566   if (t == 0)
2567     {
2568       if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2569 	return;
2570       else
2571 	{
2572 	  alg_out->ops = 1;
2573 	  alg_out->cost.cost = zero_cost (speed);
2574 	  alg_out->cost.latency = zero_cost (speed);
2575 	  alg_out->op[0] = alg_zero;
2576 	  return;
2577 	}
2578     }
2579 
2580   /* We'll be needing a couple extra algorithm structures now.  */
2581 
2582   alg_in = XALLOCA (struct algorithm);
2583   best_alg = XALLOCA (struct algorithm);
2584   best_cost = *cost_limit;
2585 
2586   /* Compute the hash index.  */
2587   hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2588 
2589   /* See if we already know what to do for T.  */
2590   entry_ptr = alg_hash_entry_ptr (hash_index);
2591   if (entry_ptr->t == t
2592       && entry_ptr->mode == mode
2593       && entry_ptr->speed == speed
2594       && entry_ptr->alg != alg_unknown)
2595     {
2596       cache_alg = entry_ptr->alg;
2597 
2598       if (cache_alg == alg_impossible)
2599 	{
2600 	  /* The cache tells us that it's impossible to synthesize
2601 	     multiplication by T within entry_ptr->cost.  */
2602 	  if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2603 	    /* COST_LIMIT is at least as restrictive as the one
2604 	       recorded in the hash table, in which case we have no
2605 	       hope of synthesizing a multiplication.  Just
2606 	       return.  */
2607 	    return;
2608 
2609 	  /* If we get here, COST_LIMIT is less restrictive than the
2610 	     one recorded in the hash table, so we may be able to
2611 	     synthesize a multiplication.  Proceed as if we didn't
2612 	     have the cache entry.  */
2613 	}
2614       else
2615 	{
2616 	  if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2617 	    /* The cached algorithm shows that this multiplication
2618 	       requires more cost than COST_LIMIT.  Just return.  This
2619 	       way, we don't clobber this cache entry with
2620 	       alg_impossible but retain useful information.  */
2621 	    return;
2622 
2623 	  cache_hit = true;
2624 
2625 	  switch (cache_alg)
2626 	    {
2627 	    case alg_shift:
2628 	      goto do_alg_shift;
2629 
2630 	    case alg_add_t_m2:
2631 	    case alg_sub_t_m2:
2632 	      goto do_alg_addsub_t_m2;
2633 
2634 	    case alg_add_factor:
2635 	    case alg_sub_factor:
2636 	      goto do_alg_addsub_factor;
2637 
2638 	    case alg_add_t2_m:
2639 	      goto do_alg_add_t2_m;
2640 
2641 	    case alg_sub_t2_m:
2642 	      goto do_alg_sub_t2_m;
2643 
2644 	    default:
2645 	      gcc_unreachable ();
2646 	    }
2647 	}
2648     }
2649 
2650   /* If we have a group of zero bits at the low-order part of T, try
2651      multiplying by the remaining bits and then doing a shift.  */
2652 
2653   if ((t & 1) == 0)
2654     {
2655     do_alg_shift:
2656       m = ctz_or_zero (t); /* m = number of low zero bits */
2657       if (m < maxm)
2658 	{
2659 	  q = t >> m;
2660 	  /* The function expand_shift will choose between a shift and
2661 	     a sequence of additions, so the observed cost is given as
2662 	     MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)).  */
2663 	  op_cost = m * add_cost (speed, mode);
2664 	  if (shift_cost (speed, mode, m) < op_cost)
2665 	    op_cost = shift_cost (speed, mode, m);
2666 	  new_limit.cost = best_cost.cost - op_cost;
2667 	  new_limit.latency = best_cost.latency - op_cost;
2668 	  synth_mult (alg_in, q, &new_limit, mode);
2669 
2670 	  alg_in->cost.cost += op_cost;
2671 	  alg_in->cost.latency += op_cost;
2672 	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2673 	    {
2674 	      best_cost = alg_in->cost;
2675 	      std::swap (alg_in, best_alg);
2676 	      best_alg->log[best_alg->ops] = m;
2677 	      best_alg->op[best_alg->ops] = alg_shift;
2678 	    }
2679 
2680 	  /* See if treating ORIG_T as a signed number yields a better
2681 	     sequence.  Try this sequence only for a negative ORIG_T
2682 	     as it would be useless for a non-negative ORIG_T.  */
2683 	  if ((HOST_WIDE_INT) orig_t < 0)
2684 	    {
2685 	      /* Shift ORIG_T as follows because a right shift of a
2686 		 negative-valued signed type is implementation
2687 		 defined.  */
2688 	      q = ~(~orig_t >> m);
2689 	      /* The function expand_shift will choose between a shift
2690 		 and a sequence of additions, so the observed cost is
2691 		 given as MIN (m * add_cost(speed, mode),
2692 		 shift_cost(speed, mode, m)).  */
2693 	      op_cost = m * add_cost (speed, mode);
2694 	      if (shift_cost (speed, mode, m) < op_cost)
2695 		op_cost = shift_cost (speed, mode, m);
2696 	      new_limit.cost = best_cost.cost - op_cost;
2697 	      new_limit.latency = best_cost.latency - op_cost;
2698 	      synth_mult (alg_in, q, &new_limit, mode);
2699 
2700 	      alg_in->cost.cost += op_cost;
2701 	      alg_in->cost.latency += op_cost;
2702 	      if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2703 		{
2704 		  best_cost = alg_in->cost;
2705 		  std::swap (alg_in, best_alg);
2706 		  best_alg->log[best_alg->ops] = m;
2707 		  best_alg->op[best_alg->ops] = alg_shift;
2708 		}
2709 	    }
2710 	}
2711       if (cache_hit)
2712 	goto done;
2713     }
2714 
2715   /* If we have an odd number, add or subtract one.  */
2716   if ((t & 1) != 0)
2717     {
2718       unsigned HOST_WIDE_INT w;
2719 
2720     do_alg_addsub_t_m2:
2721       for (w = 1; (w & t) != 0; w <<= 1)
2722 	;
2723       /* If T was -1, then W will be zero after the loop.  This is another
2724 	 case where T ends with ...111.  Handling this by computing (T + 1)
2725 	 and subtracting 1 produces slightly better code and results in
2726 	 algorithm selection much faster than treating it like the ...0111
2727 	 case below.  */
2728       if (w == 0
2729 	  || (w > 2
2730 	      /* Reject the case where t is 3.
2731 		 Thus we prefer addition in that case.  */
2732 	      && t != 3))
2733 	{
2734 	  /* T ends with ...111.  Multiply by (T + 1) and subtract T.  */
2735 
2736 	  op_cost = add_cost (speed, mode);
2737 	  new_limit.cost = best_cost.cost - op_cost;
2738 	  new_limit.latency = best_cost.latency - op_cost;
2739 	  synth_mult (alg_in, t + 1, &new_limit, mode);
2740 
2741 	  alg_in->cost.cost += op_cost;
2742 	  alg_in->cost.latency += op_cost;
2743 	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2744 	    {
2745 	      best_cost = alg_in->cost;
2746 	      std::swap (alg_in, best_alg);
2747 	      best_alg->log[best_alg->ops] = 0;
2748 	      best_alg->op[best_alg->ops] = alg_sub_t_m2;
2749 	    }
2750 	}
2751       else
2752 	{
2753 	  /* T ends with ...01 or ...011.  Multiply by (T - 1) and add T.  */
2754 
2755 	  op_cost = add_cost (speed, mode);
2756 	  new_limit.cost = best_cost.cost - op_cost;
2757 	  new_limit.latency = best_cost.latency - op_cost;
2758 	  synth_mult (alg_in, t - 1, &new_limit, mode);
2759 
2760 	  alg_in->cost.cost += op_cost;
2761 	  alg_in->cost.latency += op_cost;
2762 	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2763 	    {
2764 	      best_cost = alg_in->cost;
2765 	      std::swap (alg_in, best_alg);
2766 	      best_alg->log[best_alg->ops] = 0;
2767 	      best_alg->op[best_alg->ops] = alg_add_t_m2;
2768 	    }
2769 	}
2770 
2771       /* We may be able to calculate a * -7, a * -15, a * -31, etc.
2772 	 quickly with a - a * n for some appropriate constant n.  */
2773       m = exact_log2 (-orig_t + 1);
2774       if (m >= 0 && m < maxm)
2775 	{
2776 	  op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2777 	  /* If the target has a cheap shift-and-subtract insn use
2778 	     that in preference to a shift insn followed by a sub insn.
2779 	     Assume that the shift-and-sub is "atomic" with a latency
2780 	     equal to its cost; otherwise assume that on superscalar
2781 	     hardware the shift may be executed concurrently with the
2782 	     earlier steps in the algorithm.  */
2783 	  if (shiftsub1_cost (speed, mode, m) <= op_cost)
2784 	    {
2785 	      op_cost = shiftsub1_cost (speed, mode, m);
2786 	      op_latency = op_cost;
2787 	    }
2788 	  else
2789 	    op_latency = add_cost (speed, mode);
2790 
2791 	  new_limit.cost = best_cost.cost - op_cost;
2792 	  new_limit.latency = best_cost.latency - op_latency;
2793 	  synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
2794 		      &new_limit, mode);
2795 
2796 	  alg_in->cost.cost += op_cost;
2797 	  alg_in->cost.latency += op_latency;
2798 	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2799 	    {
2800 	      best_cost = alg_in->cost;
2801 	      std::swap (alg_in, best_alg);
2802 	      best_alg->log[best_alg->ops] = m;
2803 	      best_alg->op[best_alg->ops] = alg_sub_t_m2;
2804 	    }
2805 	}
2806 
2807       if (cache_hit)
2808 	goto done;
2809     }
2810 
2811   /* Look for factors of t of the form
2812      t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2813      If we find such a factor, we can multiply by t using an algorithm that
2814      multiplies by q, shift the result by m and add/subtract it to itself.
2815 
2816      We search for large factors first and loop down, even if large factors
2817      are less probable than small; if we find a large factor we will find a
2818      good sequence quickly, and therefore be able to prune (by decreasing
2819      COST_LIMIT) the search.  */
2820 
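  /* Editorial illustration (not part of the original source): for T == 45
     the loop below first finds the factor 15 == 2**4 - 1 (large factors are
     tried first), recurses on 45 / 15 == 3 to get y = (x << 1) + x, and
     completes the product as (y << 4) - y == 45 * x, cost permitting.  */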
2821  do_alg_addsub_factor:
2822   for (m = floor_log2 (t - 1); m >= 2; m--)
2823     {
2824       unsigned HOST_WIDE_INT d;
2825 
2826       d = (HOST_WIDE_INT_1U << m) + 1;
2827       if (t % d == 0 && t > d && m < maxm
2828 	  && (!cache_hit || cache_alg == alg_add_factor))
2829 	{
2830 	  op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2831 	  if (shiftadd_cost (speed, mode, m) <= op_cost)
2832 	    op_cost = shiftadd_cost (speed, mode, m);
2833 
2834 	  op_latency = op_cost;
2835 
2837 	  new_limit.cost = best_cost.cost - op_cost;
2838 	  new_limit.latency = best_cost.latency - op_latency;
2839 	  synth_mult (alg_in, t / d, &new_limit, mode);
2840 
2841 	  alg_in->cost.cost += op_cost;
2842 	  alg_in->cost.latency += op_latency;
2843 	  if (alg_in->cost.latency < op_cost)
2844 	    alg_in->cost.latency = op_cost;
2845 	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2846 	    {
2847 	      best_cost = alg_in->cost;
2848 	      std::swap (alg_in, best_alg);
2849 	      best_alg->log[best_alg->ops] = m;
2850 	      best_alg->op[best_alg->ops] = alg_add_factor;
2851 	    }
2852 	  /* Other factors will have been taken care of in the recursion.  */
2853 	  break;
2854 	}
2855 
2856       d = (HOST_WIDE_INT_1U << m) - 1;
2857       if (t % d == 0 && t > d && m < maxm
2858 	  && (!cache_hit || cache_alg == alg_sub_factor))
2859 	{
2860 	  op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2861 	  if (shiftsub0_cost (speed, mode, m) <= op_cost)
2862 	    op_cost = shiftsub0_cost (speed, mode, m);
2863 
2864 	  op_latency = op_cost;
2865 
2866 	  new_limit.cost = best_cost.cost - op_cost;
2867 	  new_limit.latency = best_cost.latency - op_latency;
2868 	  synth_mult (alg_in, t / d, &new_limit, mode);
2869 
2870 	  alg_in->cost.cost += op_cost;
2871 	  alg_in->cost.latency += op_latency;
2872 	  if (alg_in->cost.latency < op_cost)
2873 	    alg_in->cost.latency = op_cost;
2874 	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2875 	    {
2876 	      best_cost = alg_in->cost;
2877 	      std::swap (alg_in, best_alg);
2878 	      best_alg->log[best_alg->ops] = m;
2879 	      best_alg->op[best_alg->ops] = alg_sub_factor;
2880 	    }
2881 	  break;
2882 	}
2883     }
2884   if (cache_hit)
2885     goto done;
2886 
2887   /* Try shift-and-add (load effective address) instructions,
2888      i.e. do a*3, a*5, a*9.  */
2889   if ((t & 1) != 0)
2890     {
2891     do_alg_add_t2_m:
2892       q = t - 1;
2893       m = ctz_hwi (q);
2894       if (q && m < maxm)
2895 	{
2896 	  op_cost = shiftadd_cost (speed, mode, m);
2897 	  new_limit.cost = best_cost.cost - op_cost;
2898 	  new_limit.latency = best_cost.latency - op_cost;
2899 	  synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2900 
2901 	  alg_in->cost.cost += op_cost;
2902 	  alg_in->cost.latency += op_cost;
2903 	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2904 	    {
2905 	      best_cost = alg_in->cost;
2906 	      std::swap (alg_in, best_alg);
2907 	      best_alg->log[best_alg->ops] = m;
2908 	      best_alg->op[best_alg->ops] = alg_add_t2_m;
2909 	    }
2910 	}
2911       if (cache_hit)
2912 	goto done;
2913 
2914     do_alg_sub_t2_m:
2915       q = t + 1;
2916       m = ctz_hwi (q);
2917       if (q && m < maxm)
2918 	{
2919 	  op_cost = shiftsub0_cost (speed, mode, m);
2920 	  new_limit.cost = best_cost.cost - op_cost;
2921 	  new_limit.latency = best_cost.latency - op_cost;
2922 	  synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2923 
2924 	  alg_in->cost.cost += op_cost;
2925 	  alg_in->cost.latency += op_cost;
2926 	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2927 	    {
2928 	      best_cost = alg_in->cost;
2929 	      std::swap (alg_in, best_alg);
2930 	      best_alg->log[best_alg->ops] = m;
2931 	      best_alg->op[best_alg->ops] = alg_sub_t2_m;
2932 	    }
2933 	}
2934       if (cache_hit)
2935 	goto done;
2936     }
2937 
2938  done:
2939   /* If best_cost has not decreased, we have not found any algorithm.  */
2940   if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2941     {
2942       /* We failed to find an algorithm.  Record alg_impossible for
2943 	 this case (that is, <T, MODE, COST_LIMIT>) so that next time
2944 	 we are asked to find an algorithm for T within the same or
2945 	 lower COST_LIMIT, we can immediately return to the
2946 	 caller.  */
2947       entry_ptr->t = t;
2948       entry_ptr->mode = mode;
2949       entry_ptr->speed = speed;
2950       entry_ptr->alg = alg_impossible;
2951       entry_ptr->cost = *cost_limit;
2952       return;
2953     }
2954 
2955   /* Cache the result.  */
2956   if (!cache_hit)
2957     {
2958       entry_ptr->t = t;
2959       entry_ptr->mode = mode;
2960       entry_ptr->speed = speed;
2961       entry_ptr->alg = best_alg->op[best_alg->ops];
2962       entry_ptr->cost.cost = best_cost.cost;
2963       entry_ptr->cost.latency = best_cost.latency;
2964     }
2965 
2966   /* If we are getting too long a sequence for `struct algorithm'
2967      to record, make this search fail.  */
2968   if (best_alg->ops == MAX_BITS_PER_WORD)
2969     return;
2970 
2971   /* Copy the algorithm from temporary space to the space at alg_out.
2972      We avoid using structure assignment because the majority of
2973      best_alg is normally undefined, and this is a critical function.  */
2974   alg_out->ops = best_alg->ops + 1;
2975   alg_out->cost = best_cost;
2976   memcpy (alg_out->op, best_alg->op,
2977 	  alg_out->ops * sizeof *alg_out->op);
2978   memcpy (alg_out->log, best_alg->log,
2979 	  alg_out->ops * sizeof *alg_out->log);
2980 }
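/* Editorial illustration (not part of the original source): two sequences
   synth_mult can discover when their cost beats a hardware multiply.
   For T == 7 the odd-number case uses T + 1 == 8 and subtracts, giving
   x * 7 == (x << 3) - x; for T == 10 a shift is peeled off first and the
   remaining 5 becomes a shift-and-add, giving x * 10 == ((x << 2) + x) << 1.
   Which sequence is chosen, if any, depends on the per-target costs.  */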
2981 
2982 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2983    Try three variations:
2984 
2985        - a shift/add sequence based on VAL itself
2986        - a shift/add sequence based on -VAL, followed by a negation
2987        - a shift/add sequence based on VAL - 1, followed by an addition.
2988 
2989    Return true if the cheapest of these costs less than MULT_COST,
2990    describing the algorithm in *ALG and final fixup in *VARIANT.  */
2991 
2992 bool
2993 choose_mult_variant (machine_mode mode, HOST_WIDE_INT val,
2994 		     struct algorithm *alg, enum mult_variant *variant,
2995 		     int mult_cost)
2996 {
2997   struct algorithm alg2;
2998   struct mult_cost limit;
2999   int op_cost;
3000   bool speed = optimize_insn_for_speed_p ();
3001 
3002   /* Fail quickly for impossible bounds.  */
3003   if (mult_cost < 0)
3004     return false;
3005 
3006   /* Ensure that mult_cost provides a reasonable upper bound.
3007      Any constant multiplication can be performed with fewer
3008      than 2 * bits additions.  */
3009   op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
3010   if (mult_cost > op_cost)
3011     mult_cost = op_cost;
3012 
3013   *variant = basic_variant;
3014   limit.cost = mult_cost;
3015   limit.latency = mult_cost;
3016   synth_mult (alg, val, &limit, mode);
3017 
3018   /* This works only if the negated value actually fits in an
3019      `unsigned int'.  */
3020   if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
3021     {
3022       op_cost = neg_cost (speed, mode);
3023       if (MULT_COST_LESS (&alg->cost, mult_cost))
3024 	{
3025 	  limit.cost = alg->cost.cost - op_cost;
3026 	  limit.latency = alg->cost.latency - op_cost;
3027 	}
3028       else
3029 	{
3030 	  limit.cost = mult_cost - op_cost;
3031 	  limit.latency = mult_cost - op_cost;
3032 	}
3033 
3034       synth_mult (&alg2, -val, &limit, mode);
3035       alg2.cost.cost += op_cost;
3036       alg2.cost.latency += op_cost;
3037       if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3038 	*alg = alg2, *variant = negate_variant;
3039     }
3040 
3041   /* This proves very useful for division-by-constant.  */
3042   op_cost = add_cost (speed, mode);
3043   if (MULT_COST_LESS (&alg->cost, mult_cost))
3044     {
3045       limit.cost = alg->cost.cost - op_cost;
3046       limit.latency = alg->cost.latency - op_cost;
3047     }
3048   else
3049     {
3050       limit.cost = mult_cost - op_cost;
3051       limit.latency = mult_cost - op_cost;
3052     }
3053 
3054   synth_mult (&alg2, val - 1, &limit, mode);
3055   alg2.cost.cost += op_cost;
3056   alg2.cost.latency += op_cost;
3057   if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3058     *alg = alg2, *variant = add_variant;
3059 
3060   return MULT_COST_LESS (&alg->cost, mult_cost);
3061 }
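
#if 0
/* Editorial sketch, not part of GCC: the three variants tried above amount
   to algebraic identities such as the following (one plausible example per
   variant; which form actually wins depends on the shift, add and negate
   costs the target reports).  */
static unsigned int example_basic_10  (unsigned int x) { return ((x << 2) + x) << 1; }       /* 10*x from 10 itself (basic_variant) */
static int          example_negate_m3 (int x)          { return -((x << 1) + x); }           /* -3*x: synthesize 3*x, then negate (negate_variant) */
static unsigned int example_add_7     (unsigned int x) { return (((x << 1) + x) << 1) + x; } /* 7*x: synthesize 6*x, then add x (add_variant) */
#endif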
3062 
3063 /* A subroutine of expand_mult, used for constant multiplications.
3064    Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
3065    convenient.  Use the shift/add sequence described by ALG and apply
3066    the final fixup specified by VARIANT.  */
3067 
3068 static rtx
3069 expand_mult_const (machine_mode mode, rtx op0, HOST_WIDE_INT val,
3070 		   rtx target, const struct algorithm *alg,
3071 		   enum mult_variant variant)
3072 {
3073   unsigned HOST_WIDE_INT val_so_far;
3074   rtx_insn *insn;
3075   rtx accum, tem;
3076   int opno;
3077   machine_mode nmode;
3078 
3079   /* Avoid referencing memory over and over and invalid sharing
3080      on SUBREGs.  */
3081   op0 = force_reg (mode, op0);
3082 
3083   /* ACCUM starts out either as OP0 or as a zero, depending on
3084      the first operation.  */
3085 
3086   if (alg->op[0] == alg_zero)
3087     {
3088       accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
3089       val_so_far = 0;
3090     }
3091   else if (alg->op[0] == alg_m)
3092     {
3093       accum = copy_to_mode_reg (mode, op0);
3094       val_so_far = 1;
3095     }
3096   else
3097     gcc_unreachable ();
3098 
3099   for (opno = 1; opno < alg->ops; opno++)
3100     {
3101       int log = alg->log[opno];
3102       rtx shift_subtarget = optimize ? 0 : accum;
3103       rtx add_target
3104 	= (opno == alg->ops - 1 && target != 0 && variant != add_variant
3105 	   && !optimize)
3106 	  ? target : 0;
3107       rtx accum_target = optimize ? 0 : accum;
3108       rtx accum_inner;
3109 
3110       switch (alg->op[opno])
3111 	{
3112 	case alg_shift:
3113 	  tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3114 	  /* REG_EQUAL note will be attached to the following insn.  */
3115 	  emit_move_insn (accum, tem);
3116 	  val_so_far <<= log;
3117 	  break;
3118 
3119 	case alg_add_t_m2:
3120 	  tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3121 	  accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3122 				 add_target ? add_target : accum_target);
3123 	  val_so_far += HOST_WIDE_INT_1U << log;
3124 	  break;
3125 
3126 	case alg_sub_t_m2:
3127 	  tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3128 	  accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
3129 				 add_target ? add_target : accum_target);
3130 	  val_so_far -= HOST_WIDE_INT_1U << log;
3131 	  break;
3132 
3133 	case alg_add_t2_m:
3134 	  accum = expand_shift (LSHIFT_EXPR, mode, accum,
3135 				log, shift_subtarget, 0);
3136 	  accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
3137 				 add_target ? add_target : accum_target);
3138 	  val_so_far = (val_so_far << log) + 1;
3139 	  break;
3140 
3141 	case alg_sub_t2_m:
3142 	  accum = expand_shift (LSHIFT_EXPR, mode, accum,
3143 				log, shift_subtarget, 0);
3144 	  accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3145 				 add_target ? add_target : accum_target);
3146 	  val_so_far = (val_so_far << log) - 1;
3147 	  break;
3148 
3149 	case alg_add_factor:
3150 	  tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3151 	  accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3152 				 add_target ? add_target : accum_target);
3153 	  val_so_far += val_so_far << log;
3154 	  break;
3155 
3156 	case alg_sub_factor:
3157 	  tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3158 	  accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3159 				 (add_target
3160 				  ? add_target : (optimize ? 0 : tem)));
3161 	  val_so_far = (val_so_far << log) - val_so_far;
3162 	  break;
3163 
3164 	default:
3165 	  gcc_unreachable ();
3166 	}
3167 
3168       if (SCALAR_INT_MODE_P (mode))
3169 	{
3170 	  /* Write a REG_EQUAL note on the last insn so that we can cse
3171 	     multiplication sequences.  Note that if ACCUM is a SUBREG,
3172 	     we've set the inner register and must properly indicate that.  */
3173           tem = op0, nmode = mode;
3174           accum_inner = accum;
3175           if (GET_CODE (accum) == SUBREG)
3176 	    {
3177 	      accum_inner = SUBREG_REG (accum);
3178 	      nmode = GET_MODE (accum_inner);
3179 	      tem = gen_lowpart (nmode, op0);
3180 	    }
3181 
3182           insn = get_last_insn ();
3183           set_dst_reg_note (insn, REG_EQUAL,
3184 			    gen_rtx_MULT (nmode, tem,
3185 					  gen_int_mode (val_so_far, nmode)),
3186 			    accum_inner);
3187 	}
3188     }
3189 
3190   if (variant == negate_variant)
3191     {
3192       val_so_far = -val_so_far;
3193       accum = expand_unop (mode, neg_optab, accum, target, 0);
3194     }
3195   else if (variant == add_variant)
3196     {
3197       val_so_far = val_so_far + 1;
3198       accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3199     }
3200 
3201   /* Compare only the bits of val and val_so_far that are significant
3202      in the result mode, to avoid sign-/zero-extension confusion.  */
3203   nmode = GET_MODE_INNER (mode);
3204   val &= GET_MODE_MASK (nmode);
3205   val_so_far &= GET_MODE_MASK (nmode);
3206   gcc_assert (val == (HOST_WIDE_INT) val_so_far);
3207 
3208   return accum;
3209 }
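
/* Editorial worked example (illustrative, not from the original sources):
   VAL_SO_FAR above mirrors in host arithmetic what the emitted RTL computes,
   so the final assertion can cross-check the synthesis.  For the algorithm
   {alg_m, alg_add_factor log 2, alg_shift log 1}, i.e. 10*x as ((x<<2)+x)<<1:

       alg_m:                  accum = x             val_so_far = 1
       alg_add_factor, log 2:  accum += accum << 2   val_so_far = 1 + (1 << 2) = 5
       alg_shift, log 1:       accum <<= 1           val_so_far = 5 << 1 = 10

   and 10 equals VAL masked to the mode, as the gcc_assert at the end
   requires.  The algorithm actually chosen for a given constant depends on
   the target's costs.  */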
3210 
3211 /* Perform a multiplication and return an rtx for the result.
3212    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3213    TARGET is a suggestion for where to store the result (an rtx).
3214 
3215    We check specially for a constant integer as OP1.
3216    If you want this check for OP0 as well, then before calling
3217    you should swap the two operands if OP0 would be constant.  */
3218 
3219 rtx
3220 expand_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3221 	     int unsignedp)
3222 {
3223   enum mult_variant variant;
3224   struct algorithm algorithm;
3225   rtx scalar_op1;
3226   int max_cost;
3227   bool speed = optimize_insn_for_speed_p ();
3228   bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
3229 
3230   if (CONSTANT_P (op0))
3231     std::swap (op0, op1);
3232 
3233   /* For vectors, there are several simplifications that can be made if
3234      all elements of the vector constant are identical.  */
3235   scalar_op1 = unwrap_const_vec_duplicate (op1);
3236 
3237   if (INTEGRAL_MODE_P (mode))
3238     {
3239       rtx fake_reg;
3240       HOST_WIDE_INT coeff;
3241       bool is_neg;
3242       int mode_bitsize;
3243 
3244       if (op1 == CONST0_RTX (mode))
3245 	return op1;
3246       if (op1 == CONST1_RTX (mode))
3247 	return op0;
3248       if (op1 == CONSTM1_RTX (mode))
3249 	return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3250 			    op0, target, 0);
3251 
3252       if (do_trapv)
3253 	goto skip_synth;
3254 
3255       /* If mode is integer vector mode, check if the backend supports
3256 	 vector lshift (by scalar or vector) at all.  If not, we can't use
3257 	 synthesized multiply.  */
3258       if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
3259 	  && optab_handler (vashl_optab, mode) == CODE_FOR_nothing
3260 	  && optab_handler (ashl_optab, mode) == CODE_FOR_nothing)
3261 	goto skip_synth;
3262 
3263       /* These are the operations that are potentially turned into
3264 	 a sequence of shifts and additions.  */
3265       mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
3266 
3267       /* synth_mult does an `unsigned int' multiply.  As long as the mode is
3268 	 less than or equal in size to `unsigned int' this doesn't matter.
3269 	 If the mode is larger than `unsigned int', then synth_mult works
3270 	 only if the constant value exactly fits in an `unsigned int' without
3271 	 any truncation.  This means that multiplying by negative values does
3272 	 not work; results are off by 2^32 on a 32 bit machine.  */
3273       if (CONST_INT_P (scalar_op1))
3274 	{
3275 	  coeff = INTVAL (scalar_op1);
3276 	  is_neg = coeff < 0;
3277 	}
3278 #if TARGET_SUPPORTS_WIDE_INT
3279       else if (CONST_WIDE_INT_P (scalar_op1))
3280 #else
3281       else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
3282 #endif
3283 	{
3284 	  int shift = wi::exact_log2 (rtx_mode_t (scalar_op1, mode));
3285 	  /* Perfect power of 2 (other than 1, which is handled above).  */
3286 	  if (shift > 0)
3287 	    return expand_shift (LSHIFT_EXPR, mode, op0,
3288 				 shift, target, unsignedp);
3289 	  else
3290 	    goto skip_synth;
3291 	}
3292       else
3293 	goto skip_synth;
3294 
3295       /* We used to test optimize here, on the grounds that it's better to
3296 	 produce a smaller program when -O is not used.  But this causes
3297 	 such a terrible slowdown sometimes that it seems better to always
3298 	 use synth_mult.  */
3299 
3300       /* Special case powers of two.  */
3301       if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)
3302 	  && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT))
3303 	return expand_shift (LSHIFT_EXPR, mode, op0,
3304 			     floor_log2 (coeff), target, unsignedp);
3305 
3306       fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3307 
3308       /* Attempt to handle multiplication of DImode values by negative
3309 	 coefficients, by performing the multiplication by a positive
3310 	 multiplier and then negating the result.  */
3311       if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3312 	{
3313 	  /* It's safe to use -coeff even for INT_MIN, as the
3314 	     result is interpreted as an unsigned coefficient.
3315 	     Exclude cost of op0 from max_cost to match the cost
3316 	     calculation of the synth_mult.  */
3317 	  coeff = -(unsigned HOST_WIDE_INT) coeff;
3318 	  max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1),
3319 				    mode, speed)
3320 		      - neg_cost (speed, mode));
3321 	  if (max_cost <= 0)
3322 	    goto skip_synth;
3323 
3324 	  /* Special case powers of two.  */
3325 	  if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3326 	    {
3327 	      rtx temp = expand_shift (LSHIFT_EXPR, mode, op0,
3328 				       floor_log2 (coeff), target, unsignedp);
3329 	      return expand_unop (mode, neg_optab, temp, target, 0);
3330 	    }
3331 
3332 	  if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3333 				   max_cost))
3334 	    {
3335 	      rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX,
3336 					    &algorithm, variant);
3337 	      return expand_unop (mode, neg_optab, temp, target, 0);
3338 	    }
3339 	  goto skip_synth;
3340 	}
3341 
3342       /* Exclude cost of op0 from max_cost to match the cost
3343 	 calculation of the synth_mult.  */
3344       max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), mode, speed);
3345       if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3346 	return expand_mult_const (mode, op0, coeff, target,
3347 				  &algorithm, variant);
3348     }
3349  skip_synth:
3350 
3351   /* Expand x*2.0 as x+x.  */
3352   if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1)
3353       && real_equal (CONST_DOUBLE_REAL_VALUE (scalar_op1), &dconst2))
3354     {
3355       op0 = force_reg (GET_MODE (op0), op0);
3356       return expand_binop (mode, add_optab, op0, op0,
3357 			   target, unsignedp, OPTAB_LIB_WIDEN);
3358     }
3359 
3360   /* This used to use umul_optab if unsigned, but for non-widening multiply
3361      there is no difference between signed and unsigned.  */
3362   op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
3363 		      op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3364   gcc_assert (op0);
3365   return op0;
3366 }
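
/* Editorial note (illustrative): for a negative constant in a mode wider
   than HOST_WIDE_INT -- e.g. a TImode multiply by -5 when HOST_WIDE_INT is
   64 bits -- the code above synthesizes the multiplication by 5 (typically
   (x << 2) + x) and then negates, provided that sequence plus the negation
   still costs less than the target's multiply instruction.  */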
3367 
3368 /* Return a cost estimate for multiplying a register by the given
3369    COEFFicient in the given MODE and SPEED.  */
3370 
3371 int
3372 mult_by_coeff_cost (HOST_WIDE_INT coeff, machine_mode mode, bool speed)
3373 {
3374   int max_cost;
3375   struct algorithm algorithm;
3376   enum mult_variant variant;
3377 
3378   rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3379   max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg),
3380 			   mode, speed);
3381   if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3382     return algorithm.cost.cost;
3383   else
3384     return max_cost;
3385 }
3386 
3387 /* Perform a widening multiplication and return an rtx for the result.
3388    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3389    TARGET is a suggestion for where to store the result (an rtx).
3390    THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3391    or smul_widen_optab.
3392 
3393    We check specially for a constant integer as OP1, comparing the
3394    cost of a widening multiply against the cost of a sequence of shifts
3395    and adds.  */
3396 
3397 rtx
3398 expand_widening_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3399 		      int unsignedp, optab this_optab)
3400 {
3401   bool speed = optimize_insn_for_speed_p ();
3402   rtx cop1;
3403 
3404   if (CONST_INT_P (op1)
3405       && GET_MODE (op0) != VOIDmode
3406       && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3407 				this_optab == umul_widen_optab))
3408       && CONST_INT_P (cop1)
3409       && (INTVAL (cop1) >= 0
3410 	  || HWI_COMPUTABLE_MODE_P (mode)))
3411     {
3412       HOST_WIDE_INT coeff = INTVAL (cop1);
3413       int max_cost;
3414       enum mult_variant variant;
3415       struct algorithm algorithm;
3416 
3417       if (coeff == 0)
3418 	return CONST0_RTX (mode);
3419 
3420       /* Special case powers of two.  */
3421       if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3422 	{
3423 	  op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3424 	  return expand_shift (LSHIFT_EXPR, mode, op0,
3425 			       floor_log2 (coeff), target, unsignedp);
3426 	}
3427 
3428       /* Exclude cost of op0 from max_cost to match the cost
3429 	 calculation of the synth_mult.  */
3430       max_cost = mul_widen_cost (speed, mode);
3431       if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3432 			       max_cost))
3433 	{
3434 	  op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3435 	  return expand_mult_const (mode, op0, coeff, target,
3436 				    &algorithm, variant);
3437 	}
3438     }
3439   return expand_binop (mode, this_optab, op0, op1, target,
3440 		       unsignedp, OPTAB_LIB_WIDEN);
3441 }
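
/* Editorial note (illustrative): a widening multiply takes operands in the
   narrower mode of OP0 and produces the full double-width product in MODE,
   conceptually like the C expression (uint64_t) a * (uint64_t) b for 32-bit
   a and b.  When OP1 is a power of two the code above instead converts OP0
   to MODE (with the signedness implied by THIS_OPTAB) and emits a single
   shift.  */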
3442 
3443 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3444    replace division by D, and put the least significant N bits of the result
3445    in *MULTIPLIER_PTR and return the most significant bit.
3446 
3447    The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3448    needed precision is in PRECISION (should be <= N).
3449 
3450    PRECISION should be as small as possible so this function can choose
3451    the multiplier more freely.
3452 
3453    The rounded-up logarithm of D is placed in *LGUP_PTR.  A shift count that
3454    is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3455 
3456    Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3457    where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier.  */
3458 
3459 unsigned HOST_WIDE_INT
3460 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3461 		   unsigned HOST_WIDE_INT *multiplier_ptr,
3462 		   int *post_shift_ptr, int *lgup_ptr)
3463 {
3464   int lgup, post_shift;
3465   int pow, pow2;
3466 
3467   /* lgup = ceil(log2(divisor)); */
3468   lgup = ceil_log2 (d);
3469 
3470   gcc_assert (lgup <= n);
3471 
3472   pow = n + lgup;
3473   pow2 = n + lgup - precision;
3474 
3475   /* mlow = 2^(N + lgup)/d */
3476   wide_int val = wi::set_bit_in_zero (pow, HOST_BITS_PER_DOUBLE_INT);
3477   wide_int mlow = wi::udiv_trunc (val, d);
3478 
3479   /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3480   val |= wi::set_bit_in_zero (pow2, HOST_BITS_PER_DOUBLE_INT);
3481   wide_int mhigh = wi::udiv_trunc (val, d);
3482 
3483   /* If precision == N, then mlow, mhigh exceed 2^N
3484      (but they do not exceed 2^(N+1)).  */
3485 
3486   /* Reduce to lowest terms.  */
3487   for (post_shift = lgup; post_shift > 0; post_shift--)
3488     {
3489       unsigned HOST_WIDE_INT ml_lo = wi::extract_uhwi (mlow, 1,
3490 						       HOST_BITS_PER_WIDE_INT);
3491       unsigned HOST_WIDE_INT mh_lo = wi::extract_uhwi (mhigh, 1,
3492 						       HOST_BITS_PER_WIDE_INT);
3493       if (ml_lo >= mh_lo)
3494 	break;
3495 
3496       mlow = wi::uhwi (ml_lo, HOST_BITS_PER_DOUBLE_INT);
3497       mhigh = wi::uhwi (mh_lo, HOST_BITS_PER_DOUBLE_INT);
3498     }
3499 
3500   *post_shift_ptr = post_shift;
3501   *lgup_ptr = lgup;
3502   if (n < HOST_BITS_PER_WIDE_INT)
3503     {
3504       unsigned HOST_WIDE_INT mask = (HOST_WIDE_INT_1U << n) - 1;
3505       *multiplier_ptr = mhigh.to_uhwi () & mask;
3506       return mhigh.to_uhwi () >= mask;
3507     }
3508   else
3509     {
3510       *multiplier_ptr = mhigh.to_uhwi ();
3511       return wi::extract_uhwi (mhigh, HOST_BITS_PER_WIDE_INT, 1);
3512     }
3513 }
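
/* Editorial worked example (not from the original sources): for d == 7 and
   n == precision == 32 the function above returns 1 (the multiplier needs
   N + 1 = 33 bits), stores 0x24924925 in *MULTIPLIER_PTR and 3 in
   *POST_SHIFT_PTR.  The unsigned division expansion further below then uses
   the usual fixup for an oversized multiplier:

       t1 = mulhi (x, 0x24924925)   -- upper 32 bits of the 64-bit product
       t2 = (x - t1) >> 1
       q  = (t1 + t2) >> 2          -- post_shift - 1

   which yields x / 7 for every 32-bit unsigned x.  */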
3514 
3515 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3516    congruent to 1 (mod 2**N).  */
3517 
3518 static unsigned HOST_WIDE_INT
3519 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3520 {
3521   /* Solve x*y == 1 (mod 2^n), where x is odd.  Return y.  */
3522 
3523   /* The algorithm notes that the choice y = x satisfies
3524      x*y == 1 mod 2^3, since x is assumed odd.
3525      Each iteration doubles the number of bits of significance in y.  */
3526 
3527   unsigned HOST_WIDE_INT mask;
3528   unsigned HOST_WIDE_INT y = x;
3529   int nbit = 3;
3530 
3531   mask = (n == HOST_BITS_PER_WIDE_INT
3532 	  ? HOST_WIDE_INT_M1U
3533 	  : (HOST_WIDE_INT_1U << n) - 1);
3534 
3535   while (nbit < n)
3536     {
3537       y = y * (2 - x*y) & mask;		/* Modulo 2^N */
3538       nbit *= 2;
3539     }
3540   return y;
3541 }
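
#if 0
/* Editorial sketch, not part of GCC: the loop above is a Newton-Raphson
   iteration on the 2-adic inverse; y starts out correct modulo 2^3 and each
   step doubles the number of correct low bits (3, 6, 12, 24, 48 >= 32).
   For example the inverse of 7 modulo 2^32 is 0xB6DB6DB7.  */
static int
example_inverse_of_7 (void)
{
  unsigned int y = 0xB6DB6DB7u;
  return 7u * y == 1u;		/* the product wraps modulo 2^32; this holds */
}
#endif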
3542 
3543 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3544    flavor of OP0 and OP1.  ADJ_OPERAND is already the high half of the
3545    product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the signed product
3546    to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3547    become signed.
3548 
3549    The result is put in TARGET if that is convenient.
3550 
3551    MODE is the mode of operation.  */
3552 
3553 rtx
3554 expand_mult_highpart_adjust (machine_mode mode, rtx adj_operand, rtx op0,
3555 			     rtx op1, rtx target, int unsignedp)
3556 {
3557   rtx tem;
3558   enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3559 
3560   tem = expand_shift (RSHIFT_EXPR, mode, op0,
3561 		      GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3562   tem = expand_and (mode, tem, op1, NULL_RTX);
3563   adj_operand
3564     = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3565 		     adj_operand);
3566 
3567   tem = expand_shift (RSHIFT_EXPR, mode, op1,
3568 		      GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3569   tem = expand_and (mode, tem, op0, NULL_RTX);
3570   target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3571 			  target);
3572 
3573   return target;
3574 }
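
/* Editorial note (illustrative): with N-bit operands the two flavors of the
   high part differ only by a simple correction, because the unsigned
   interpretation of a negative A is A + 2^N:

       highpart_u (a, b) == highpart_s (a, b)
                            + (a < 0 ? b : 0) + (b < 0 ? a : 0)   (mod 2^N)

   The arithmetic right shifts above build the (a < 0 ? b : 0) terms without
   branches, and ADJ_CODE selects the direction of the correction.  */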
3575 
3576 /* Subroutine of expmed_mult_highpart.  Return the MODE high part of OP.  */
3577 
3578 static rtx
3579 extract_high_half (machine_mode mode, rtx op)
3580 {
3581   machine_mode wider_mode;
3582 
3583   if (mode == word_mode)
3584     return gen_highpart (mode, op);
3585 
3586   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3587 
3588   wider_mode = GET_MODE_WIDER_MODE (mode);
3589   op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3590 		     GET_MODE_BITSIZE (mode), 0, 1);
3591   return convert_modes (mode, wider_mode, op, 0);
3592 }
3593 
3594 /* Like expmed_mult_highpart, but only consider using a multiplication
3595    optab.  OP1 is an rtx for the constant operand.  */
3596 
3597 static rtx
3598 expmed_mult_highpart_optab (machine_mode mode, rtx op0, rtx op1,
3599 			    rtx target, int unsignedp, int max_cost)
3600 {
3601   rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3602   machine_mode wider_mode;
3603   optab moptab;
3604   rtx tem;
3605   int size;
3606   bool speed = optimize_insn_for_speed_p ();
3607 
3608   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3609 
3610   wider_mode = GET_MODE_WIDER_MODE (mode);
3611   size = GET_MODE_BITSIZE (mode);
3612 
3613   /* Firstly, try using a multiplication insn that only generates the needed
3614      high part of the product, and in the sign flavor of unsignedp.  */
3615   if (mul_highpart_cost (speed, mode) < max_cost)
3616     {
3617       moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3618       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3619 			  unsignedp, OPTAB_DIRECT);
3620       if (tem)
3621 	return tem;
3622     }
3623 
3624   /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3625      Need to adjust the result after the multiplication.  */
3626   if (size - 1 < BITS_PER_WORD
3627       && (mul_highpart_cost (speed, mode)
3628 	  + 2 * shift_cost (speed, mode, size-1)
3629 	  + 4 * add_cost (speed, mode) < max_cost))
3630     {
3631       moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3632       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3633 			  unsignedp, OPTAB_DIRECT);
3634       if (tem)
3635 	/* We used the wrong signedness.  Adjust the result.  */
3636 	return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3637 					    tem, unsignedp);
3638     }
3639 
3640   /* Try widening multiplication.  */
3641   moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3642   if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3643       && mul_widen_cost (speed, wider_mode) < max_cost)
3644     {
3645       tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3646 			  unsignedp, OPTAB_WIDEN);
3647       if (tem)
3648 	return extract_high_half (mode, tem);
3649     }
3650 
3651   /* Try widening the mode and perform a non-widening multiplication.  */
3652   if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3653       && size - 1 < BITS_PER_WORD
3654       && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3655 	  < max_cost))
3656     {
3657       rtx_insn *insns;
3658       rtx wop0, wop1;
3659 
3660       /* We need to widen the operands, for example to ensure the
3661 	 constant multiplier is correctly sign or zero extended.
3662 	 Use a sequence to clean up any instructions emitted by
3663 	 the conversions if things don't work out.  */
3664       start_sequence ();
3665       wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3666       wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3667       tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3668 			  unsignedp, OPTAB_WIDEN);
3669       insns = get_insns ();
3670       end_sequence ();
3671 
3672       if (tem)
3673 	{
3674 	  emit_insn (insns);
3675 	  return extract_high_half (mode, tem);
3676 	}
3677     }
3678 
3679   /* Try widening multiplication of opposite signedness, and adjust.  */
3680   moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3681   if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3682       && size - 1 < BITS_PER_WORD
3683       && (mul_widen_cost (speed, wider_mode)
3684 	  + 2 * shift_cost (speed, mode, size-1)
3685 	  + 4 * add_cost (speed, mode) < max_cost))
3686     {
3687       tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3688 			  NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3689       if (tem != 0)
3690 	{
3691 	  tem = extract_high_half (mode, tem);
3692 	  /* We used the wrong signedness.  Adjust the result.  */
3693 	  return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3694 					      target, unsignedp);
3695 	}
3696     }
3697 
3698   return 0;
3699 }
3700 
3701 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3702    putting the high half of the result in TARGET if that is convenient,
3703    and return where the result is.  If the operation cannot be performed,
3704    0 is returned.
3705 
3706    MODE is the mode of operation and result.
3707 
3708    UNSIGNEDP nonzero means unsigned multiply.
3709 
3710    MAX_COST is the total allowed cost for the expanded RTL.  */
3711 
3712 static rtx
3713 expmed_mult_highpart (machine_mode mode, rtx op0, rtx op1,
3714 		      rtx target, int unsignedp, int max_cost)
3715 {
3716   machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3717   unsigned HOST_WIDE_INT cnst1;
3718   int extra_cost;
3719   bool sign_adjust = false;
3720   enum mult_variant variant;
3721   struct algorithm alg;
3722   rtx tem;
3723   bool speed = optimize_insn_for_speed_p ();
3724 
3725   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3726   /* We can't support modes wider than HOST_BITS_PER_WIDE_INT.  */
3727   gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3728 
3729   cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3730 
3731   /* We can't optimize modes wider than BITS_PER_WORD.
3732      ??? We might be able to perform double-word arithmetic if
3733      mode == word_mode, however all the cost calculations in
3734      synth_mult etc. assume single-word operations.  */
3735   if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3736     return expmed_mult_highpart_optab (mode, op0, op1, target,
3737 				       unsignedp, max_cost);
3738 
3739   extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3740 
3741   /* Check whether we try to multiply by a negative constant.  */
3742   if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3743     {
3744       sign_adjust = true;
3745       extra_cost += add_cost (speed, mode);
3746     }
3747 
3748   /* See whether shift/add multiplication is cheap enough.  */
3749   if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3750 			   max_cost - extra_cost))
3751     {
3752       /* See whether the specialized multiplication optabs are
3753 	 cheaper than the shift/add version.  */
3754       tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3755 					alg.cost.cost + extra_cost);
3756       if (tem)
3757 	return tem;
3758 
3759       tem = convert_to_mode (wider_mode, op0, unsignedp);
3760       tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3761       tem = extract_high_half (mode, tem);
3762 
3763       /* Adjust result for signedness.  */
3764       if (sign_adjust)
3765 	tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3766 
3767       return tem;
3768     }
3769   return expmed_mult_highpart_optab (mode, op0, op1, target,
3770 				     unsignedp, max_cost);
3771 }
3772 
3773 
3774 /* Expand signed modulus of OP0 by a power of two D in mode MODE.  */
3775 
3776 static rtx
3777 expand_smod_pow2 (machine_mode mode, rtx op0, HOST_WIDE_INT d)
3778 {
3779   rtx result, temp, shift;
3780   rtx_code_label *label;
3781   int logd;
3782   int prec = GET_MODE_PRECISION (mode);
3783 
3784   logd = floor_log2 (d);
3785   result = gen_reg_rtx (mode);
3786 
3787   /* Avoid conditional branches when they're expensive.  */
3788   if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3789       && optimize_insn_for_speed_p ())
3790     {
3791       rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3792 				      mode, 0, -1);
3793       if (signmask)
3794 	{
3795 	  HOST_WIDE_INT masklow = (HOST_WIDE_INT_1 << logd) - 1;
3796 	  signmask = force_reg (mode, signmask);
3797 	  shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3798 
3799 	  /* Use the rtx_cost of a LSHIFTRT instruction to determine
3800 	     which instruction sequence to use.  If logical right shifts
3801 	     are expensive then use 2 XORs, 2 SUBs and an AND, otherwise
3802 	     use a LSHIFTRT, 1 ADD, 1 SUB and an AND.  */
3803 
3804 	  temp = gen_rtx_LSHIFTRT (mode, result, shift);
3805 	  if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
3806 	      || (set_src_cost (temp, mode, optimize_insn_for_speed_p ())
3807 		  > COSTS_N_INSNS (2)))
3808 	    {
3809 	      temp = expand_binop (mode, xor_optab, op0, signmask,
3810 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
3811 	      temp = expand_binop (mode, sub_optab, temp, signmask,
3812 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
3813 	      temp = expand_binop (mode, and_optab, temp,
3814 				   gen_int_mode (masklow, mode),
3815 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
3816 	      temp = expand_binop (mode, xor_optab, temp, signmask,
3817 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
3818 	      temp = expand_binop (mode, sub_optab, temp, signmask,
3819 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
3820 	    }
3821 	  else
3822 	    {
3823 	      signmask = expand_binop (mode, lshr_optab, signmask, shift,
3824 				       NULL_RTX, 1, OPTAB_LIB_WIDEN);
3825 	      signmask = force_reg (mode, signmask);
3826 
3827 	      temp = expand_binop (mode, add_optab, op0, signmask,
3828 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
3829 	      temp = expand_binop (mode, and_optab, temp,
3830 				   gen_int_mode (masklow, mode),
3831 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
3832 	      temp = expand_binop (mode, sub_optab, temp, signmask,
3833 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
3834 	    }
3835 	  return temp;
3836 	}
3837     }
3838 
3839   /* Mask contains the mode's signbit and the significant bits of the
3840      modulus.  By including the signbit in the operation, many targets
3841      can avoid an explicit compare operation in the following comparison
3842      against zero.  */
3843   wide_int mask = wi::mask (logd, false, prec);
3844   mask = wi::set_bit (mask, prec - 1);
3845 
3846   temp = expand_binop (mode, and_optab, op0,
3847 		       immed_wide_int_const (mask, mode),
3848 		       result, 1, OPTAB_LIB_WIDEN);
3849   if (temp != result)
3850     emit_move_insn (result, temp);
3851 
3852   label = gen_label_rtx ();
3853   do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3854 
3855   temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3856 		       0, OPTAB_LIB_WIDEN);
3857 
3858   mask = wi::mask (logd, true, prec);
3859   temp = expand_binop (mode, ior_optab, temp,
3860 		       immed_wide_int_const (mask, mode),
3861 		       result, 1, OPTAB_LIB_WIDEN);
3862   temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3863 		       0, OPTAB_LIB_WIDEN);
3864   if (temp != result)
3865     emit_move_insn (result, temp);
3866   emit_label (label);
3867   return result;
3868 }
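
#if 0
/* Editorial sketch, not part of GCC: roughly the add/and/sub flavor of the
   branch-free sequence emitted above, written out for 32-bit int and
   d == 2^LOGD, 1 <= LOGD <= 31.  It relies on GCC's arithmetic right shift
   of negative ints; the bias makes the AND round toward zero for negative
   operands, matching C's truncating %.  */
static int
example_smod_pow2 (int x, int logd)
{
  int mask = (int) ((1u << logd) - 1);				/* d - 1 */
  int bias = (int) (((unsigned int) (x >> 31)) >> (32 - logd));	/* d - 1 if x < 0, else 0 */
  return ((x + bias) & mask) - bias;
}
#endif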
3869 
3870 /* Expand signed division of OP0 by a power of two D in mode MODE.
3871    This routine is only called for positive values of D.  */
3872 
3873 static rtx
3874 expand_sdiv_pow2 (machine_mode mode, rtx op0, HOST_WIDE_INT d)
3875 {
3876   rtx temp;
3877   rtx_code_label *label;
3878   int logd;
3879 
3880   logd = floor_log2 (d);
3881 
3882   if (d == 2
3883       && BRANCH_COST (optimize_insn_for_speed_p (),
3884 		      false) >= 1)
3885     {
3886       temp = gen_reg_rtx (mode);
3887       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3888       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3889 			   0, OPTAB_LIB_WIDEN);
3890       return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3891     }
3892 
3893   if (HAVE_conditional_move
3894       && BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2)
3895     {
3896       rtx temp2;
3897 
3898       start_sequence ();
3899       temp2 = copy_to_mode_reg (mode, op0);
3900       temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode),
3901 			   NULL_RTX, 0, OPTAB_LIB_WIDEN);
3902       temp = force_reg (mode, temp);
3903 
3904       /* Construct "temp2 = (temp2 < 0) ? temp : temp2".  */
3905       temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3906 				     mode, temp, temp2, mode, 0);
3907       if (temp2)
3908 	{
3909 	  rtx_insn *seq = get_insns ();
3910 	  end_sequence ();
3911 	  emit_insn (seq);
3912 	  return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
3913 	}
3914       end_sequence ();
3915     }
3916 
3917   if (BRANCH_COST (optimize_insn_for_speed_p (),
3918 		   false) >= 2)
3919     {
3920       int ushift = GET_MODE_BITSIZE (mode) - logd;
3921 
3922       temp = gen_reg_rtx (mode);
3923       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3924       if (GET_MODE_BITSIZE (mode) >= BITS_PER_WORD
3925 	  || shift_cost (optimize_insn_for_speed_p (), mode, ushift)
3926 	     > COSTS_N_INSNS (1))
3927 	temp = expand_binop (mode, and_optab, temp, gen_int_mode (d - 1, mode),
3928 			     NULL_RTX, 0, OPTAB_LIB_WIDEN);
3929       else
3930 	temp = expand_shift (RSHIFT_EXPR, mode, temp,
3931 			     ushift, NULL_RTX, 1);
3932       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3933 			   0, OPTAB_LIB_WIDEN);
3934       return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3935     }
3936 
3937   label = gen_label_rtx ();
3938   temp = copy_to_mode_reg (mode, op0);
3939   do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3940   expand_inc (temp, gen_int_mode (d - 1, mode));
3941   emit_label (label);
3942   return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3943 }
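
#if 0
/* Editorial sketch, not part of GCC: the shift-based variant emitted above,
   written out for 32-bit int and d == 2^LOGD, 1 <= LOGD <= 31.  A plain
   arithmetic right shift would round toward minus infinity; adding d - 1 to
   negative operands first gives the truncating division C requires.  Relies
   on GCC's arithmetic right shift of negative ints.  */
static int
example_sdiv_pow2 (int x, int logd)
{
  int bias = (int) (((unsigned int) (x >> 31)) >> (32 - logd));	/* d - 1 if x < 0, else 0 */
  return (x + bias) >> logd;
}
#endif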
3944 
3945 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3946    if that is convenient, and returning where the result is.
3947    You may request either the quotient or the remainder as the result;
3948    specify REM_FLAG nonzero to get the remainder.
3949 
3950    CODE is the expression code for which kind of division this is;
3951    it controls how rounding is done.  MODE is the machine mode to use.
3952    UNSIGNEDP nonzero means do unsigned division.  */
3953 
3954 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3955    and then correct it by or'ing in missing high bits
3956    if result of ANDI is nonzero.
3957    For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3958    This could optimize to a bfexts instruction.
3959    But C doesn't use these operations, so their optimizations are
3960    left for later.  */
3961 /* ??? For modulo, we don't actually need the highpart of the first product,
3962    the low part will do nicely.  And for small divisors, the second multiply
3963    can also be a low-part only multiply or even be completely left out.
3964    E.g. to calculate the remainder of a division by 3 with a 32 bit
3965    multiply, multiply with 0x55555556 and extract the upper two bits;
3966    the result is exact for inputs up to 0x1fffffff.
3967    The input range can be reduced by using cross-sum rules.
3968    For odd divisors >= 3, the following table gives right shift counts
3969    so that if a number is shifted by an integer multiple of the given
3970    amount, the remainder stays the same:
3971    2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3972    14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3973    0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3974    20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3975    0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3976 
3977    Cross-sum rules for even numbers can be derived by leaving as many bits
3978    to the right alone as the divisor has zeros to the right.
3979    E.g. if x is an unsigned 32 bit number:
3980    (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3981    */
3982 
3983 rtx
3984 expand_divmod (int rem_flag, enum tree_code code, machine_mode mode,
3985 	       rtx op0, rtx op1, rtx target, int unsignedp)
3986 {
3987   machine_mode compute_mode;
3988   rtx tquotient;
3989   rtx quotient = 0, remainder = 0;
3990   rtx_insn *last;
3991   int size;
3992   rtx_insn *insn;
3993   optab optab1, optab2;
3994   int op1_is_constant, op1_is_pow2 = 0;
3995   int max_cost, extra_cost;
3996   static HOST_WIDE_INT last_div_const = 0;
3997   bool speed = optimize_insn_for_speed_p ();
3998 
3999   op1_is_constant = CONST_INT_P (op1);
4000   if (op1_is_constant)
4001     {
4002       wide_int ext_op1 = rtx_mode_t (op1, mode);
4003       op1_is_pow2 = (wi::popcount (ext_op1) == 1
4004 		     || (! unsignedp
4005 			 && wi::popcount (wi::neg (ext_op1)) == 1));
4006     }
4007 
4008   /*
4009      This is the structure of expand_divmod:
4010 
4011      First comes code to fix up the operands so we can perform the operations
4012      correctly and efficiently.
4013 
4014      Second comes a switch statement with code specific for each rounding mode.
4015      For some special operands this code emits all RTL for the desired
4016      operation, for other cases, it generates only a quotient and stores it in
4017      QUOTIENT.  The case for trunc division/remainder might leave quotient = 0,
4018      to indicate that it has not done anything.
4019 
4020      Last comes code that finishes the operation.  If QUOTIENT is set and
4021      REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1.  If
4022      QUOTIENT is not set, it is computed using trunc rounding.
4023 
4024      We try to generate special code for division and remainder when OP1 is a
4025      constant.  If |OP1| = 2**n we can use shifts and some other fast
4026      operations.  For other values of OP1, we compute a carefully selected
4027      fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
4028      by m.
4029 
4030      In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
4031      half of the product.  Different strategies for generating the product are
4032      implemented in expmed_mult_highpart.
4033 
4034      If what we actually want is the remainder, we generate that by another
4035      by-constant multiplication and a subtraction.  */
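
  /* Editorial worked example (illustrative): for a signed 32-bit division by
     3, choose_multiplier gives the multiplier 0x55555556 with a post-shift
     of 0, and the sequence emitted below is essentially

	 q = mulhi_signed (x, 0x55555556) - (x >> 31)

     where mulhi_signed is the upper 32 bits of the signed 64-bit product,
     followed, when the remainder is wanted, by r = x - q * 3.  */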
4036 
4037   /* We shouldn't be called with OP1 == const1_rtx, but some of the
4038      code below will malfunction if we are, so check here and handle
4039      the special case if so.  */
4040   if (op1 == const1_rtx)
4041     return rem_flag ? const0_rtx : op0;
4042 
4043   /* When dividing by -1, we could get an overflow.
4044      negv_optab can handle overflows.  */
4045   if (! unsignedp && op1 == constm1_rtx)
4046     {
4047       if (rem_flag)
4048 	return const0_rtx;
4049       return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT
4050 			  ? negv_optab : neg_optab, op0, target, 0);
4051     }
4052 
4053   if (target
4054       /* Don't use the function value register as a target
4055 	 since we have to read it as well as write it,
4056 	 and function-inlining gets confused by this.  */
4057       && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
4058 	  /* Don't clobber an operand while doing a multi-step calculation.  */
4059 	  || ((rem_flag || op1_is_constant)
4060 	      && (reg_mentioned_p (target, op0)
4061 		  || (MEM_P (op0) && MEM_P (target))))
4062 	  || reg_mentioned_p (target, op1)
4063 	  || (MEM_P (op1) && MEM_P (target))))
4064     target = 0;
4065 
4066   /* Get the mode in which to perform this computation.  Normally it will
4067      be MODE, but sometimes we can't do the desired operation in MODE.
4068      If so, pick a wider mode in which we can do the operation.  Convert
4069      to that mode at the start to avoid repeated conversions.
4070 
4071      First see what operations we need.  These depend on the expression
4072      we are evaluating.  (We assume that divxx3 insns exist under the
4073      same conditions as modxx3 insns and that these insns don't normally
4074      fail.  If these assumptions are not correct, we may generate less
4075      efficient code in some cases.)
4076 
4077      Then see if we find a mode in which we can open-code that operation
4078      (either a division, modulus, or shift).  Finally, check for the smallest
4079      mode for which we can do the operation with a library call.  */
4080 
4081   /* We might want to refine this now that we have division-by-constant
4082      optimization.  Since expmed_mult_highpart tries so many variants, it is
4083      not straightforward to generalize this.  Maybe we should make an array
4084      of possible modes in init_expmed?  Save this for GCC 2.7.  */
4085 
4086   optab1 = (op1_is_pow2
4087 	    ? (unsignedp ? lshr_optab : ashr_optab)
4088 	    : (unsignedp ? udiv_optab : sdiv_optab));
4089   optab2 = (op1_is_pow2 ? optab1
4090 	    : (unsignedp ? udivmod_optab : sdivmod_optab));
4091 
4092   for (compute_mode = mode; compute_mode != VOIDmode;
4093        compute_mode = GET_MODE_WIDER_MODE (compute_mode))
4094     if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
4095 	|| optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
4096       break;
4097 
4098   if (compute_mode == VOIDmode)
4099     for (compute_mode = mode; compute_mode != VOIDmode;
4100 	 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
4101       if (optab_libfunc (optab1, compute_mode)
4102 	  || optab_libfunc (optab2, compute_mode))
4103 	break;
4104 
4105   /* If we still couldn't find a mode, use MODE, but expand_binop will
4106      probably die.  */
4107   if (compute_mode == VOIDmode)
4108     compute_mode = mode;
4109 
4110   if (target && GET_MODE (target) == compute_mode)
4111     tquotient = target;
4112   else
4113     tquotient = gen_reg_rtx (compute_mode);
4114 
4115   size = GET_MODE_BITSIZE (compute_mode);
4116 #if 0
4117   /* It should be possible to restrict the precision to GET_MODE_BITSIZE
4118      (mode), and thereby get better code when OP1 is a constant.  Do that
4119      later.  It will require going over all usages of SIZE below.  */
4120   size = GET_MODE_BITSIZE (mode);
4121 #endif
4122 
4123   /* Only deduct something for a REM if the last divide done was
4124      for a different constant.   Then set the constant of the last
4125      divide.  */
4126   max_cost = (unsignedp
4127 	      ? udiv_cost (speed, compute_mode)
4128 	      : sdiv_cost (speed, compute_mode));
4129   if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4130 		     && INTVAL (op1) == last_div_const))
4131     max_cost -= (mul_cost (speed, compute_mode)
4132 		 + add_cost (speed, compute_mode));
4133 
4134   last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
4135 
4136   /* Now convert to the best mode to use.  */
4137   if (compute_mode != mode)
4138     {
4139       op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4140       op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4141 
4142       /* convert_modes may have placed op1 into a register, so we
4143 	 must recompute the following.  */
4144       op1_is_constant = CONST_INT_P (op1);
4145       if (op1_is_constant)
4146 	{
4147 	  wide_int ext_op1 = rtx_mode_t (op1, compute_mode);
4148 	  op1_is_pow2 = (wi::popcount (ext_op1) == 1
4149 			 || (! unsignedp
4150 			     && wi::popcount (wi::neg (ext_op1)) == 1));
4151 	}
4152       else
4153 	op1_is_pow2 = 0;
4154     }
4155 
4156   /* If one of the operands is a volatile MEM, copy it into a register.  */
4157 
4158   if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4159     op0 = force_reg (compute_mode, op0);
4160   if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4161     op1 = force_reg (compute_mode, op1);
4162 
4163   /* If we need the remainder or if OP1 is constant, we need to
4164      put OP0 in a register in case it has any queued subexpressions.  */
4165   if (rem_flag || op1_is_constant)
4166     op0 = force_reg (compute_mode, op0);
4167 
4168   last = get_last_insn ();
4169 
4170   /* Promote floor rounding to trunc rounding for unsigned operations.  */
4171   if (unsignedp)
4172     {
4173       if (code == FLOOR_DIV_EXPR)
4174 	code = TRUNC_DIV_EXPR;
4175       if (code == FLOOR_MOD_EXPR)
4176 	code = TRUNC_MOD_EXPR;
4177       if (code == EXACT_DIV_EXPR && op1_is_pow2)
4178 	code = TRUNC_DIV_EXPR;
4179     }
4180 
4181   if (op1 != const0_rtx)
4182     switch (code)
4183       {
4184       case TRUNC_MOD_EXPR:
4185       case TRUNC_DIV_EXPR:
4186 	if (op1_is_constant)
4187 	  {
4188 	    if (unsignedp)
4189 	      {
4190 		unsigned HOST_WIDE_INT mh, ml;
4191 		int pre_shift, post_shift;
4192 		int dummy;
4193 		wide_int wd = rtx_mode_t (op1, compute_mode);
4194 		unsigned HOST_WIDE_INT d = wd.to_uhwi ();
4195 
4196 		if (wi::popcount (wd) == 1)
4197 		  {
4198 		    pre_shift = floor_log2 (d);
4199 		    if (rem_flag)
4200 		      {
4201 			unsigned HOST_WIDE_INT mask
4202 			  = (HOST_WIDE_INT_1U << pre_shift) - 1;
4203 			remainder
4204 			  = expand_binop (compute_mode, and_optab, op0,
4205 					  gen_int_mode (mask, compute_mode),
4206 					  remainder, 1,
4207 					  OPTAB_LIB_WIDEN);
4208 			if (remainder)
4209 			  return gen_lowpart (mode, remainder);
4210 		      }
4211 		    quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4212 					     pre_shift, tquotient, 1);
4213 		  }
4214 		else if (size <= HOST_BITS_PER_WIDE_INT)
4215 		  {
4216 		    if (d >= (HOST_WIDE_INT_1U << (size - 1)))
4217 		      {
4218 			/* Most significant bit of divisor is set; emit an scc
4219 			   insn.  */
4220 			quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4221 							  compute_mode, 1, 1);
4222 		      }
4223 		    else
4224 		      {
4225 			/* Find a suitable multiplier and right shift count
4226 			   instead of multiplying with D.  */
4227 
4228 			mh = choose_multiplier (d, size, size,
4229 						&ml, &post_shift, &dummy);
4230 
4231 			/* If the suggested multiplier is more than SIZE bits,
4232 			   we can do better for even divisors, using an
4233 			   initial right shift.  */
4234 			if (mh != 0 && (d & 1) == 0)
4235 			  {
4236 			    pre_shift = ctz_or_zero (d);
4237 			    mh = choose_multiplier (d >> pre_shift, size,
4238 						    size - pre_shift,
4239 						    &ml, &post_shift, &dummy);
4240 			    gcc_assert (!mh);
4241 			  }
4242 			else
4243 			  pre_shift = 0;
4244 
4245 			if (mh != 0)
4246 			  {
4247 			    rtx t1, t2, t3, t4;
4248 
4249 			    if (post_shift - 1 >= BITS_PER_WORD)
4250 			      goto fail1;
4251 
4252 			    extra_cost
4253 			      = (shift_cost (speed, compute_mode, post_shift - 1)
4254 				 + shift_cost (speed, compute_mode, 1)
4255 				 + 2 * add_cost (speed, compute_mode));
4256 			    t1 = expmed_mult_highpart
4257 			      (compute_mode, op0,
4258 			       gen_int_mode (ml, compute_mode),
4259 			       NULL_RTX, 1, max_cost - extra_cost);
4260 			    if (t1 == 0)
4261 			      goto fail1;
4262 			    t2 = force_operand (gen_rtx_MINUS (compute_mode,
4263 							       op0, t1),
4264 						NULL_RTX);
4265 			    t3 = expand_shift (RSHIFT_EXPR, compute_mode,
4266 					       t2, 1, NULL_RTX, 1);
4267 			    t4 = force_operand (gen_rtx_PLUS (compute_mode,
4268 							      t1, t3),
4269 						NULL_RTX);
4270 			    quotient = expand_shift
4271 			      (RSHIFT_EXPR, compute_mode, t4,
4272 			       post_shift - 1, tquotient, 1);
4273 			  }
4274 			else
4275 			  {
4276 			    rtx t1, t2;
4277 
4278 			    if (pre_shift >= BITS_PER_WORD
4279 				|| post_shift >= BITS_PER_WORD)
4280 			      goto fail1;
4281 
4282 			    t1 = expand_shift
4283 			      (RSHIFT_EXPR, compute_mode, op0,
4284 			       pre_shift, NULL_RTX, 1);
4285 			    extra_cost
4286 			      = (shift_cost (speed, compute_mode, pre_shift)
4287 				 + shift_cost (speed, compute_mode, post_shift));
4288 			    t2 = expmed_mult_highpart
4289 			      (compute_mode, t1,
4290 			       gen_int_mode (ml, compute_mode),
4291 			       NULL_RTX, 1, max_cost - extra_cost);
4292 			    if (t2 == 0)
4293 			      goto fail1;
4294 			    quotient = expand_shift
4295 			      (RSHIFT_EXPR, compute_mode, t2,
4296 			       post_shift, tquotient, 1);
4297 			  }
4298 		      }
4299 		  }
4300 		else		/* Too wide mode to use tricky code */
4301 		  break;
4302 
4303 		insn = get_last_insn ();
4304 		if (insn != last)
4305 		  set_dst_reg_note (insn, REG_EQUAL,
4306 				    gen_rtx_UDIV (compute_mode, op0, op1),
4307 				    quotient);
4308 	      }
4309 	    else		/* TRUNC_DIV, signed */
4310 	      {
4311 		unsigned HOST_WIDE_INT ml;
4312 		int lgup, post_shift;
4313 		rtx mlr;
4314 		HOST_WIDE_INT d = INTVAL (op1);
4315 		unsigned HOST_WIDE_INT abs_d;
4316 
4317 		/* Not prepared to handle division/remainder by
4318 		   0xffffffffffffffff8000000000000000 etc.  */
4319 		if (d == HOST_WIDE_INT_MIN && size > HOST_BITS_PER_WIDE_INT)
4320 		  break;
4321 
4322 		/* Since d might be INT_MIN, we have to cast to
4323 		   unsigned HOST_WIDE_INT before negating to avoid
4324 		   undefined signed overflow.  */
4325 		abs_d = (d >= 0
4326 			 ? (unsigned HOST_WIDE_INT) d
4327 			 : - (unsigned HOST_WIDE_INT) d);
4328 
4329 		/* n rem d = n rem -d */
4330 		if (rem_flag && d < 0)
4331 		  {
4332 		    d = abs_d;
4333 		    op1 = gen_int_mode (abs_d, compute_mode);
4334 		  }
4335 
4336 		if (d == 1)
4337 		  quotient = op0;
4338 		else if (d == -1)
4339 		  quotient = expand_unop (compute_mode, neg_optab, op0,
4340 					  tquotient, 0);
4341 		else if (size <= HOST_BITS_PER_WIDE_INT
4342 			 && abs_d == HOST_WIDE_INT_1U << (size - 1))
4343 		  {
4344 		    /* This case is not handled correctly below.  */
4345 		    quotient = emit_store_flag (tquotient, EQ, op0, op1,
4346 						compute_mode, 1, 1);
4347 		    if (quotient == 0)
4348 		      goto fail1;
4349 		  }
4350 		else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4351 			 && (size <= HOST_BITS_PER_WIDE_INT || d >= 0)
4352 			 && (rem_flag
4353 			     ? smod_pow2_cheap (speed, compute_mode)
4354 			     : sdiv_pow2_cheap (speed, compute_mode))
4355 			 /* We assume that the cheap metric is true if the
4356 			    optab has an expander for this mode.  */
4357 			 && ((optab_handler ((rem_flag ? smod_optab
4358 					      : sdiv_optab),
4359 					     compute_mode)
4360 			      != CODE_FOR_nothing)
4361 			     || (optab_handler (sdivmod_optab,
4362 						compute_mode)
4363 				 != CODE_FOR_nothing)))
4364 		  ;
4365 		else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4366 		  {
4367 		    if (rem_flag)
4368 		      {
4369 			remainder = expand_smod_pow2 (compute_mode, op0, d);
4370 			if (remainder)
4371 			  return gen_lowpart (mode, remainder);
4372 		      }
4373 
4374 		    if (sdiv_pow2_cheap (speed, compute_mode)
4375 			&& ((optab_handler (sdiv_optab, compute_mode)
4376 			     != CODE_FOR_nothing)
4377 			    || (optab_handler (sdivmod_optab, compute_mode)
4378 				!= CODE_FOR_nothing)))
4379 		      quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4380 						compute_mode, op0,
4381 						gen_int_mode (abs_d,
4382 							      compute_mode),
4383 						NULL_RTX, 0);
4384 		    else
4385 		      quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4386 
4387 		    /* We have computed OP0 / abs(OP1).  If OP1 is negative,
4388 		       negate the quotient.  */
4389 		    if (d < 0)
4390 		      {
4391 			insn = get_last_insn ();
4392 			if (insn != last
4393 			    && abs_d < (HOST_WIDE_INT_1U
4394 					<< (HOST_BITS_PER_WIDE_INT - 1)))
4395 			  set_dst_reg_note (insn, REG_EQUAL,
4396 					    gen_rtx_DIV (compute_mode, op0,
4397 							 gen_int_mode
4398 							   (abs_d,
4399 							    compute_mode)),
4400 					    quotient);
4401 
4402 			quotient = expand_unop (compute_mode, neg_optab,
4403 						quotient, quotient, 0);
4404 		      }
4405 		  }
4406 		else if (size <= HOST_BITS_PER_WIDE_INT)
4407 		  {
4408 		    choose_multiplier (abs_d, size, size - 1,
4409 				       &ml, &post_shift, &lgup);
4410 		    if (ml < HOST_WIDE_INT_1U << (size - 1))
4411 		      {
4412 			rtx t1, t2, t3;
4413 
4414 			if (post_shift >= BITS_PER_WORD
4415 			    || size - 1 >= BITS_PER_WORD)
4416 			  goto fail1;
4417 
4418 			extra_cost = (shift_cost (speed, compute_mode, post_shift)
4419 				      + shift_cost (speed, compute_mode, size - 1)
4420 				      + add_cost (speed, compute_mode));
4421 			t1 = expmed_mult_highpart
4422 			  (compute_mode, op0, gen_int_mode (ml, compute_mode),
4423 			   NULL_RTX, 0, max_cost - extra_cost);
4424 			if (t1 == 0)
4425 			  goto fail1;
4426 			t2 = expand_shift
4427 			  (RSHIFT_EXPR, compute_mode, t1,
4428 			   post_shift, NULL_RTX, 0);
4429 			t3 = expand_shift
4430 			  (RSHIFT_EXPR, compute_mode, op0,
4431 			   size - 1, NULL_RTX, 0);
4432 			if (d < 0)
4433 			  quotient
4434 			    = force_operand (gen_rtx_MINUS (compute_mode,
4435 							    t3, t2),
4436 					     tquotient);
4437 			else
4438 			  quotient
4439 			    = force_operand (gen_rtx_MINUS (compute_mode,
4440 							    t2, t3),
4441 					     tquotient);
4442 		      }
4443 		    else
4444 		      {
4445 			rtx t1, t2, t3, t4;
4446 
4447 			if (post_shift >= BITS_PER_WORD
4448 			    || size - 1 >= BITS_PER_WORD)
4449 			  goto fail1;
4450 
4451 			ml |= HOST_WIDE_INT_M1U << (size - 1);
4452 			mlr = gen_int_mode (ml, compute_mode);
4453 			extra_cost = (shift_cost (speed, compute_mode, post_shift)
4454 				      + shift_cost (speed, compute_mode, size - 1)
4455 				      + 2 * add_cost (speed, compute_mode));
4456 			t1 = expmed_mult_highpart (compute_mode, op0, mlr,
4457 						   NULL_RTX, 0,
4458 						   max_cost - extra_cost);
4459 			if (t1 == 0)
4460 			  goto fail1;
4461 			t2 = force_operand (gen_rtx_PLUS (compute_mode,
4462 							  t1, op0),
4463 					    NULL_RTX);
4464 			t3 = expand_shift
4465 			  (RSHIFT_EXPR, compute_mode, t2,
4466 			   post_shift, NULL_RTX, 0);
4467 			t4 = expand_shift
4468 			  (RSHIFT_EXPR, compute_mode, op0,
4469 			   size - 1, NULL_RTX, 0);
4470 			if (d < 0)
4471 			  quotient
4472 			    = force_operand (gen_rtx_MINUS (compute_mode,
4473 							    t4, t3),
4474 					     tquotient);
4475 			else
4476 			  quotient
4477 			    = force_operand (gen_rtx_MINUS (compute_mode,
4478 							    t3, t4),
4479 					     tquotient);
4480 		      }
4481 		  }
4482 		else		/* Too wide mode to use tricky code */
4483 		  break;
4484 
4485 		insn = get_last_insn ();
4486 		if (insn != last)
4487 		  set_dst_reg_note (insn, REG_EQUAL,
4488 				    gen_rtx_DIV (compute_mode, op0, op1),
4489 				    quotient);
4490 	      }
4491 	    break;
4492 	  }
4493       fail1:
4494 	delete_insns_since (last);
4495 	break;
4496 
4497       case FLOOR_DIV_EXPR:
4498       case FLOOR_MOD_EXPR:
4499 	/* We will come here only for signed operations.  */
4500 	if (op1_is_constant && size <= HOST_BITS_PER_WIDE_INT)
4501 	  {
4502 	    unsigned HOST_WIDE_INT mh, ml;
4503 	    int pre_shift, lgup, post_shift;
4504 	    HOST_WIDE_INT d = INTVAL (op1);
4505 
4506 	    if (d > 0)
4507 	      {
4508 		/* We could just as easily deal with negative constants here,
4509 		   but it does not seem worth the trouble for GCC 2.6.  */
4510 		if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4511 		  {
4512 		    pre_shift = floor_log2 (d);
4513 		    if (rem_flag)
4514 		      {
4515 			unsigned HOST_WIDE_INT mask
4516 			  = (HOST_WIDE_INT_1U << pre_shift) - 1;
4517 			remainder = expand_binop
4518 			  (compute_mode, and_optab, op0,
4519 			   gen_int_mode (mask, compute_mode),
4520 			   remainder, 0, OPTAB_LIB_WIDEN);
4521 			if (remainder)
4522 			  return gen_lowpart (mode, remainder);
4523 		      }
4524 		    quotient = expand_shift
4525 		      (RSHIFT_EXPR, compute_mode, op0,
4526 		       pre_shift, tquotient, 0);
4527 		  }
4528 		else
4529 		  {
4530 		    rtx t1, t2, t3, t4;
4531 
4532 		    mh = choose_multiplier (d, size, size - 1,
4533 					    &ml, &post_shift, &lgup);
4534 		    gcc_assert (!mh);
4535 
4536 		    if (post_shift < BITS_PER_WORD
4537 			&& size - 1 < BITS_PER_WORD)
4538 		      {
4539 			t1 = expand_shift
4540 			  (RSHIFT_EXPR, compute_mode, op0,
4541 			   size - 1, NULL_RTX, 0);
4542 			t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4543 					   NULL_RTX, 0, OPTAB_WIDEN);
4544 			extra_cost = (shift_cost (speed, compute_mode, post_shift)
4545 				      + shift_cost (speed, compute_mode, size - 1)
4546 				      + 2 * add_cost (speed, compute_mode));
4547 			t3 = expmed_mult_highpart
4548 			  (compute_mode, t2, gen_int_mode (ml, compute_mode),
4549 			   NULL_RTX, 1, max_cost - extra_cost);
4550 			if (t3 != 0)
4551 			  {
4552 			    t4 = expand_shift
4553 			      (RSHIFT_EXPR, compute_mode, t3,
4554 			       post_shift, NULL_RTX, 1);
4555 			    quotient = expand_binop (compute_mode, xor_optab,
4556 						     t4, t1, tquotient, 0,
4557 						     OPTAB_WIDEN);
4558 			  }
4559 		      }
4560 		  }
4561 	      }
4562 	    else
4563 	      {
4564 		rtx nsign, t1, t2, t3, t4;
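		/* Editorial note: when the constant divisor is not positive,
		   this path biases op0 before a truncating division and then
		   corrects the quotient, using nsign (-1 when op0 <= 0, else
		   0) to pick the bias.  E.g. 7 / -2: trunc ((7 - 1) / -2)
		   + (-1) = -3 - 1 = -4, which is floor (7 / -2).  */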
4565 		t1 = force_operand (gen_rtx_PLUS (compute_mode,
4566 						  op0, constm1_rtx), NULL_RTX);
4567 		t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4568 				   0, OPTAB_WIDEN);
4569 		nsign = expand_shift (RSHIFT_EXPR, compute_mode, t2,
4570 				      size - 1, NULL_RTX, 0);
4571 		t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4572 				    NULL_RTX);
4573 		t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4574 				    NULL_RTX, 0);
4575 		if (t4)
4576 		  {
4577 		    rtx t5;
4578 		    t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4579 				      NULL_RTX, 0);
4580 		    quotient = force_operand (gen_rtx_PLUS (compute_mode,
4581 							    t4, t5),
4582 					      tquotient);
4583 		  }
4584 	      }
4585 	  }
4586 
4587 	if (quotient != 0)
4588 	  break;
4589 	delete_insns_since (last);
4590 
4591 	/* Try using an instruction that produces both the quotient and
4592 	   remainder, using truncation.  We can easily compensate the quotient
4593 	   or remainder to get floor rounding, once we have the remainder.
4594 	   Notice that we also compute the final remainder value here,
4595 	   and return the result right away.  */
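	/* Editorial note: a minimal sketch of the compensation done below,
	   assuming a truncating divmod has already produced Q and R for
	   N / D (names here are illustrative only):

	     if (R != 0 && (N < 0) != (D < 0))	/* floor != trunc only here */
	       {
		 Q -= 1;
		 R += D;
	       }

	   e.g. -7 / 2: truncation gives Q = -3, R = -1; floor rounding
	   needs Q = -4, R = 1.  */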
4596 	if (target == 0 || GET_MODE (target) != compute_mode)
4597 	  target = gen_reg_rtx (compute_mode);
4598 
4599 	if (rem_flag)
4600 	  {
4601 	    remainder
4602 	      = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4603 	    quotient = gen_reg_rtx (compute_mode);
4604 	  }
4605 	else
4606 	  {
4607 	    quotient
4608 	      = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4609 	    remainder = gen_reg_rtx (compute_mode);
4610 	  }
4611 
4612 	if (expand_twoval_binop (sdivmod_optab, op0, op1,
4613 				 quotient, remainder, 0))
4614 	  {
4615 	    /* This could be computed with a branch-less sequence.
4616 	       Save that for later.  */
4617 	    rtx tem;
4618 	    rtx_code_label *label = gen_label_rtx ();
4619 	    do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4620 	    tem = expand_binop (compute_mode, xor_optab, op0, op1,
4621 				NULL_RTX, 0, OPTAB_WIDEN);
4622 	    do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4623 	    expand_dec (quotient, const1_rtx);
4624 	    expand_inc (remainder, op1);
4625 	    emit_label (label);
4626 	    return gen_lowpart (mode, rem_flag ? remainder : quotient);
4627 	  }
4628 
4629 	/* No luck with division elimination or divmod.  Have to do it
4630 	   by conditionally adjusting op0 *and* the result.  */
4631 	{
4632 	  rtx_code_label *label1, *label2, *label3, *label4, *label5;
4633 	  rtx adjusted_op0;
4634 	  rtx tem;
4635 
4636 	  quotient = gen_reg_rtx (compute_mode);
4637 	  adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4638 	  label1 = gen_label_rtx ();
4639 	  label2 = gen_label_rtx ();
4640 	  label3 = gen_label_rtx ();
4641 	  label4 = gen_label_rtx ();
4642 	  label5 = gen_label_rtx ();
4643 	  do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4644 	  do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4645 	  tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4646 			      quotient, 0, OPTAB_LIB_WIDEN);
4647 	  if (tem != quotient)
4648 	    emit_move_insn (quotient, tem);
4649 	  emit_jump_insn (targetm.gen_jump (label5));
4650 	  emit_barrier ();
4651 	  emit_label (label1);
4652 	  expand_inc (adjusted_op0, const1_rtx);
4653 	  emit_jump_insn (targetm.gen_jump (label4));
4654 	  emit_barrier ();
4655 	  emit_label (label2);
4656 	  do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4657 	  tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4658 			      quotient, 0, OPTAB_LIB_WIDEN);
4659 	  if (tem != quotient)
4660 	    emit_move_insn (quotient, tem);
4661 	  emit_jump_insn (targetm.gen_jump (label5));
4662 	  emit_barrier ();
4663 	  emit_label (label3);
4664 	  expand_dec (adjusted_op0, const1_rtx);
4665 	  emit_label (label4);
4666 	  tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4667 			      quotient, 0, OPTAB_LIB_WIDEN);
4668 	  if (tem != quotient)
4669 	    emit_move_insn (quotient, tem);
4670 	  expand_dec (quotient, const1_rtx);
4671 	  emit_label (label5);
4672 	}
4673 	break;
4674 
4675       case CEIL_DIV_EXPR:
4676       case CEIL_MOD_EXPR:
4677 	if (unsignedp)
4678 	  {
4679 	    if (op1_is_constant
4680 		&& EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4681 		&& (size <= HOST_BITS_PER_WIDE_INT
4682 		    || INTVAL (op1) >= 0))
4683 	      {
4684 		rtx t1, t2, t3;
4685 		unsigned HOST_WIDE_INT d = INTVAL (op1);
4686 		t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4687 				   floor_log2 (d), tquotient, 1);
4688 		t2 = expand_binop (compute_mode, and_optab, op0,
4689 				   gen_int_mode (d - 1, compute_mode),
4690 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
4691 		t3 = gen_reg_rtx (compute_mode);
4692 		t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4693 				      compute_mode, 1, 1);
4694 		if (t3 == 0)
4695 		  {
4696 		    rtx_code_label *lab;
4697 		    lab = gen_label_rtx ();
4698 		    do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4699 		    expand_inc (t1, const1_rtx);
4700 		    emit_label (lab);
4701 		    quotient = t1;
4702 		  }
4703 		else
4704 		  quotient = force_operand (gen_rtx_PLUS (compute_mode,
4705 							  t1, t3),
4706 					    tquotient);
4707 		break;
4708 	      }
4709 
4710 	    /* Try using an instruction that produces both the quotient and
4711 	       remainder, using truncation.  We can easily compensate the
4712 	       quotient or remainder to get ceiling rounding, once we have the
4713 	       remainder.  Notice that we also compute the final remainder
4714 	       value here, and return the result right away.  */
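	    /* Editorial note: a sketch of the unsigned ceiling adjustment
	       below, assuming a truncating divmod already gave Q and R for
	       N / D (illustrative names):

		 if (R != 0)
		   {
		     Q += 1;
		     R -= D;
		   }

	       e.g. 7 / 2: Q = 3, R = 1 becomes Q = 4, R = -1 (wrapping in
	       the unsigned compute mode).  */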
4715 	    if (target == 0 || GET_MODE (target) != compute_mode)
4716 	      target = gen_reg_rtx (compute_mode);
4717 
4718 	    if (rem_flag)
4719 	      {
4720 		remainder = (REG_P (target)
4721 			     ? target : gen_reg_rtx (compute_mode));
4722 		quotient = gen_reg_rtx (compute_mode);
4723 	      }
4724 	    else
4725 	      {
4726 		quotient = (REG_P (target)
4727 			    ? target : gen_reg_rtx (compute_mode));
4728 		remainder = gen_reg_rtx (compute_mode);
4729 	      }
4730 
4731 	    if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4732 				     remainder, 1))
4733 	      {
4734 		/* This could be computed with a branch-less sequence.
4735 		   Save that for later.  */
4736 		rtx_code_label *label = gen_label_rtx ();
4737 		do_cmp_and_jump (remainder, const0_rtx, EQ,
4738 				 compute_mode, label);
4739 		expand_inc (quotient, const1_rtx);
4740 		expand_dec (remainder, op1);
4741 		emit_label (label);
4742 		return gen_lowpart (mode, rem_flag ? remainder : quotient);
4743 	      }
4744 
4745 	    /* No luck with division elimination or divmod.  Have to do it
4746 	       by conditionally adjusting op0 *and* the result.  */
4747 	    {
4748 	      rtx_code_label *label1, *label2;
4749 	      rtx adjusted_op0, tem;
4750 
4751 	      quotient = gen_reg_rtx (compute_mode);
4752 	      adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4753 	      label1 = gen_label_rtx ();
4754 	      label2 = gen_label_rtx ();
4755 	      do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4756 			       compute_mode, label1);
4757 	      emit_move_insn (quotient, const0_rtx);
4758 	      emit_jump_insn (targetm.gen_jump (label2));
4759 	      emit_barrier ();
4760 	      emit_label (label1);
4761 	      expand_dec (adjusted_op0, const1_rtx);
4762 	      tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4763 				  quotient, 1, OPTAB_LIB_WIDEN);
4764 	      if (tem != quotient)
4765 		emit_move_insn (quotient, tem);
4766 	      expand_inc (quotient, const1_rtx);
4767 	      emit_label (label2);
4768 	    }
4769 	  }
4770 	else /* signed */
4771 	  {
4772 	    if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4773 		&& INTVAL (op1) >= 0)
4774 	      {
4775 		/* This is extremely similar to the code for the unsigned case
4776 		   above.  For 2.7 we should merge these variants, but for
4777 		   2.6.1 I don't want to touch the code for unsigned since that
4778 		   gets used in C.  The signed case will only be used by other
4779 		   languages (Ada).  */
4780 
4781 		rtx t1, t2, t3;
4782 		unsigned HOST_WIDE_INT d = INTVAL (op1);
4783 		t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4784 				   floor_log2 (d), tquotient, 0);
4785 		t2 = expand_binop (compute_mode, and_optab, op0,
4786 				   gen_int_mode (d - 1, compute_mode),
4787 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
4788 		t3 = gen_reg_rtx (compute_mode);
4789 		t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4790 				      compute_mode, 1, 1);
4791 		if (t3 == 0)
4792 		  {
4793 		    rtx_code_label *lab;
4794 		    lab = gen_label_rtx ();
4795 		    do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4796 		    expand_inc (t1, const1_rtx);
4797 		    emit_label (lab);
4798 		    quotient = t1;
4799 		  }
4800 		else
4801 		  quotient = force_operand (gen_rtx_PLUS (compute_mode,
4802 							  t1, t3),
4803 					    tquotient);
4804 		break;
4805 	      }
4806 
4807 	    /* Try using an instruction that produces both the quotient and
4808 	       remainder, using truncation.  We can easily compensate the
4809 	       quotient or remainder to get ceiling rounding, once we have the
4810 	       remainder.  Notice that we also compute the final remainder
4811 	       value here, and return the result right away.  */
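	    /* Editorial note: for the signed ceiling adjustment below, the
	       quotient only needs bumping when the remainder is nonzero and
	       the operands have the same sign (illustrative names):

		 if (R != 0 && (N < 0) == (D < 0))
		   {
		     Q += 1;
		     R -= D;
		   }

	       e.g. 7 / 2: Q = 3, R = 1 becomes Q = 4, R = -1, while -7 / 2
	       keeps the truncated Q = -3, R = -1.  */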
4812 	    if (target == 0 || GET_MODE (target) != compute_mode)
4813 	      target = gen_reg_rtx (compute_mode);
4814 	    if (rem_flag)
4815 	      {
4816 		remainder = (REG_P (target)
4817 			    ? target : gen_reg_rtx (compute_mode));
4818 		quotient = gen_reg_rtx (compute_mode);
4819 	      }
4820 	    else
4821 	      {
4822 		quotient = (REG_P (target)
4823 			    ? target : gen_reg_rtx (compute_mode));
4824 		remainder = gen_reg_rtx (compute_mode);
4825 	      }
4826 
4827 	    if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4828 				     remainder, 0))
4829 	      {
4830 		/* This could be computed with a branch-less sequence.
4831 		   Save that for later.  */
4832 		rtx tem;
4833 		rtx_code_label *label = gen_label_rtx ();
4834 		do_cmp_and_jump (remainder, const0_rtx, EQ,
4835 				 compute_mode, label);
4836 		tem = expand_binop (compute_mode, xor_optab, op0, op1,
4837 				    NULL_RTX, 0, OPTAB_WIDEN);
4838 		do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4839 		expand_inc (quotient, const1_rtx);
4840 		expand_dec (remainder, op1);
4841 		emit_label (label);
4842 		return gen_lowpart (mode, rem_flag ? remainder : quotient);
4843 	      }
4844 
4845 	    /* No luck with division elimination or divmod.  Have to do it
4846 	       by conditionally adjusting op0 *and* the result.  */
4847 	    {
4848 	      rtx_code_label *label1, *label2, *label3, *label4, *label5;
4849 	      rtx adjusted_op0;
4850 	      rtx tem;
4851 
4852 	      quotient = gen_reg_rtx (compute_mode);
4853 	      adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4854 	      label1 = gen_label_rtx ();
4855 	      label2 = gen_label_rtx ();
4856 	      label3 = gen_label_rtx ();
4857 	      label4 = gen_label_rtx ();
4858 	      label5 = gen_label_rtx ();
4859 	      do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4860 	      do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4861 			       compute_mode, label1);
4862 	      tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4863 				  quotient, 0, OPTAB_LIB_WIDEN);
4864 	      if (tem != quotient)
4865 		emit_move_insn (quotient, tem);
4866 	      emit_jump_insn (targetm.gen_jump (label5));
4867 	      emit_barrier ();
4868 	      emit_label (label1);
4869 	      expand_dec (adjusted_op0, const1_rtx);
4870 	      emit_jump_insn (targetm.gen_jump (label4));
4871 	      emit_barrier ();
4872 	      emit_label (label2);
4873 	      do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4874 			       compute_mode, label3);
4875 	      tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4876 				  quotient, 0, OPTAB_LIB_WIDEN);
4877 	      if (tem != quotient)
4878 		emit_move_insn (quotient, tem);
4879 	      emit_jump_insn (targetm.gen_jump (label5));
4880 	      emit_barrier ();
4881 	      emit_label (label3);
4882 	      expand_inc (adjusted_op0, const1_rtx);
4883 	      emit_label (label4);
4884 	      tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4885 				  quotient, 0, OPTAB_LIB_WIDEN);
4886 	      if (tem != quotient)
4887 		emit_move_insn (quotient, tem);
4888 	      expand_inc (quotient, const1_rtx);
4889 	      emit_label (label5);
4890 	    }
4891 	  }
4892 	break;
4893 
4894       case EXACT_DIV_EXPR:
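	/* Editorial note: this case relies on N being a known multiple of D,
	   so the quotient can be formed without a divide: shift out D's
	   power-of-two factor, then multiply by the inverse of D's odd part
	   modulo 2^size.  A worked example (illustrative only) with
	   size = 8, D = 6, N = 18: pre_shift = 1, the odd part is 3, and
	   3 * 171 == 513 == 1 (mod 256), so Q = (18 >> 1) * 171 mod 256
	   = 9 * 171 mod 256 = 3.  */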
4895 	if (op1_is_constant && size <= HOST_BITS_PER_WIDE_INT)
4896 	  {
4897 	    HOST_WIDE_INT d = INTVAL (op1);
4898 	    unsigned HOST_WIDE_INT ml;
4899 	    int pre_shift;
4900 	    rtx t1;
4901 
4902 	    pre_shift = ctz_or_zero (d);
4903 	    ml = invert_mod2n (d >> pre_shift, size);
4904 	    t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4905 			       pre_shift, NULL_RTX, unsignedp);
4906 	    quotient = expand_mult (compute_mode, t1,
4907 				    gen_int_mode (ml, compute_mode),
4908 				    NULL_RTX, 1);
4909 
4910 	    insn = get_last_insn ();
4911 	    set_dst_reg_note (insn, REG_EQUAL,
4912 			      gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4913 					      compute_mode, op0, op1),
4914 			      quotient);
4915 	  }
4916 	break;
4917 
4918       case ROUND_DIV_EXPR:
4919       case ROUND_MOD_EXPR:
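	/* Editorial note: both branches below first obtain a truncating
	   quotient Q and remainder R and then round to nearest.  For the
	   unsigned branch the adjustment is (illustrative names):

	     if (R > (D - 1) / 2)
	       {
		 Q += 1;
		 R -= D;
	       }

	   e.g. 8 / 3: Q = 2, R = 2 becomes Q = 3, R = -1, while 7 / 3 keeps
	   Q = 2, R = 1.  The signed branch compares 2 * abs (R) against
	   abs (D) and applies the correction with the sign of N ^ D.  */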
4920 	if (unsignedp)
4921 	  {
4922 	    rtx tem;
4923 	    rtx_code_label *label;
4924 	    label = gen_label_rtx ();
4925 	    quotient = gen_reg_rtx (compute_mode);
4926 	    remainder = gen_reg_rtx (compute_mode);
4927 	    if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4928 	      {
4929 		rtx tem;
4930 		quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4931 					 quotient, 1, OPTAB_LIB_WIDEN);
4932 		tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4933 		remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4934 					  remainder, 1, OPTAB_LIB_WIDEN);
4935 	      }
4936 	    tem = plus_constant (compute_mode, op1, -1);
4937 	    tem = expand_shift (RSHIFT_EXPR, compute_mode, tem, 1, NULL_RTX, 1);
4938 	    do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4939 	    expand_inc (quotient, const1_rtx);
4940 	    expand_dec (remainder, op1);
4941 	    emit_label (label);
4942 	  }
4943 	else
4944 	  {
4945 	    rtx abs_rem, abs_op1, tem, mask;
4946 	    rtx_code_label *label;
4947 	    label = gen_label_rtx ();
4948 	    quotient = gen_reg_rtx (compute_mode);
4949 	    remainder = gen_reg_rtx (compute_mode);
4950 	    if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4951 	      {
4952 		rtx tem;
4953 		quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4954 					 quotient, 0, OPTAB_LIB_WIDEN);
4955 		tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4956 		remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4957 					  remainder, 0, OPTAB_LIB_WIDEN);
4958 	      }
4959 	    abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4960 	    abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4961 	    tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4962 				1, NULL_RTX, 1);
4963 	    do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4964 	    tem = expand_binop (compute_mode, xor_optab, op0, op1,
4965 				NULL_RTX, 0, OPTAB_WIDEN);
4966 	    mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4967 				 size - 1, NULL_RTX, 0);
4968 	    tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4969 				NULL_RTX, 0, OPTAB_WIDEN);
4970 	    tem = expand_binop (compute_mode, sub_optab, tem, mask,
4971 				NULL_RTX, 0, OPTAB_WIDEN);
4972 	    expand_inc (quotient, tem);
4973 	    tem = expand_binop (compute_mode, xor_optab, mask, op1,
4974 				NULL_RTX, 0, OPTAB_WIDEN);
4975 	    tem = expand_binop (compute_mode, sub_optab, tem, mask,
4976 				NULL_RTX, 0, OPTAB_WIDEN);
4977 	    expand_dec (remainder, tem);
4978 	    emit_label (label);
4979 	  }
4980 	return gen_lowpart (mode, rem_flag ? remainder : quotient);
4981 
4982       default:
4983 	gcc_unreachable ();
4984       }
4985 
4986   if (quotient == 0)
4987     {
4988       if (target && GET_MODE (target) != compute_mode)
4989 	target = 0;
4990 
4991       if (rem_flag)
4992 	{
4993 	  /* Try to produce the remainder without producing the quotient.
4994 	     If we seem to have a divmod pattern that does not require widening,
4995 	     don't try widening here.  We should really have a WIDEN argument
4996 	     to expand_twoval_binop, since what we'd really like to do here is
4997 	     1) try a mod insn in compute_mode
4998 	     2) try a divmod insn in compute_mode
4999 	     3) try a div insn in compute_mode and multiply-subtract to get
5000 	        remainder
5001 	     4) try the same things with widening allowed.  */
5002 	  remainder
5003 	    = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5004 				 op0, op1, target,
5005 				 unsignedp,
5006 				 ((optab_handler (optab2, compute_mode)
5007 				   != CODE_FOR_nothing)
5008 				  ? OPTAB_DIRECT : OPTAB_WIDEN));
5009 	  if (remainder == 0)
5010 	    {
5011 	      /* No luck there.  Can we do remainder and divide at once
5012 		 without a library call?  */
5013 	      remainder = gen_reg_rtx (compute_mode);
5014 	      if (! expand_twoval_binop ((unsignedp
5015 					  ? udivmod_optab
5016 					  : sdivmod_optab),
5017 					 op0, op1,
5018 					 NULL_RTX, remainder, unsignedp))
5019 		remainder = 0;
5020 	    }
5021 
5022 	  if (remainder)
5023 	    return gen_lowpart (mode, remainder);
5024 	}
5025 
5026       /* Produce the quotient.  Try a quotient insn, but not a library call.
5027 	 If we have a divmod in this mode, use it in preference to widening
5028 	 the div (for this test we assume it will not fail). Note that optab2
5029 	 is set to whichever of the two optabs the call below will use.  */
5030       quotient
5031 	= sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
5032 			     op0, op1, rem_flag ? NULL_RTX : target,
5033 			     unsignedp,
5034 			     ((optab_handler (optab2, compute_mode)
5035 			       != CODE_FOR_nothing)
5036 			      ? OPTAB_DIRECT : OPTAB_WIDEN));
5037 
5038       if (quotient == 0)
5039 	{
5040 	  /* No luck there.  Try a quotient-and-remainder insn,
5041 	     keeping the quotient alone.  */
5042 	  quotient = gen_reg_rtx (compute_mode);
5043 	  if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
5044 				     op0, op1,
5045 				     quotient, NULL_RTX, unsignedp))
5046 	    {
5047 	      quotient = 0;
5048 	      if (! rem_flag)
5049 		/* Still no luck.  If we are not computing the remainder,
5050 		   use a library call for the quotient.  */
5051 		quotient = sign_expand_binop (compute_mode,
5052 					      udiv_optab, sdiv_optab,
5053 					      op0, op1, target,
5054 					      unsignedp, OPTAB_LIB_WIDEN);
5055 	    }
5056 	}
5057     }
5058 
5059   if (rem_flag)
5060     {
5061       if (target && GET_MODE (target) != compute_mode)
5062 	target = 0;
5063 
5064       if (quotient == 0)
5065 	{
5066 	  /* No divide instruction either.  Use library for remainder.  */
5067 	  remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5068 					 op0, op1, target,
5069 					 unsignedp, OPTAB_LIB_WIDEN);
5070 	  /* No remainder function.  Try a quotient-and-remainder
5071 	     function, keeping the remainder.  */
5072 	  if (!remainder)
5073 	    {
5074 	      remainder = gen_reg_rtx (compute_mode);
5075 	      if (!expand_twoval_binop_libfunc
5076 		  (unsignedp ? udivmod_optab : sdivmod_optab,
5077 		   op0, op1,
5078 		   NULL_RTX, remainder,
5079 		   unsignedp ? UMOD : MOD))
5080 		remainder = NULL_RTX;
5081 	    }
5082 	}
5083       else
5084 	{
5085 	  /* We divided.  Now finish doing X - Y * (X / Y).  */
5086 	  remainder = expand_mult (compute_mode, quotient, op1,
5087 				   NULL_RTX, unsignedp);
5088 	  remainder = expand_binop (compute_mode, sub_optab, op0,
5089 				    remainder, target, unsignedp,
5090 				    OPTAB_LIB_WIDEN);
5091 	}
5092     }
5093 
5094   return gen_lowpart (mode, rem_flag ? remainder : quotient);
5095 }
5096 
5097 /* Return a tree node with data type TYPE, describing the value of X.
5098    Usually this is a VAR_DECL, if there is no obvious better choice.
5099    X may be an expression; however, we only support those expressions
5100    generated by loop.c.  */
5101 
5102 tree
5103 make_tree (tree type, rtx x)
5104 {
5105   tree t;
5106 
5107   switch (GET_CODE (x))
5108     {
5109     case CONST_INT:
5110     case CONST_WIDE_INT:
5111       t = wide_int_to_tree (type, rtx_mode_t (x, TYPE_MODE (type)));
5112       return t;
5113 
5114     case CONST_DOUBLE:
5115       STATIC_ASSERT (HOST_BITS_PER_WIDE_INT * 2 <= MAX_BITSIZE_MODE_ANY_INT);
5116       if (TARGET_SUPPORTS_WIDE_INT == 0 && GET_MODE (x) == VOIDmode)
5117 	t = wide_int_to_tree (type,
5118 			      wide_int::from_array (&CONST_DOUBLE_LOW (x), 2,
5119 						    HOST_BITS_PER_WIDE_INT * 2));
5120       else
5121 	t = build_real (type, *CONST_DOUBLE_REAL_VALUE (x));
5122 
5123       return t;
5124 
5125     case CONST_VECTOR:
5126       {
5127 	int units = CONST_VECTOR_NUNITS (x);
5128 	tree itype = TREE_TYPE (type);
5129 	tree *elts;
5130 	int i;
5131 
5132 	/* Build a tree with vector elements.  */
5133 	elts = XALLOCAVEC (tree, units);
5134 	for (i = units - 1; i >= 0; --i)
5135 	  {
5136 	    rtx elt = CONST_VECTOR_ELT (x, i);
5137 	    elts[i] = make_tree (itype, elt);
5138 	  }
5139 
5140 	return build_vector (type, elts);
5141       }
5142 
5143     case PLUS:
5144       return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5145 			  make_tree (type, XEXP (x, 1)));
5146 
5147     case MINUS:
5148       return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5149 			  make_tree (type, XEXP (x, 1)));
5150 
5151     case NEG:
5152       return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5153 
5154     case MULT:
5155       return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5156 			  make_tree (type, XEXP (x, 1)));
5157 
5158     case ASHIFT:
5159       return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5160 			  make_tree (type, XEXP (x, 1)));
5161 
5162     case LSHIFTRT:
5163       t = unsigned_type_for (type);
5164       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5165 			    		 make_tree (t, XEXP (x, 0)),
5166 				    	 make_tree (type, XEXP (x, 1))));
5167 
5168     case ASHIFTRT:
5169       t = signed_type_for (type);
5170       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5171 					 make_tree (t, XEXP (x, 0)),
5172 				    	 make_tree (type, XEXP (x, 1))));
5173 
5174     case DIV:
5175       if (TREE_CODE (type) != REAL_TYPE)
5176 	t = signed_type_for (type);
5177       else
5178 	t = type;
5179 
5180       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5181 				    	 make_tree (t, XEXP (x, 0)),
5182 				    	 make_tree (t, XEXP (x, 1))));
5183     case UDIV:
5184       t = unsigned_type_for (type);
5185       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5186 				    	 make_tree (t, XEXP (x, 0)),
5187 				    	 make_tree (t, XEXP (x, 1))));
5188 
5189     case SIGN_EXTEND:
5190     case ZERO_EXTEND:
5191       t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5192 					  GET_CODE (x) == ZERO_EXTEND);
5193       return fold_convert (type, make_tree (t, XEXP (x, 0)));
5194 
5195     case CONST:
5196       return make_tree (type, XEXP (x, 0));
5197 
5198     case SYMBOL_REF:
5199       t = SYMBOL_REF_DECL (x);
5200       if (t)
5201 	return fold_convert (type, build_fold_addr_expr (t));
5202       /* fall through.  */
5203 
5204     default:
5205       t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5206 
5207       /* If TYPE is a POINTER_TYPE, we might need to convert X from
5208 	 address mode to pointer mode.  */
5209       if (POINTER_TYPE_P (type))
5210 	x = convert_memory_address_addr_space
5211 	      (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5212 
5213       /* Note that we do *not* use SET_DECL_RTL here, because we do not
5214 	 want set_decl_rtl to go adjusting REG_ATTRS for this temporary.  */
5215       t->decl_with_rtl.rtl = x;
5216 
5217       return t;
5218     }
5219 }
5220 
5221 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5222    and returning TARGET.
5223 
5224    If TARGET is 0, a pseudo-register or constant is returned.  */
5225 
5226 rtx
5227 expand_and (machine_mode mode, rtx op0, rtx op1, rtx target)
5228 {
5229   rtx tem = 0;
5230 
5231   if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5232     tem = simplify_binary_operation (AND, mode, op0, op1);
5233   if (tem == 0)
5234     tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5235 
5236   if (target == 0)
5237     target = tem;
5238   else if (tem != target)
5239     emit_move_insn (target, tem);
5240   return target;
5241 }
5242 
5243 /* Helper function for emit_store_flag.  */
5244 rtx
5245 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5246 	     machine_mode mode, machine_mode compare_mode,
5247 	     int unsignedp, rtx x, rtx y, int normalizep,
5248 	     machine_mode target_mode)
5249 {
5250   struct expand_operand ops[4];
5251   rtx op0, comparison, subtarget;
5252   rtx_insn *last;
5253   machine_mode result_mode = targetm.cstore_mode (icode);
5254 
5255   last = get_last_insn ();
5256   x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5257   y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5258   if (!x || !y)
5259     {
5260       delete_insns_since (last);
5261       return NULL_RTX;
5262     }
5263 
5264   if (target_mode == VOIDmode)
5265     target_mode = result_mode;
5266   if (!target)
5267     target = gen_reg_rtx (target_mode);
5268 
5269   comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5270 
5271   create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5272   create_fixed_operand (&ops[1], comparison);
5273   create_fixed_operand (&ops[2], x);
5274   create_fixed_operand (&ops[3], y);
5275   if (!maybe_expand_insn (icode, 4, ops))
5276     {
5277       delete_insns_since (last);
5278       return NULL_RTX;
5279     }
5280   subtarget = ops[0].value;
5281 
5282   /* If we are converting to a wider mode, first convert to
5283      TARGET_MODE, then normalize.  This produces better combining
5284      opportunities on machines that have a SIGN_EXTRACT when we are
5285      testing a single bit.  This mostly benefits the 68k.
5286 
5287      If STORE_FLAG_VALUE does not have the sign bit set when
5288      interpreted in MODE, we can do this conversion as unsigned, which
5289      is usually more efficient.  */
5290   if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
5291     {
5292       convert_move (target, subtarget,
5293 		    val_signbit_known_clear_p (result_mode,
5294 					       STORE_FLAG_VALUE));
5295       op0 = target;
5296       result_mode = target_mode;
5297     }
5298   else
5299     op0 = subtarget;
5300 
5301   /* If we want to keep subexpressions around, don't reuse our last
5302      target.  */
5303   if (optimize)
5304     subtarget = 0;
5305 
5306   /* Now normalize to the proper value in MODE.  Sometimes we don't
5307      have to do anything.  */
5308   if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5309     ;
5310   /* STORE_FLAG_VALUE might be the most negative number, so write
5311      the comparison this way to avoid a compile-time warning.  */
5312   else if (- normalizep == STORE_FLAG_VALUE)
5313     op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5314 
5315   /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5316      it hard to use a value of just the sign bit due to ANSI integer
5317      constant typing rules.  */
5318   else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5319     op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5320 			GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5321 			normalizep == 1);
5322   else
5323     {
5324       gcc_assert (STORE_FLAG_VALUE & 1);
5325 
5326       op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5327       if (normalizep == -1)
5328 	op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5329     }
5330 
5331   /* If we were converting to a smaller mode, do the conversion now.  */
5332   if (target_mode != result_mode)
5333     {
5334       convert_move (target, op0, 0);
5335       return target;
5336     }
5337   else
5338     return op0;
5339 }
5340 
5341 
5342 /* A subroutine of emit_store_flag that includes only "tricks" that do not
5343    need a recursive call.  These are kept separate to avoid infinite
5344    loops.  */
5345 
5346 static rtx
5347 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5348 		   machine_mode mode, int unsignedp, int normalizep,
5349 		   machine_mode target_mode)
5350 {
5351   rtx subtarget;
5352   enum insn_code icode;
5353   machine_mode compare_mode;
5354   enum mode_class mclass;
5355   enum rtx_code scode;
5356 
5357   if (unsignedp)
5358     code = unsigned_condition (code);
5359   scode = swap_condition (code);
5360 
5361   /* If one operand is constant, make it the second one.  Only do this
5362      if the other operand is not constant as well.  */
5363 
5364   if (swap_commutative_operands_p (op0, op1))
5365     {
5366       std::swap (op0, op1);
5367       code = swap_condition (code);
5368     }
5369 
5370   if (mode == VOIDmode)
5371     mode = GET_MODE (op0);
5372 
5373   /* For some comparisons with 1 and -1, we can convert this to
5374      comparisons with zero.  This will often produce more opportunities for
5375      store-flag insns.  */
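  /* Editorial note: e.g. "x < 1" on signed operands is the same test as
     "x <= 0", and the unsigned "x >= 1" is just "x != 0"; the switch below
     performs exactly these rewrites.  */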
5376 
5377   switch (code)
5378     {
5379     case LT:
5380       if (op1 == const1_rtx)
5381 	op1 = const0_rtx, code = LE;
5382       break;
5383     case LE:
5384       if (op1 == constm1_rtx)
5385 	op1 = const0_rtx, code = LT;
5386       break;
5387     case GE:
5388       if (op1 == const1_rtx)
5389 	op1 = const0_rtx, code = GT;
5390       break;
5391     case GT:
5392       if (op1 == constm1_rtx)
5393 	op1 = const0_rtx, code = GE;
5394       break;
5395     case GEU:
5396       if (op1 == const1_rtx)
5397 	op1 = const0_rtx, code = NE;
5398       break;
5399     case LTU:
5400       if (op1 == const1_rtx)
5401 	op1 = const0_rtx, code = EQ;
5402       break;
5403     default:
5404       break;
5405     }
5406 
5407   /* If we are comparing a double-word integer with zero or -1, we can
5408      convert the comparison into one involving a single word.  */
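  /* Editorial note: the reductions used below rest on two identities for a
     value split into high and low words: (hi:lo) == 0 iff (hi | lo) == 0,
     and (hi:lo) == -1 iff (hi & lo) == -1; for the sign tests (< 0, >= 0)
     only the sign bit of the high word matters.  */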
5409   if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5410       && GET_MODE_CLASS (mode) == MODE_INT
5411       && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5412     {
5413       rtx tem;
5414       if ((code == EQ || code == NE)
5415 	  && (op1 == const0_rtx || op1 == constm1_rtx))
5416 	{
5417 	  rtx op00, op01;
5418 
5419 	  /* Do a logical OR or AND of the two words and compare the
5420 	     result.  */
5421 	  op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5422 	  op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5423 	  tem = expand_binop (word_mode,
5424 			      op1 == const0_rtx ? ior_optab : and_optab,
5425 			      op00, op01, NULL_RTX, unsignedp,
5426 			      OPTAB_DIRECT);
5427 
5428 	  if (tem != 0)
5429 	    tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5430 				   unsignedp, normalizep);
5431 	}
5432       else if ((code == LT || code == GE) && op1 == const0_rtx)
5433 	{
5434 	  rtx op0h;
5435 
5436 	  /* If testing the sign bit, we can just test the high word.  */
5437 	  op0h = simplify_gen_subreg (word_mode, op0, mode,
5438 				      subreg_highpart_offset (word_mode,
5439 							      mode));
5440 	  tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5441 				 unsignedp, normalizep);
5442 	}
5443       else
5444 	tem = NULL_RTX;
5445 
5446       if (tem)
5447 	{
5448 	  if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5449 	    return tem;
5450 	  if (!target)
5451 	    target = gen_reg_rtx (target_mode);
5452 
5453 	  convert_move (target, tem,
5454 			!val_signbit_known_set_p (word_mode,
5455 						  (normalizep ? normalizep
5456 						   : STORE_FLAG_VALUE)));
5457 	  return target;
5458 	}
5459     }
5460 
5461   /* If this is A < 0 or A >= 0, we can do this by taking the ones
5462      complement of A (for GE) and shifting the sign bit to the low bit.  */
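  /* Editorial note: e.g. for a 32-bit A with a 0/1 store-flag convention,
     "A < 0" is simply the logical shift A >> 31 and "A >= 0" is ~A >> 31;
     using an arithmetic shift instead yields the -1/0 forms.  */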
5463   if (op1 == const0_rtx && (code == LT || code == GE)
5464       && GET_MODE_CLASS (mode) == MODE_INT
5465       && (normalizep || STORE_FLAG_VALUE == 1
5466 	  || val_signbit_p (mode, STORE_FLAG_VALUE)))
5467     {
5468       subtarget = target;
5469 
5470       if (!target)
5471 	target_mode = mode;
5472 
5473       /* If the result is to be wider than OP0, it is best to convert it
5474 	 first.  If it is to be narrower, it is *incorrect* to convert it
5475 	 first.  */
5476       else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5477 	{
5478 	  op0 = convert_modes (target_mode, mode, op0, 0);
5479 	  mode = target_mode;
5480 	}
5481 
5482       if (target_mode != mode)
5483 	subtarget = 0;
5484 
5485       if (code == GE)
5486 	op0 = expand_unop (mode, one_cmpl_optab, op0,
5487 			   ((STORE_FLAG_VALUE == 1 || normalizep)
5488 			    ? 0 : subtarget), 0);
5489 
5490       if (STORE_FLAG_VALUE == 1 || normalizep)
5491 	/* If we are supposed to produce a 0/1 value, we want to do
5492 	   a logical shift from the sign bit to the low-order bit; for
5493 	   a -1/0 value, we do an arithmetic shift.  */
5494 	op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5495 			    GET_MODE_BITSIZE (mode) - 1,
5496 			    subtarget, normalizep != -1);
5497 
5498       if (mode != target_mode)
5499 	op0 = convert_modes (target_mode, mode, op0, 0);
5500 
5501       return op0;
5502     }
5503 
5504   mclass = GET_MODE_CLASS (mode);
5505   for (compare_mode = mode; compare_mode != VOIDmode;
5506        compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5507     {
5508      machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5509      icode = optab_handler (cstore_optab, optab_mode);
5510      if (icode != CODE_FOR_nothing)
5511 	{
5512 	  do_pending_stack_adjust ();
5513 	  rtx tem = emit_cstore (target, icode, code, mode, compare_mode,
5514 				 unsignedp, op0, op1, normalizep, target_mode);
5515 	  if (tem)
5516 	    return tem;
5517 
5518 	  if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5519 	    {
5520 	      tem = emit_cstore (target, icode, scode, mode, compare_mode,
5521 				 unsignedp, op1, op0, normalizep, target_mode);
5522 	      if (tem)
5523 	        return tem;
5524 	    }
5525 	  break;
5526 	}
5527     }
5528 
5529   return 0;
5530 }
5531 
5532 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5533    and store the result in TARGET.  Normally return TARGET.
5534    Return 0 if that cannot be done.
5535 
5536    MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
5537    it is VOIDmode, they cannot both be CONST_INT.
5538 
5539    UNSIGNEDP is for the case where we have to widen the operands
5540    to perform the operation.  It says to use zero-extension.
5541 
5542    NORMALIZEP is 1 if we should convert the result to be either zero
5543    or one.  NORMALIZEP is -1 if we should convert the result to be
5544    either zero or -1.  If NORMALIZEP is zero, the result will be left
5545    "raw" out of the scc insn.  */
5546 
5547 rtx
5548 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5549 		 machine_mode mode, int unsignedp, int normalizep)
5550 {
5551   machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5552   enum rtx_code rcode;
5553   rtx subtarget;
5554   rtx tem, trueval;
5555   rtx_insn *last;
5556 
5557   /* If we compare constants, we shouldn't use a store-flag operation,
5558      but a constant load.  We can get there via the vanilla route that
5559      usually generates a compare-branch sequence, but will in this case
5560      fold the comparison to a constant, and thus elide the branch.  */
5561   if (CONSTANT_P (op0) && CONSTANT_P (op1))
5562     return NULL_RTX;
5563 
5564   tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5565 			   target_mode);
5566   if (tem)
5567     return tem;
5568 
5569   /* If we reached here, we can't do this with a scc insn; however, there
5570      are some comparisons that can be done in other ways.  Don't do any
5571      of these cases if branches are very cheap.  */
5572   if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5573     return 0;
5574 
5575   /* See what we need to return.  We can only return a 1, -1, or the
5576      sign bit.  */
5577 
5578   if (normalizep == 0)
5579     {
5580       if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5581 	normalizep = STORE_FLAG_VALUE;
5582 
5583       else if (val_signbit_p (mode, STORE_FLAG_VALUE))
5584 	;
5585       else
5586 	return 0;
5587     }
5588 
5589   last = get_last_insn ();
5590 
5591   /* If optimizing, use different pseudo registers for each insn, instead
5592      of reusing the same pseudo.  This leads to better CSE, but slows
5593      down the compiler, since there are more pseudos.  */
5594   subtarget = (!optimize
5595 	       && (target_mode == mode)) ? target : NULL_RTX;
5596   trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5597 
5598   /* For floating-point comparisons, try the reverse comparison or try
5599      changing the "orderedness" of the comparison.  */
5600   if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5601     {
5602       enum rtx_code first_code;
5603       bool and_them;
5604 
5605       rcode = reverse_condition_maybe_unordered (code);
5606       if (can_compare_p (rcode, mode, ccp_store_flag)
5607           && (code == ORDERED || code == UNORDERED
5608 	      || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5609 	      || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5610 	{
5611           int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5612 		          || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5613 
5614 	  /* For the reverse comparison, use either an addition or an XOR.  */
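	  /* Editorial note: e.g. with STORE_FLAG_VALUE == 1 and
	     NORMALIZEP == -1, the reversed comparison yields 1 exactly when
	     the original is false, so adding -1 gives the wanted -1/0
	     result; the XOR with TRUEVAL plays the same role when its cost
	     is lower.  */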
5615           if (want_add
5616 	      && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
5617 			   optimize_insn_for_speed_p ()) == 0)
5618 	    {
5619 	      tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5620 				       STORE_FLAG_VALUE, target_mode);
5621 	      if (tem)
5622                 return expand_binop (target_mode, add_optab, tem,
5623 				     gen_int_mode (normalizep, target_mode),
5624 				     target, 0, OPTAB_WIDEN);
5625 	    }
5626           else if (!want_add
5627 	           && rtx_cost (trueval, mode, XOR, 1,
5628 			        optimize_insn_for_speed_p ()) == 0)
5629 	    {
5630 	      tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5631 				       normalizep, target_mode);
5632 	      if (tem)
5633                 return expand_binop (target_mode, xor_optab, tem, trueval,
5634 				     target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
5635 	    }
5636 	}
5637 
5638       delete_insns_since (last);
5639 
5640       /* Cannot split ORDERED and UNORDERED; only try the above trick.  */
5641       if (code == ORDERED || code == UNORDERED)
5642 	return 0;
5643 
5644       and_them = split_comparison (code, mode, &first_code, &code);
5645 
5646       /* If there are no NaNs, the first comparison should always fall through.
5647          Effectively change the comparison to the other one.  */
5648       if (!HONOR_NANS (mode))
5649 	{
5650           gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5651 	  return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5652 				    target_mode);
5653 	}
5654 
5655       if (!HAVE_conditional_move)
5656 	return 0;
5657 
5658       /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5659 	 conditional move.  */
5660       tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5661 			       normalizep, target_mode);
5662       if (tem == 0)
5663 	return 0;
5664 
5665       if (and_them)
5666         tem = emit_conditional_move (target, code, op0, op1, mode,
5667 				     tem, const0_rtx, GET_MODE (tem), 0);
5668       else
5669         tem = emit_conditional_move (target, code, op0, op1, mode,
5670 				     trueval, tem, GET_MODE (tem), 0);
5671 
5672       if (tem == 0)
5673         delete_insns_since (last);
5674       return tem;
5675     }
5676 
5677   /* The remaining tricks only apply to integer comparisons.  */
5678 
5679   if (GET_MODE_CLASS (mode) != MODE_INT)
5680     return 0;
5681 
5682   /* If this is an equality comparison of integers, we can try to exclusive-or
5683      (or subtract) the two operands and use a recursive call to try the
5684      comparison with zero.  Don't do any of these cases if branches are
5685      very cheap.  */
5686 
5687   if ((code == EQ || code == NE) && op1 != const0_rtx)
5688     {
5689       tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5690 			  OPTAB_WIDEN);
5691 
5692       if (tem == 0)
5693 	tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5694 			    OPTAB_WIDEN);
5695       if (tem != 0)
5696 	tem = emit_store_flag (target, code, tem, const0_rtx,
5697 			       mode, unsignedp, normalizep);
5698       if (tem != 0)
5699 	return tem;
5700 
5701       delete_insns_since (last);
5702     }
5703 
5704   /* For integer comparisons, try the reverse comparison.  However, for
5705      small X, and if we'd have to extend anyway, implementing "X != 0"
5706      as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0".  */
5707   rcode = reverse_condition (code);
5708   if (can_compare_p (rcode, mode, ccp_store_flag)
5709       && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5710 	    && code == NE
5711 	    && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5712 	    && op1 == const0_rtx))
5713     {
5714       int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5715 		      || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5716 
5717       /* Again, for the reverse comparison, use either an addition or an XOR.  */
5718       if (want_add
5719 	  && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
5720 		       optimize_insn_for_speed_p ()) == 0)
5721 	{
5722 	  tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5723 				   STORE_FLAG_VALUE, target_mode);
5724 	  if (tem != 0)
5725             tem = expand_binop (target_mode, add_optab, tem,
5726 				gen_int_mode (normalizep, target_mode),
5727 				target, 0, OPTAB_WIDEN);
5728 	}
5729       else if (!want_add
5730 	       && rtx_cost (trueval, mode, XOR, 1,
5731 			    optimize_insn_for_speed_p ()) == 0)
5732 	{
5733 	  tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5734 				   normalizep, target_mode);
5735 	  if (tem != 0)
5736             tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5737 				INTVAL (trueval) >= 0, OPTAB_WIDEN);
5738 	}
5739 
5740       if (tem != 0)
5741 	return tem;
5742       delete_insns_since (last);
5743     }
5744 
5745   /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5746      the constant zero.  Reject all other comparisons at this point.  Only
5747      do LE and GT if branches are expensive, since those sequences are costly on
5748      2-operand machines.  */
5749 
5750   if (op1 != const0_rtx
5751       || (code != EQ && code != NE
5752 	  && (BRANCH_COST (optimize_insn_for_speed_p (),
5753 			   false) <= 1 || (code != LE && code != GT))))
5754     return 0;
5755 
5756   /* Try to put the result of the comparison in the sign bit.  Assume we can't
5757      do the necessary operation below.  */
5758 
5759   tem = 0;
5760 
5761   /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
5762      the sign bit set.  */
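  /* Editorial note: if A > 0 then both A and A - 1 are non-negative, so the
     OR keeps the sign bit clear; if A == 0 then A - 1 wraps to -1 and sets
     it; if A < 0 then A already has it set.  E.g. A = 1: 1 | 0 = 1 (clear);
     A = 0: 0 | -1 = -1 (set).  */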
5763 
5764   if (code == LE)
5765     {
5766       /* This is destructive, so SUBTARGET can't be OP0.  */
5767       if (rtx_equal_p (subtarget, op0))
5768 	subtarget = 0;
5769 
5770       tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5771 			  OPTAB_WIDEN);
5772       if (tem)
5773 	tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5774 			    OPTAB_WIDEN);
5775     }
5776 
5777   /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5778      number of bits in the mode of OP0, minus one.  */
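  /* Editorial note: the arithmetic shift gives 0 for A >= 0 and -1 for
     A < 0, so the difference is -A, 0, or -1 - A respectively; its sign bit
     is therefore set exactly when A > 0 (e.g. A = 3 yields -3, A = 0 yields
     0, A = -2 yields 1).  */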
5779 
5780   if (code == GT)
5781     {
5782       if (rtx_equal_p (subtarget, op0))
5783 	subtarget = 0;
5784 
5785       tem = maybe_expand_shift (RSHIFT_EXPR, mode, op0,
5786 				GET_MODE_BITSIZE (mode) - 1,
5787 				subtarget, 0);
5788       if (tem)
5789 	tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5790 			    OPTAB_WIDEN);
5791     }
5792 
5793   if (code == EQ || code == NE)
5794     {
5795       /* For EQ or NE, one way to do the comparison is to apply an operation
5796 	 that converts the operand into a positive number if it is nonzero
5797 	 or zero if it was originally zero.  Then, for EQ, we subtract 1 and
5798 	 for NE we negate.  This puts the result in the sign bit.  Then we
5799 	 normalize with a shift, if needed.
5800 
5801 	 Two operations that can do the above actions are ABS and FFS, so try
5802 	 them.  If that doesn't work, and MODE is smaller than a full word,
5803 	 we can use zero-extension to the wider mode (an unsigned conversion)
5804 	 as the operation.  */
5805 
5806       /* Note that ABS doesn't yield a positive number for INT_MIN, but
5807 	 that is compensated by the subsequent overflow when subtracting
5808 	 one / negating.  */
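      /* Editorial note: e.g. via ABS, "A != 0" becomes the sign bit of
	 -abs (A), which is negative exactly when A is nonzero (INT_MIN
	 included, since abs (INT_MIN) wraps back to INT_MIN); "A == 0"
	 uses abs (A) - 1 instead, whose sign bit is set only for A == 0.  */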
5809 
5810       if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5811 	tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5812       else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5813 	tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5814       else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5815 	{
5816 	  tem = convert_modes (word_mode, mode, op0, 1);
5817 	  mode = word_mode;
5818 	}
5819 
5820       if (tem != 0)
5821 	{
5822 	  if (code == EQ)
5823 	    tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5824 				0, OPTAB_WIDEN);
5825 	  else
5826 	    tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5827 	}
5828 
5829       /* If we couldn't do it that way, for NE we can "or" the two's complement
5830 	 of the value with itself.  For EQ, we take the one's complement of
5831 	 that "or", which is an extra insn, so we only handle EQ if branches
5832 	 are expensive.  */
5833 
5834       if (tem == 0
5835 	  && (code == NE
5836 	      || BRANCH_COST (optimize_insn_for_speed_p (),
5837 		      	      false) > 1))
5838 	{
5839 	  if (rtx_equal_p (subtarget, op0))
5840 	    subtarget = 0;
5841 
5842 	  tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5843 	  tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5844 			      OPTAB_WIDEN);
5845 
5846 	  if (tem && code == EQ)
5847 	    tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5848 	}
5849     }
5850 
5851   if (tem && normalizep)
5852     tem = maybe_expand_shift (RSHIFT_EXPR, mode, tem,
5853 			      GET_MODE_BITSIZE (mode) - 1,
5854 			      subtarget, normalizep == 1);
5855 
5856   if (tem)
5857     {
5858       if (!target)
5859         ;
5860       else if (GET_MODE (tem) != target_mode)
5861 	{
5862 	  convert_move (target, tem, 0);
5863 	  tem = target;
5864 	}
5865       else if (!subtarget)
5866 	{
5867 	  emit_move_insn (target, tem);
5868 	  tem = target;
5869 	}
5870     }
5871   else
5872     delete_insns_since (last);
5873 
5874   return tem;
5875 }
5876 
5877 /* Like emit_store_flag, but always succeeds.  */
5878 
5879 rtx
5880 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5881 		       machine_mode mode, int unsignedp, int normalizep)
5882 {
5883   rtx tem;
5884   rtx_code_label *label;
5885   rtx trueval, falseval;
5886 
5887   /* First see if emit_store_flag can do the job.  */
5888   tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5889   if (tem != 0)
5890     return tem;
5891 
5892   /* If one operand is constant, make it the second one.  Only do this
5893      if the other operand is not constant as well.  */
5894 
5895   if (swap_commutative_operands_p (op0, op1))
5896     {
5897       std::swap (op0, op1);
5898       code = swap_condition (code);
5899     }
5900 
5901   if (mode == VOIDmode)
5902     mode = GET_MODE (op0);
5903 
5904   if (!target)
5905     target = gen_reg_rtx (word_mode);
5906 
5907   /* If this failed, we have to do this with set/compare/jump/set code.
5908      For foo != 0, if foo is in OP0, just replace it with 1 if nonzero.  */
5909   trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
5910   if (code == NE
5911       && GET_MODE_CLASS (mode) == MODE_INT
5912       && REG_P (target)
5913       && op0 == target
5914       && op1 == const0_rtx)
5915     {
5916       label = gen_label_rtx ();
5917       do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp, mode,
5918 			       NULL_RTX, NULL, label, -1);
5919       emit_move_insn (target, trueval);
5920       emit_label (label);
5921       return target;
5922     }
5923 
5924   if (!REG_P (target)
5925       || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5926     target = gen_reg_rtx (GET_MODE (target));
5927 
5928   /* Jump in the right direction if the target cannot implement CODE
5929      but can jump on its reverse condition.  */
5930   falseval = const0_rtx;
5931   if (! can_compare_p (code, mode, ccp_jump)
5932       && (! FLOAT_MODE_P (mode)
5933           || code == ORDERED || code == UNORDERED
5934           || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5935           || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5936     {
5937       enum rtx_code rcode;
5938       if (FLOAT_MODE_P (mode))
5939         rcode = reverse_condition_maybe_unordered (code);
5940       else
5941         rcode = reverse_condition (code);
5942 
5943       /* Canonicalize to UNORDERED for the libcall.  */
5944       if (can_compare_p (rcode, mode, ccp_jump)
5945           || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
5946 	{
5947 	  falseval = trueval;
5948 	  trueval = const0_rtx;
5949 	  code = rcode;
5950 	}
5951     }
5952 
5953   emit_move_insn (target, trueval);
5954   label = gen_label_rtx ();
5955   do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX, NULL,
5956 			   label, -1);
5957 
5958   emit_move_insn (target, falseval);
5959   emit_label (label);
5960 
5961   return target;
5962 }
5963 
5964 /* Perform a possibly multi-word comparison and conditional jump to LABEL
5965    if ARG1 OP ARG2 is true, where ARG1 and ARG2 are of mode MODE.  This is
5966    now a thin wrapper around do_compare_rtx_and_jump.  */
5967 
5968 static void
5969 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, machine_mode mode,
5970 		 rtx_code_label *label)
5971 {
5972   int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
5973   do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode, NULL_RTX,
5974 			   NULL, label, -1);
5975 }
5976