/* Medium-level subroutines: convert bit-field store and extract
   and shifts, multiplies and divides to rtl instructions.
   Copyright (C) 1987-2015 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */


#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "diagnostic-core.h"
#include "rtl.h"
#include "hash-set.h"
#include "machmode.h"
#include "vec.h"
#include "double-int.h"
#include "input.h"
#include "alias.h"
#include "symtab.h"
#include "wide-int.h"
#include "inchash.h"
#include "tree.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "tm_p.h"
#include "flags.h"
#include "insn-config.h"
#include "hashtab.h"
#include "hard-reg-set.h"
#include "function.h"
#include "statistics.h"
#include "real.h"
#include "fixed-value.h"
#include "expmed.h"
#include "dojump.h"
#include "explow.h"
#include "calls.h"
#include "emit-rtl.h"
#include "varasm.h"
#include "stmt.h"
#include "expr.h"
#include "insn-codes.h"
#include "optabs.h"
#include "recog.h"
#include "langhooks.h"
#include "predict.h"
#include "basic-block.h"
#include "df.h"
#include "target.h"

struct target_expmed default_target_expmed;
#if SWITCHABLE_TARGET
struct target_expmed *this_target_expmed = &default_target_expmed;
#endif

static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
				   unsigned HOST_WIDE_INT,
				   unsigned HOST_WIDE_INT,
				   unsigned HOST_WIDE_INT,
				   rtx);
static void store_fixed_bit_field_1 (rtx, unsigned HOST_WIDE_INT,
				     unsigned HOST_WIDE_INT,
				     rtx);
static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
				   unsigned HOST_WIDE_INT,
				   unsigned HOST_WIDE_INT,
				   unsigned HOST_WIDE_INT,
				   rtx);
static rtx extract_fixed_bit_field (machine_mode, rtx,
				    unsigned HOST_WIDE_INT,
				    unsigned HOST_WIDE_INT, rtx, int);
static rtx extract_fixed_bit_field_1 (machine_mode, rtx,
				      unsigned HOST_WIDE_INT,
				      unsigned HOST_WIDE_INT, rtx, int);
static rtx lshift_value (machine_mode, unsigned HOST_WIDE_INT, int);
static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
				    unsigned HOST_WIDE_INT, int);
static void do_cmp_and_jump (rtx, rtx, enum rtx_code, machine_mode, rtx_code_label *);
static rtx expand_smod_pow2 (machine_mode, rtx, HOST_WIDE_INT);
static rtx expand_sdiv_pow2 (machine_mode, rtx, HOST_WIDE_INT);

/* Return a constant integer mask value of mode MODE with BITSIZE ones
   followed by BITPOS zeros, or the complement of that if COMPLEMENT.
   The mask is truncated if necessary to the width of mode MODE.  The
   mask is zero-extended if BITSIZE+BITPOS is too small for MODE.  */

static inline rtx
mask_rtx (machine_mode mode, int bitpos, int bitsize, bool complement)
{
  return immed_wide_int_const
    (wi::shifted_mask (bitpos, bitsize, complement,
		       GET_MODE_PRECISION (mode)), mode);
}
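
/* For example, on a target where SImode is 32 bits wide,
   mask_rtx (SImode, 4, 8, false) builds the constant 0x00000ff0
   (eight ones above four zeros), and mask_rtx (SImode, 4, 8, true)
   builds its complement 0xfffff00f.  */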

/* Test whether a value is zero or a power of two.  */
#define EXACT_POWER_OF_2_OR_ZERO_P(x) \
  (((x) & ((x) - (unsigned HOST_WIDE_INT) 1)) == 0)
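
/* For example, 8 (binary 1000) gives 8 & 7 == 0, so the predicate
   holds; 6 (binary 110) gives 6 & 5 == 4, so it does not.  Zero
   satisfies it as well, hence the name.  */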

struct init_expmed_rtl
{
  rtx reg;
  rtx plus;
  rtx neg;
  rtx mult;
  rtx sdiv;
  rtx udiv;
  rtx sdiv_32;
  rtx smod_32;
  rtx wide_mult;
  rtx wide_lshr;
  rtx wide_trunc;
  rtx shift;
  rtx shift_mult;
  rtx shift_add;
  rtx shift_sub0;
  rtx shift_sub1;
  rtx zext;
  rtx trunc;

  rtx pow2[MAX_BITS_PER_WORD];
  rtx cint[MAX_BITS_PER_WORD];
};

static void
init_expmed_one_conv (struct init_expmed_rtl *all, machine_mode to_mode,
		      machine_mode from_mode, bool speed)
{
  int to_size, from_size;
  rtx which;

  to_size = GET_MODE_PRECISION (to_mode);
  from_size = GET_MODE_PRECISION (from_mode);

  /* Most partial integers have a precision less than that of the
     "full" integer they require for storage.  If one doesn't, reduce
     its bit size by one for the comparison made here.  */
  if (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT
      && exact_log2 (to_size) != -1)
    to_size --;
  if (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT
      && exact_log2 (from_size) != -1)
    from_size --;

  /* Assume cost of zero-extend and sign-extend is the same.  */
  which = (to_size < from_size ? all->trunc : all->zext);

  PUT_MODE (all->reg, from_mode);
  set_convert_cost (to_mode, from_mode, speed, set_src_cost (which, speed));
}

static void
init_expmed_one_mode (struct init_expmed_rtl *all,
		      machine_mode mode, int speed)
{
  int m, n, mode_bitsize;
  machine_mode mode_from;

  mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);

  PUT_MODE (all->reg, mode);
  PUT_MODE (all->plus, mode);
  PUT_MODE (all->neg, mode);
  PUT_MODE (all->mult, mode);
  PUT_MODE (all->sdiv, mode);
  PUT_MODE (all->udiv, mode);
  PUT_MODE (all->sdiv_32, mode);
  PUT_MODE (all->smod_32, mode);
  PUT_MODE (all->wide_trunc, mode);
  PUT_MODE (all->shift, mode);
  PUT_MODE (all->shift_mult, mode);
  PUT_MODE (all->shift_add, mode);
  PUT_MODE (all->shift_sub0, mode);
  PUT_MODE (all->shift_sub1, mode);
  PUT_MODE (all->zext, mode);
  PUT_MODE (all->trunc, mode);

  set_add_cost (speed, mode, set_src_cost (all->plus, speed));
  set_neg_cost (speed, mode, set_src_cost (all->neg, speed));
  set_mul_cost (speed, mode, set_src_cost (all->mult, speed));
  set_sdiv_cost (speed, mode, set_src_cost (all->sdiv, speed));
  set_udiv_cost (speed, mode, set_src_cost (all->udiv, speed));

  set_sdiv_pow2_cheap (speed, mode, (set_src_cost (all->sdiv_32, speed)
				     <= 2 * add_cost (speed, mode)));
  set_smod_pow2_cheap (speed, mode, (set_src_cost (all->smod_32, speed)
				     <= 4 * add_cost (speed, mode)));

  set_shift_cost (speed, mode, 0, 0);
  {
    int cost = add_cost (speed, mode);
    set_shiftadd_cost (speed, mode, 0, cost);
    set_shiftsub0_cost (speed, mode, 0, cost);
    set_shiftsub1_cost (speed, mode, 0, cost);
  }

  n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
  for (m = 1; m < n; m++)
    {
      XEXP (all->shift, 1) = all->cint[m];
      XEXP (all->shift_mult, 1) = all->pow2[m];

      set_shift_cost (speed, mode, m, set_src_cost (all->shift, speed));
      set_shiftadd_cost (speed, mode, m, set_src_cost (all->shift_add, speed));
      set_shiftsub0_cost (speed, mode, m, set_src_cost (all->shift_sub0, speed));
      set_shiftsub1_cost (speed, mode, m, set_src_cost (all->shift_sub1, speed));
    }

  if (SCALAR_INT_MODE_P (mode))
    {
      for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
	   mode_from = (machine_mode)(mode_from + 1))
	init_expmed_one_conv (all, mode, mode_from, speed);
    }
  if (GET_MODE_CLASS (mode) == MODE_INT)
    {
      machine_mode  wider_mode = GET_MODE_WIDER_MODE (mode);
      if (wider_mode != VOIDmode)
	{
	  PUT_MODE (all->zext, wider_mode);
	  PUT_MODE (all->wide_mult, wider_mode);
	  PUT_MODE (all->wide_lshr, wider_mode);
	  XEXP (all->wide_lshr, 1) = GEN_INT (mode_bitsize);

	  set_mul_widen_cost (speed, wider_mode,
			      set_src_cost (all->wide_mult, speed));
	  set_mul_highpart_cost (speed, mode,
				 set_src_cost (all->wide_trunc, speed));
	}
    }
}

void
init_expmed (void)
{
  struct init_expmed_rtl all;
  machine_mode mode = QImode;
  int m, speed;

  memset (&all, 0, sizeof all);
  for (m = 1; m < MAX_BITS_PER_WORD; m++)
    {
      all.pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
      all.cint[m] = GEN_INT (m);
    }

  /* Avoid using hard regs in ways which may be unsupported.  */
  all.reg = gen_rtx_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
  all.plus = gen_rtx_PLUS (mode, all.reg, all.reg);
  all.neg = gen_rtx_NEG (mode, all.reg);
  all.mult = gen_rtx_MULT (mode, all.reg, all.reg);
  all.sdiv = gen_rtx_DIV (mode, all.reg, all.reg);
  all.udiv = gen_rtx_UDIV (mode, all.reg, all.reg);
  all.sdiv_32 = gen_rtx_DIV (mode, all.reg, all.pow2[5]);
  all.smod_32 = gen_rtx_MOD (mode, all.reg, all.pow2[5]);
  all.zext = gen_rtx_ZERO_EXTEND (mode, all.reg);
  all.wide_mult = gen_rtx_MULT (mode, all.zext, all.zext);
  all.wide_lshr = gen_rtx_LSHIFTRT (mode, all.wide_mult, all.reg);
  all.wide_trunc = gen_rtx_TRUNCATE (mode, all.wide_lshr);
  all.shift = gen_rtx_ASHIFT (mode, all.reg, all.reg);
  all.shift_mult = gen_rtx_MULT (mode, all.reg, all.reg);
  all.shift_add = gen_rtx_PLUS (mode, all.shift_mult, all.reg);
  all.shift_sub0 = gen_rtx_MINUS (mode, all.shift_mult, all.reg);
  all.shift_sub1 = gen_rtx_MINUS (mode, all.reg, all.shift_mult);
  all.trunc = gen_rtx_TRUNCATE (mode, all.reg);

  for (speed = 0; speed < 2; speed++)
    {
      crtl->maybe_hot_insn_p = speed;
      set_zero_cost (speed, set_src_cost (const0_rtx, speed));

      for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
	   mode = (machine_mode)(mode + 1))
	init_expmed_one_mode (&all, mode, speed);

      if (MIN_MODE_PARTIAL_INT != VOIDmode)
	for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
	     mode = (machine_mode)(mode + 1))
	  init_expmed_one_mode (&all, mode, speed);

      if (MIN_MODE_VECTOR_INT != VOIDmode)
	for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
	     mode = (machine_mode)(mode + 1))
	  init_expmed_one_mode (&all, mode, speed);
    }

  if (alg_hash_used_p ())
    {
      struct alg_hash_entry *p = alg_hash_entry_ptr (0);
      memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
    }
  else
    set_alg_hash_used_p (true);
  default_rtl_profile ();

  ggc_free (all.trunc);
  ggc_free (all.shift_sub1);
  ggc_free (all.shift_sub0);
  ggc_free (all.shift_add);
  ggc_free (all.shift_mult);
  ggc_free (all.shift);
  ggc_free (all.wide_trunc);
  ggc_free (all.wide_lshr);
  ggc_free (all.wide_mult);
  ggc_free (all.zext);
  ggc_free (all.smod_32);
  ggc_free (all.sdiv_32);
  ggc_free (all.udiv);
  ggc_free (all.sdiv);
  ggc_free (all.mult);
  ggc_free (all.neg);
  ggc_free (all.plus);
  ggc_free (all.reg);
}

/* Return an rtx representing minus the value of X.
   MODE is the intended mode of the result,
   useful if X is a CONST_INT.  */

rtx
negate_rtx (machine_mode mode, rtx x)
{
  rtx result = simplify_unary_operation (NEG, mode, x, mode);

  if (result == 0)
    result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);

  return result;
}

/* Adjust bitfield memory MEM so that it points to the first unit of mode
   MODE that contains a bitfield of size BITSIZE at bit position BITNUM.
   If MODE is BLKmode, return a reference to every byte in the bitfield.
   Set *NEW_BITNUM to the bit position of the field within the new memory.  */

static rtx
narrow_bit_field_mem (rtx mem, machine_mode mode,
		      unsigned HOST_WIDE_INT bitsize,
		      unsigned HOST_WIDE_INT bitnum,
		      unsigned HOST_WIDE_INT *new_bitnum)
{
  if (mode == BLKmode)
    {
      *new_bitnum = bitnum % BITS_PER_UNIT;
      HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
      HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
			    / BITS_PER_UNIT);
      return adjust_bitfield_address_size (mem, mode, offset, size);
    }
  else
    {
      unsigned int unit = GET_MODE_BITSIZE (mode);
      *new_bitnum = bitnum % unit;
      HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
      return adjust_bitfield_address (mem, mode, offset);
    }
}
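
/* For example, on the usual BITS_PER_UNIT == 8 target, a call such as
   narrow_bit_field_mem (mem, HImode, 3, 37, &n) rebases MEM on the
   16-bit unit at byte offset (37 - 37 % 16) / 8 == 4 and sets
   n to 37 % 16 == 5.  */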

/* The caller wants to perform insertion or extraction PATTERN on a
   bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
   BITREGION_START and BITREGION_END are as for store_bit_field
   and FIELDMODE is the natural mode of the field.

   Search for a mode that is compatible with the memory access
   restrictions and (where applicable) with a register insertion or
   extraction.  Return the new memory on success, storing the adjusted
   bit position in *NEW_BITNUM.  Return null otherwise.  */

static rtx
adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
			      rtx op0, HOST_WIDE_INT bitsize,
			      HOST_WIDE_INT bitnum,
			      unsigned HOST_WIDE_INT bitregion_start,
			      unsigned HOST_WIDE_INT bitregion_end,
			      machine_mode fieldmode,
			      unsigned HOST_WIDE_INT *new_bitnum)
{
  bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
				bitregion_end, MEM_ALIGN (op0),
				MEM_VOLATILE_P (op0));
  machine_mode best_mode;
  if (iter.next_mode (&best_mode))
    {
      /* We can use a memory in BEST_MODE.  See whether this is true for
	 any wider modes.  All other things being equal, we prefer to
	 use the widest mode possible because it tends to expose more
	 CSE opportunities.  */
      if (!iter.prefer_smaller_modes ())
	{
	  /* Limit the search to the mode required by the corresponding
	     register insertion or extraction instruction, if any.  */
	  machine_mode limit_mode = word_mode;
	  extraction_insn insn;
	  if (get_best_reg_extraction_insn (&insn, pattern,
					    GET_MODE_BITSIZE (best_mode),
					    fieldmode))
	    limit_mode = insn.field_mode;

	  machine_mode wider_mode;
	  while (iter.next_mode (&wider_mode)
		 && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
	    best_mode = wider_mode;
	}
      return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
				   new_bitnum);
    }
  return NULL_RTX;
}

/* Return true if a bitfield of size BITSIZE at bit number BITNUM within
   a structure of mode STRUCT_MODE represents a lowpart subreg.   The subreg
   offset is then BITNUM / BITS_PER_UNIT.  */

static bool
lowpart_bit_field_p (unsigned HOST_WIDE_INT bitnum,
		     unsigned HOST_WIDE_INT bitsize,
		     machine_mode struct_mode)
{
  if (BYTES_BIG_ENDIAN)
    return (bitnum % BITS_PER_UNIT == 0
	    && (bitnum + bitsize == GET_MODE_BITSIZE (struct_mode)
		|| (bitnum + bitsize) % BITS_PER_WORD == 0));
  else
    return bitnum % BITS_PER_WORD == 0;
}
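
/* For example, with 32-bit words: on a little-endian target any field
   whose BITNUM is a multiple of 32 is a lowpart; on a big-endian
   target an 8-bit field at BITNUM 24 of a 32-bit structure qualifies,
   since it ends exactly at the mode (and word) boundary.  */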

/* Return true if -fstrict-volatile-bitfields applies to an access of OP0
   containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE.
   Return false if the access would touch memory outside the range
   BITREGION_START to BITREGION_END for conformance to the C++ memory
   model.  */

static bool
strict_volatile_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
			    unsigned HOST_WIDE_INT bitnum,
			    machine_mode fieldmode,
			    unsigned HOST_WIDE_INT bitregion_start,
			    unsigned HOST_WIDE_INT bitregion_end)
{
  unsigned HOST_WIDE_INT modesize = GET_MODE_BITSIZE (fieldmode);

  /* -fstrict-volatile-bitfields must be enabled and we must have a
     volatile MEM.  */
  if (!MEM_P (op0)
      || !MEM_VOLATILE_P (op0)
      || flag_strict_volatile_bitfields <= 0)
    return false;

  /* Non-integral modes likely only happen with packed structures.
     Punt.  */
  if (!SCALAR_INT_MODE_P (fieldmode))
    return false;

  /* The bit size must not be larger than the field mode, and
     the field mode must not be larger than a word.  */
  if (bitsize > modesize || modesize > BITS_PER_WORD)
    return false;

  /* Check for cases of unaligned fields that must be split.  */
  if (bitnum % modesize + bitsize > modesize)
    return false;

  /* The memory must be sufficiently aligned for a MODESIZE access.
     This condition guarantees that the access will not touch anything
     after the end of the structure.  */
  if (MEM_ALIGN (op0) < modesize)
    return false;

  /* Check for cases where the C++ memory model applies.  */
  if (bitregion_end != 0
      && (bitnum - bitnum % modesize < bitregion_start
	  || bitnum - bitnum % modesize + modesize - 1 > bitregion_end))
    return false;

  return true;
}
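
/* For example, given "volatile struct { int x : 8; } *p" with SImode
   as the field mode and *p aligned to 32 bits, all of the checks
   above pass, so -fstrict-volatile-bitfields accesses p->x with a
   single SImode load or store rather than a narrower access.  */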

/* Return true if OP0 is a memory and if a bitfield of size BITSIZE at
   bit number BITNUM can be treated as a simple value of mode MODE.  */

static bool
simple_mem_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
		       unsigned HOST_WIDE_INT bitnum, machine_mode mode)
{
  return (MEM_P (op0)
	  && bitnum % BITS_PER_UNIT == 0
	  && bitsize == GET_MODE_BITSIZE (mode)
	  && (!SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
	      || (bitnum % GET_MODE_ALIGNMENT (mode) == 0
		  && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode))));
}

/* Try to use instruction INSV to store VALUE into a field of OP0.
   BITSIZE and BITNUM are as for store_bit_field.  */

static bool
store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
			    unsigned HOST_WIDE_INT bitsize,
			    unsigned HOST_WIDE_INT bitnum,
			    rtx value)
{
  struct expand_operand ops[4];
  rtx value1;
  rtx xop0 = op0;
  rtx_insn *last = get_last_insn ();
  bool copy_back = false;

  machine_mode op_mode = insv->field_mode;
  unsigned int unit = GET_MODE_BITSIZE (op_mode);
  if (bitsize == 0 || bitsize > unit)
    return false;

  if (MEM_P (xop0))
    /* Get a reference to the first byte of the field.  */
    xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
				 &bitnum);
  else
    {
      /* Convert from counting within OP0 to counting in OP_MODE.  */
      if (BYTES_BIG_ENDIAN)
	bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));

      /* If xop0 is a register, we need it in OP_MODE
	 to make it acceptable to the format of insv.  */
      if (GET_CODE (xop0) == SUBREG)
	/* We can't just change the mode, because this might clobber op0,
	   and we will need the original value of op0 if insv fails.  */
	xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
      if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
	xop0 = gen_lowpart_SUBREG (op_mode, xop0);
    }

  /* If the destination is a paradoxical subreg such that we need a
     truncate to the inner mode, perform the insertion on a temporary and
     truncate the result to the original destination.  Note that we can't
     just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
     X) 0)) is (reg:N X).  */
  if (GET_CODE (xop0) == SUBREG
      && REG_P (SUBREG_REG (xop0))
      && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
					 op_mode))
    {
      rtx tem = gen_reg_rtx (op_mode);
      emit_move_insn (tem, xop0);
      xop0 = tem;
      copy_back = true;
    }

  /* There is a similar overflow check at the start of store_bit_field_1,
     but it only handles the case where the field lies completely outside
     the register.  The field may also lie partially in the register, in
     which case we need to shrink BITSIZE to the part that fits.  Without
     this fix, pr48335-2.c would be broken on big-endian targets that
     have a bit-insert instruction, such as arm and aarch64.  */
  if (bitsize + bitnum > unit && bitnum < unit)
    {
      warning (OPT_Wextra, "write of %wu-bit data outside the bound of "
	       "destination object, data truncated into %wu-bit",
	       bitsize, unit - bitnum);
      bitsize = unit - bitnum;
    }

  /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
     "backwards" from the size of the unit we are inserting into.
     Otherwise, we count bits from the most significant on a
     BYTES/BITS_BIG_ENDIAN machine.  */

  if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
    bitnum = unit - bitsize - bitnum;

  /* Convert VALUE to op_mode (which insv insn wants) in VALUE1.  */
  value1 = value;
  if (GET_MODE (value) != op_mode)
    {
      if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
	{
	  /* Optimization: Don't bother really extending VALUE
	     if it has all the bits we will actually use.  However,
	     if we must narrow it, be sure we do it correctly.  */

	  if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
	    {
	      rtx tmp;

	      tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
	      if (! tmp)
		tmp = simplify_gen_subreg (op_mode,
					   force_reg (GET_MODE (value),
						      value1),
					   GET_MODE (value), 0);
	      value1 = tmp;
	    }
	  else
	    value1 = gen_lowpart (op_mode, value1);
	}
      else if (CONST_INT_P (value))
	value1 = gen_int_mode (INTVAL (value), op_mode);
      else
	/* Parse phase is supposed to make VALUE's data type
	   match that of the component reference, which is a type
	   at least as wide as the field; so VALUE should have
	   a mode that corresponds to that type.  */
	gcc_assert (CONSTANT_P (value));
    }

  create_fixed_operand (&ops[0], xop0);
  create_integer_operand (&ops[1], bitsize);
  create_integer_operand (&ops[2], bitnum);
  create_input_operand (&ops[3], value1, op_mode);
  if (maybe_expand_insn (insv->icode, 4, ops))
    {
      if (copy_back)
	convert_move (op0, xop0, true);
      return true;
    }
  delete_insns_since (last);
  return false;
}
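
/* A successful expansion above boils down to one insertion insn of the
   general form
     (set (zero_extract:M xop0 (const_int bitsize) (const_int bitnum))
	  value1)
   with each operand already converted to whatever the target's insv
   pattern expects.  */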

/* A subroutine of store_bit_field, with the same arguments.  Return true
   if the operation could be implemented.

   If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
   no other way of implementing the operation.  If FALLBACK_P is false,
   return false instead.  */

static bool
store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
		   unsigned HOST_WIDE_INT bitnum,
		   unsigned HOST_WIDE_INT bitregion_start,
		   unsigned HOST_WIDE_INT bitregion_end,
		   machine_mode fieldmode,
		   rtx value, bool fallback_p)
{
  rtx op0 = str_rtx;
  rtx orig_value;

  while (GET_CODE (op0) == SUBREG)
    {
      /* The following line once was done only if WORDS_BIG_ENDIAN,
	 but I think that is a mistake.  WORDS_BIG_ENDIAN is
	 meaningful at a much higher level; when structures are copied
	 between memory and regs, the higher-numbered regs
	 always get higher addresses.  */
      int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
      int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
      int byte_offset = 0;

      /* Paradoxical subregs need special handling on big endian machines.  */
      if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
	{
	  int difference = inner_mode_size - outer_mode_size;

	  if (WORDS_BIG_ENDIAN)
	    byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
	  if (BYTES_BIG_ENDIAN)
	    byte_offset += difference % UNITS_PER_WORD;
	}
      else
	byte_offset = SUBREG_BYTE (op0);

      bitnum += byte_offset * BITS_PER_UNIT;
      op0 = SUBREG_REG (op0);
    }

  /* No action is needed if the target is a register and if the field
     lies completely outside that register.  This can occur if the source
     code contains an out-of-bounds access to a small array.  */
  if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
    return true;

  /* Use vec_set patterns for inserting parts of vectors whenever
     available.  */
  if (VECTOR_MODE_P (GET_MODE (op0))
      && !MEM_P (op0)
      && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing
      && fieldmode == GET_MODE_INNER (GET_MODE (op0))
      && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
      && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
    {
      struct expand_operand ops[3];
      machine_mode outermode = GET_MODE (op0);
      machine_mode innermode = GET_MODE_INNER (outermode);
      enum insn_code icode = optab_handler (vec_set_optab, outermode);
      int pos = bitnum / GET_MODE_BITSIZE (innermode);

      create_fixed_operand (&ops[0], op0);
      create_input_operand (&ops[1], value, innermode);
      create_integer_operand (&ops[2], pos);
      if (maybe_expand_insn (icode, 3, ops))
	return true;
    }

711      a full-word or multi-word field can be done with just a SUBREG.  */
712   if (!MEM_P (op0)
713       && bitsize == GET_MODE_BITSIZE (fieldmode)
714       && ((bitsize == GET_MODE_BITSIZE (GET_MODE (op0)) && bitnum == 0)
715 	  || (bitsize % BITS_PER_WORD == 0 && bitnum % BITS_PER_WORD == 0)))
716     {
717       /* Use the subreg machinery either to narrow OP0 to the required
718 	 words or to cope with mode punning between equal-sized modes.
	 In the latter case, apply the subreg to the rhs, not the lhs.  */
      rtx sub;

      if (bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
	{
	  sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0);
	  if (sub)
	    {
	      emit_move_insn (op0, sub);
	      return true;
	    }
	}
      else
	{
	  sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
				     bitnum / BITS_PER_UNIT);
	  if (sub)
	    {
	      emit_move_insn (sub, value);
	      return true;
	    }
	}
    }

  /* If the target is memory, storing any naturally aligned field can be
     done with a simple store.  For targets that support fast unaligned
     memory, any naturally sized, unit aligned field can be done directly.  */
  if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode))
    {
      op0 = adjust_bitfield_address (op0, fieldmode, bitnum / BITS_PER_UNIT);
      emit_move_insn (op0, value);
      return true;
    }

  /* Make sure we are playing with integral modes.  Pun with subregs
     if we aren't.  This must come after the entire register case above,
     since that case is valid for any mode.  The following cases are only
     valid for integral modes.  */
  {
    machine_mode imode = int_mode_for_mode (GET_MODE (op0));
    if (imode != GET_MODE (op0))
      {
	if (MEM_P (op0))
	  op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
	else
	  {
	    gcc_assert (imode != BLKmode);
	    op0 = gen_lowpart (imode, op0);
	  }
      }
  }

  /* Storing an lsb-aligned field in a register
     can be done with a movstrict instruction.  */

  if (!MEM_P (op0)
      && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
      && bitsize == GET_MODE_BITSIZE (fieldmode)
      && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
    {
      struct expand_operand ops[2];
      enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
      rtx arg0 = op0;
      unsigned HOST_WIDE_INT subreg_off;

      if (GET_CODE (arg0) == SUBREG)
	{
	  /* Else we've got some float mode source being extracted into
	     a different float mode destination -- this combination of
	     subregs results in Severe Tire Damage.  */
	  gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
		      || GET_MODE_CLASS (fieldmode) == MODE_INT
		      || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
	  arg0 = SUBREG_REG (arg0);
	}

      subreg_off = bitnum / BITS_PER_UNIT;
      if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
	{
	  arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);

	  create_fixed_operand (&ops[0], arg0);
	  /* Shrink the source operand to FIELDMODE.  */
	  create_convert_operand_to (&ops[1], value, fieldmode, false);
	  if (maybe_expand_insn (icode, 2, ops))
	    return true;
	}
    }

  /* Handle fields bigger than a word.  */

  if (bitsize > BITS_PER_WORD)
    {
      /* Here we transfer the words of the field
	 in the order least significant first.
	 This is because the most significant word is the one which may
	 be less than full.
	 However, only do that if the value is not BLKmode.  */

      unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
      unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
      unsigned int i;
      rtx_insn *last;

      /* This is the mode we must force value to, so that there will be enough
	 subwords to extract.  Note that fieldmode will often (always?) be
	 VOIDmode, because that is what store_field uses to indicate that this
	 is a bit field, but passing VOIDmode to operand_subword_force
	 is not allowed.  */
      fieldmode = GET_MODE (value);
      if (fieldmode == VOIDmode)
	fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);

      last = get_last_insn ();
      for (i = 0; i < nwords; i++)
	{
	  /* If I is 0, use the low-order word in both field and target;
	     if I is 1, use the next to lowest word; and so on.  */
	  unsigned int wordnum = (backwards
				  ? GET_MODE_SIZE (fieldmode) / UNITS_PER_WORD
				  - i - 1
				  : i);
	  unsigned int bit_offset = (backwards
				     ? MAX ((int) bitsize - ((int) i + 1)
					    * BITS_PER_WORD,
					    0)
				     : (int) i * BITS_PER_WORD);
	  rtx value_word = operand_subword_force (value, wordnum, fieldmode);
	  unsigned HOST_WIDE_INT new_bitsize =
	    MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);

	  /* If the remaining chunk is narrower than a full word, make
	     sure that on big-endian machines the higher-order bits are
	     used.  */
	  if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
	    value_word = simplify_expand_binop (word_mode, lshr_optab,
						value_word,
						GEN_INT (BITS_PER_WORD
							 - new_bitsize),
						NULL_RTX, true,
						OPTAB_LIB_WIDEN);

	  if (!store_bit_field_1 (op0, new_bitsize,
				  bitnum + bit_offset,
				  bitregion_start, bitregion_end,
				  word_mode,
				  value_word, fallback_p))
	    {
	      delete_insns_since (last);
	      return false;
	    }
	}
      return true;
    }

  /* If VALUE has a floating-point or complex mode, access it as an
     integer of the corresponding size.  This can occur on a machine
     with 64 bit registers that uses SFmode for float.  It can also
     occur for unaligned float or complex fields.  */
  orig_value = value;
  if (GET_MODE (value) != VOIDmode
      && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
      && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
    {
      value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
      emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
    }

  /* If OP0 is a multi-word register, narrow it to the affected word.
     If the region spans two words, defer to store_split_bit_field.  */
  if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
    {
      op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
				 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
      gcc_assert (op0);
      bitnum %= BITS_PER_WORD;
      if (bitnum + bitsize > BITS_PER_WORD)
	{
	  if (!fallback_p)
	    return false;

	  store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
				 bitregion_end, value);
	  return true;
	}
    }

  /* From here on we can assume that the field to be stored in fits
     within a word.  If the destination is a register, it too fits
     in a word.  */

  extraction_insn insv;
  if (!MEM_P (op0)
      && get_best_reg_extraction_insn (&insv, EP_insv,
				       GET_MODE_BITSIZE (GET_MODE (op0)),
				       fieldmode)
      && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
    return true;

  /* If OP0 is a memory, try copying it to a register and seeing if a
     cheap register alternative is available.  */
  if (MEM_P (op0))
    {
      if (get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
					fieldmode)
	  && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
	return true;

      rtx_insn *last = get_last_insn ();

      /* Try loading part of OP0 into a register, inserting the bitfield
	 into that, and then copying the result back to OP0.  */
      unsigned HOST_WIDE_INT bitpos;
      rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
					       bitregion_start, bitregion_end,
					       fieldmode, &bitpos);
      if (xop0)
	{
	  rtx tempreg = copy_to_reg (xop0);
	  if (store_bit_field_1 (tempreg, bitsize, bitpos,
				 bitregion_start, bitregion_end,
				 fieldmode, orig_value, false))
	    {
	      emit_move_insn (xop0, tempreg);
	      return true;
	    }
	  delete_insns_since (last);
	}
    }

  if (!fallback_p)
    return false;

  store_fixed_bit_field (op0, bitsize, bitnum, bitregion_start,
			 bitregion_end, value);
  return true;
}

/* Generate code to store value from rtx VALUE
   into a bit-field within structure STR_RTX
   containing BITSIZE bits starting at bit BITNUM.

   BITREGION_START is the bitpos of the first bitfield in this region.
   BITREGION_END is the bitpos of the ending bitfield in this region.
   Both are 0 if the C++ memory model does not apply, or if we are
   not interested in keeping track of bitfield regions.

   FIELDMODE is the machine-mode of the FIELD_DECL node for this field.  */

void
store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
		 unsigned HOST_WIDE_INT bitnum,
		 unsigned HOST_WIDE_INT bitregion_start,
		 unsigned HOST_WIDE_INT bitregion_end,
		 machine_mode fieldmode,
		 rtx value)
{
  /* Handle -fstrict-volatile-bitfields in the cases where it applies.  */
  if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, fieldmode,
				  bitregion_start, bitregion_end))
    {
      /* Storing a full word can be done with a simple store.  We know
	 here that the field can be accessed with a single instruction.
	 For targets that support unaligned memory, an unaligned access
	 may be necessary.  */
      if (bitsize == GET_MODE_BITSIZE (fieldmode))
	{
	  str_rtx = adjust_bitfield_address (str_rtx, fieldmode,
					     bitnum / BITS_PER_UNIT);
	  gcc_assert (bitnum % BITS_PER_UNIT == 0);
	  emit_move_insn (str_rtx, value);
	}
      else
	{
	  rtx temp;

	  str_rtx = narrow_bit_field_mem (str_rtx, fieldmode, bitsize, bitnum,
					  &bitnum);
	  gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (fieldmode));
	  temp = copy_to_reg (str_rtx);
	  if (!store_bit_field_1 (temp, bitsize, bitnum, 0, 0,
				  fieldmode, value, true))
	    gcc_unreachable ();

	  emit_move_insn (str_rtx, temp);
	}

      return;
    }

  /* Under the C++0x memory model, we must not touch bits outside the
     bit region.  Adjust the address to start at the beginning of the
     bit region.  */
  if (MEM_P (str_rtx) && bitregion_start > 0)
    {
      machine_mode bestmode;
      HOST_WIDE_INT offset, size;

      gcc_assert ((bitregion_start % BITS_PER_UNIT) == 0);

      offset = bitregion_start / BITS_PER_UNIT;
      bitnum -= bitregion_start;
      size = (bitnum + bitsize + BITS_PER_UNIT - 1) / BITS_PER_UNIT;
      bitregion_end -= bitregion_start;
      bitregion_start = 0;
      bestmode = get_best_mode (bitsize, bitnum,
				bitregion_start, bitregion_end,
				MEM_ALIGN (str_rtx), VOIDmode,
				MEM_VOLATILE_P (str_rtx));
      str_rtx = adjust_bitfield_address_size (str_rtx, bestmode, offset, size);
    }

  if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
			  bitregion_start, bitregion_end,
			  fieldmode, value, true))
    gcc_unreachable ();
}

/* Use shifts and boolean operations to store VALUE into a bit field of
   width BITSIZE in OP0, starting at bit BITNUM.  */

static void
store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
		       unsigned HOST_WIDE_INT bitnum,
		       unsigned HOST_WIDE_INT bitregion_start,
		       unsigned HOST_WIDE_INT bitregion_end,
		       rtx value)
{
  /* There is a case not handled here:
     a structure with a known alignment of just a halfword
     and a field split across two aligned halfwords within the structure.
     Or likewise a structure with a known alignment of just a byte
     and a field split across two bytes.
     Such cases are not supposed to be able to occur.  */

  if (MEM_P (op0))
    {
      machine_mode mode = GET_MODE (op0);
      if (GET_MODE_BITSIZE (mode) == 0
	  || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
	mode = word_mode;
      mode = get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
			    MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));

      if (mode == VOIDmode)
	{
	  /* The only way this should occur is if the field spans word
	     boundaries.  */
	  store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
				 bitregion_end, value);
	  return;
	}

      op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
    }

  store_fixed_bit_field_1 (op0, bitsize, bitnum, value);
}
/* Helper function for store_fixed_bit_field; it always stores the
   bit field using the MODE of OP0.  */

static void
store_fixed_bit_field_1 (rtx op0, unsigned HOST_WIDE_INT bitsize,
			 unsigned HOST_WIDE_INT bitnum,
			 rtx value)
{
  machine_mode mode;
  rtx temp;
  int all_zero = 0;
  int all_one = 0;

  mode = GET_MODE (op0);
  gcc_assert (SCALAR_INT_MODE_P (mode));

  /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
     for invalid input, such as f5 from gcc.dg/pr48335-2.c.  */

  if (BYTES_BIG_ENDIAN)
    /* BITNUM is the distance between our msb
       and that of the containing datum.
       Convert it to the distance from the lsb.  */
    bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;

  /* Now BITNUM is always the distance between our lsb
     and that of OP0.  */

  /* Shift VALUE left by BITNUM bits.  If VALUE is not constant,
     we must first convert its mode to MODE.  */

  if (CONST_INT_P (value))
    {
      unsigned HOST_WIDE_INT v = UINTVAL (value);

      if (bitsize < HOST_BITS_PER_WIDE_INT)
	v &= ((unsigned HOST_WIDE_INT) 1 << bitsize) - 1;

      if (v == 0)
	all_zero = 1;
      else if ((bitsize < HOST_BITS_PER_WIDE_INT
		&& v == ((unsigned HOST_WIDE_INT) 1 << bitsize) - 1)
	       || (bitsize == HOST_BITS_PER_WIDE_INT
		   && v == (unsigned HOST_WIDE_INT) -1))
	all_one = 1;

      value = lshift_value (mode, v, bitnum);
    }
  else
    {
      int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
		      && bitnum + bitsize != GET_MODE_BITSIZE (mode));

      if (GET_MODE (value) != mode)
	value = convert_to_mode (mode, value, 1);

      if (must_and)
	value = expand_binop (mode, and_optab, value,
			      mask_rtx (mode, 0, bitsize, 0),
			      NULL_RTX, 1, OPTAB_LIB_WIDEN);
      if (bitnum > 0)
	value = expand_shift (LSHIFT_EXPR, mode, value,
			      bitnum, NULL_RTX, 1);
    }

  /* Now clear the chosen bits in OP0,
     except that if VALUE is -1 we need not bother.  */
  /* We keep the intermediates in registers to allow CSE to combine
     consecutive bitfield assignments.  */

  temp = force_reg (mode, op0);

  if (! all_one)
    {
      temp = expand_binop (mode, and_optab, temp,
			   mask_rtx (mode, bitnum, bitsize, 1),
			   NULL_RTX, 1, OPTAB_LIB_WIDEN);
      temp = force_reg (mode, temp);
    }

  /* Now logical-or VALUE into OP0, unless it is zero.  */

  if (! all_zero)
    {
      temp = expand_binop (mode, ior_optab, temp, value,
			   NULL_RTX, 1, OPTAB_LIB_WIDEN);
      temp = force_reg (mode, temp);
    }

  if (op0 != temp)
    {
      op0 = copy_rtx (op0);
      emit_move_insn (op0, temp);
    }
}
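
/* Schematically, the sequence emitted above computes
     op0 = (op0 & ~(mask << bitnum)) | ((value & mask) << bitnum)
   where MASK has BITSIZE low-order ones; the AND is skipped when
   VALUE is known to be all ones and the IOR when it is zero.  */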

/* Store a bit field that is split across multiple accessible memory objects.

   OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
   BITSIZE is the field width; BITPOS the position of its first bit
   (within the word).
   VALUE is the value to store.

   This does not yet handle fields wider than BITS_PER_WORD.  */

static void
store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
		       unsigned HOST_WIDE_INT bitpos,
		       unsigned HOST_WIDE_INT bitregion_start,
		       unsigned HOST_WIDE_INT bitregion_end,
		       rtx value)
{
  unsigned int unit;
  unsigned int bitsdone = 0;

  /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle
     that much at a time.  */
  if (REG_P (op0) || GET_CODE (op0) == SUBREG)
    unit = BITS_PER_WORD;
  else
    unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);

  /* If OP0 is a memory with a mode, then UNIT must not be larger than
     OP0's mode as well.  Otherwise, store_fixed_bit_field will call us
     again, and we will mutually recurse forever.  */
  if (MEM_P (op0) && GET_MODE_BITSIZE (GET_MODE (op0)) > 0)
    unit = MIN (unit, GET_MODE_BITSIZE (GET_MODE (op0)));

  /* If VALUE is a constant other than a CONST_INT, get it into a register in
     WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
     that VALUE might be a floating-point constant.  */
  if (CONSTANT_P (value) && !CONST_INT_P (value))
    {
      rtx word = gen_lowpart_common (word_mode, value);

      if (word && (value != word))
	value = word;
      else
	value = gen_lowpart_common (word_mode,
				    force_reg (GET_MODE (value) != VOIDmode
					       ? GET_MODE (value)
					       : word_mode, value));
    }

  while (bitsdone < bitsize)
    {
      unsigned HOST_WIDE_INT thissize;
      rtx part, word;
      unsigned HOST_WIDE_INT thispos;
      unsigned HOST_WIDE_INT offset;

      offset = (bitpos + bitsdone) / unit;
      thispos = (bitpos + bitsdone) % unit;

      /* When the region of bytes we can touch is restricted, decrease
	 UNIT close to the end of the region as needed.  If op0 is a REG
	 or SUBREG of REG, don't do this, as there can't be data races
	 on a register and we can expand shorter code in some cases.  */
      if (bitregion_end
	  && unit > BITS_PER_UNIT
	  && bitpos + bitsdone - thispos + unit > bitregion_end + 1
	  && !REG_P (op0)
	  && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0))))
	{
	  unit = unit / 2;
	  continue;
	}

      /* THISSIZE must not overrun a word boundary.  Otherwise,
	 store_fixed_bit_field will call us again, and we will mutually
	 recurse forever.  */
      thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
      thissize = MIN (thissize, unit - thispos);

      if (BYTES_BIG_ENDIAN)
	{
	  /* Fetch successively less significant portions.  */
	  if (CONST_INT_P (value))
	    part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
			     >> (bitsize - bitsdone - thissize))
			    & (((HOST_WIDE_INT) 1 << thissize) - 1));
	  else
	    {
	      int total_bits = GET_MODE_BITSIZE (GET_MODE (value));
	      /* The args are chosen so that the last part includes the
		 lsb.  Give extract_bit_field the value it needs (with
		 endianness compensation) to fetch the piece we want.  */
	      part = extract_fixed_bit_field (word_mode, value, thissize,
					      total_bits - bitsize + bitsdone,
					      NULL_RTX, 1);
	    }
	}
      else
	{
	  /* Fetch successively more significant portions.  */
	  if (CONST_INT_P (value))
	    part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
			     >> bitsdone)
			    & (((HOST_WIDE_INT) 1 << thissize) - 1));
	  else
	    part = extract_fixed_bit_field (word_mode, value, thissize,
					    bitsdone, NULL_RTX, 1);
	}

      /* If OP0 is a register, then handle OFFSET here.

	 When handling multiword bitfields, extract_bit_field may pass
	 down a word_mode SUBREG of a larger REG for a bitfield that actually
	 crosses a word boundary.  Thus, for a SUBREG, we must find
	 the current word starting from the base register.  */
      if (GET_CODE (op0) == SUBREG)
	{
	  int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD)
			    + (offset * unit / BITS_PER_WORD);
	  machine_mode sub_mode = GET_MODE (SUBREG_REG (op0));
	  if (sub_mode != BLKmode && GET_MODE_SIZE (sub_mode) < UNITS_PER_WORD)
	    word = word_offset ? const0_rtx : op0;
	  else
	    word = operand_subword_force (SUBREG_REG (op0), word_offset,
					  GET_MODE (SUBREG_REG (op0)));
	  offset &= BITS_PER_WORD / unit - 1;
	}
      else if (REG_P (op0))
	{
	  machine_mode op0_mode = GET_MODE (op0);
	  if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
	    word = offset ? const0_rtx : op0;
	  else
	    word = operand_subword_force (op0, offset * unit / BITS_PER_WORD,
					  GET_MODE (op0));
	  offset &= BITS_PER_WORD / unit - 1;
	}
      else
	word = op0;

      /* OFFSET is in UNITs, and UNIT is in bits.  If WORD is const0_rtx,
	 it is just an out-of-bounds access.  Ignore it.  */
      if (word != const0_rtx)
	store_fixed_bit_field (word, thissize, offset * unit + thispos,
			       bitregion_start, bitregion_end, part);
      bitsdone += thissize;
    }
}
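
/* For instance, storing a 20-bit field at bit position 58 of a
   register-sized object with 64-bit words is done in two pieces:
   6 bits at offset 58 within word 0, then the remaining 14 bits at
   offset 0 within word 1.  */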

/* A subroutine of extract_bit_field_1 that converts return value X
   to either MODE or TMODE.  MODE, TMODE and UNSIGNEDP are arguments
   to extract_bit_field.  */

static rtx
convert_extracted_bit_field (rtx x, machine_mode mode,
			     machine_mode tmode, bool unsignedp)
{
  if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
    return x;

  /* If TMODE is not a scalar integral mode, first convert X to the
     integer mode of that size and then access the result as TMODE
     (e.g. a floating-point value) via a SUBREG.  */
  if (!SCALAR_INT_MODE_P (tmode))
    {
      machine_mode smode;

      smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
      x = convert_to_mode (smode, x, unsignedp);
      x = force_reg (smode, x);
      return gen_lowpart (tmode, x);
    }

  return convert_to_mode (tmode, x, unsignedp);
}

/* Try to use an ext(z)v pattern to extract a field from OP0.
   Return the extracted value on success, otherwise return null.
   EXT_MODE is the mode of the extraction and the other arguments
   are as for extract_bit_field.  */

static rtx
extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
			      unsigned HOST_WIDE_INT bitsize,
			      unsigned HOST_WIDE_INT bitnum,
			      int unsignedp, rtx target,
			      machine_mode mode, machine_mode tmode)
{
  struct expand_operand ops[4];
  rtx spec_target = target;
  rtx spec_target_subreg = 0;
  machine_mode ext_mode = extv->field_mode;
  unsigned unit = GET_MODE_BITSIZE (ext_mode);

  if (bitsize == 0 || unit < bitsize)
    return NULL_RTX;

  if (MEM_P (op0))
    /* Get a reference to the first byte of the field.  */
    op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
				&bitnum);
  else
    {
      /* Convert from counting within OP0 to counting in EXT_MODE.  */
      if (BYTES_BIG_ENDIAN)
	bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));

      /* If op0 is a register, we need it in EXT_MODE to make it
	 acceptable to the format of ext(z)v.  */
      if (GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
	return NULL_RTX;
      if (REG_P (op0) && GET_MODE (op0) != ext_mode)
	op0 = gen_lowpart_SUBREG (ext_mode, op0);
    }

  /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
     "backwards" from the size of the unit we are extracting from.
     Otherwise, we count bits from the most significant on a
     BYTES/BITS_BIG_ENDIAN machine.  */

  if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
    bitnum = unit - bitsize - bitnum;

  if (target == 0)
    target = spec_target = gen_reg_rtx (tmode);

  if (GET_MODE (target) != ext_mode)
    {
      /* Don't use LHS paradoxical subreg if explicit truncation is needed
	 between the mode of the extraction (word_mode) and the target
	 mode.  Instead, create a temporary and use convert_move to set
	 the target.  */
      if (REG_P (target)
	  && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode))
	{
	  target = gen_lowpart (ext_mode, target);
	  if (GET_MODE_PRECISION (ext_mode)
	      > GET_MODE_PRECISION (GET_MODE (spec_target)))
	    spec_target_subreg = target;
	}
      else
	target = gen_reg_rtx (ext_mode);
    }

  create_output_operand (&ops[0], target, ext_mode);
  create_fixed_operand (&ops[1], op0);
  create_integer_operand (&ops[2], bitsize);
  create_integer_operand (&ops[3], bitnum);
  if (maybe_expand_insn (extv->icode, 4, ops))
    {
      target = ops[0].value;
      if (target == spec_target)
	return target;
      if (target == spec_target_subreg)
	return spec_target;
      return convert_extracted_bit_field (target, mode, tmode, unsignedp);
    }
  return NULL_RTX;
}

/* A subroutine of extract_bit_field, with the same arguments.
   If FALLBACK_P is true, fall back to extract_fixed_bit_field
   if we can find no other means of implementing the operation.
   If FALLBACK_P is false, return NULL instead.  */
1437 
1438 static rtx
1439 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1440 		     unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1441 		     machine_mode mode, machine_mode tmode,
1442 		     bool fallback_p)
1443 {
1444   rtx op0 = str_rtx;
1445   machine_mode int_mode;
1446   machine_mode mode1;
1447 
1448   if (tmode == VOIDmode)
1449     tmode = mode;
1450 
1451   while (GET_CODE (op0) == SUBREG)
1452     {
1453       bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1454       op0 = SUBREG_REG (op0);
1455     }
1456 
1457   /* If we have an out-of-bounds access to a register, just return an
1458      uninitialized register of the required mode.  This can occur if the
1459      source code contains an out-of-bounds access to a small array.  */
1460   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1461     return gen_reg_rtx (tmode);
1462 
1463   if (REG_P (op0)
1464       && mode == GET_MODE (op0)
1465       && bitnum == 0
1466       && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1467     {
1468       /* We're trying to extract a full register from itself.  */
1469       return op0;
1470     }
1471 
1472   /* See if we can get a better vector mode before extracting.  */
1473   if (VECTOR_MODE_P (GET_MODE (op0))
1474       && !MEM_P (op0)
1475       && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1476     {
1477       machine_mode new_mode;
1478 
1479       if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1480 	new_mode = MIN_MODE_VECTOR_FLOAT;
1481       else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1482 	new_mode = MIN_MODE_VECTOR_FRACT;
1483       else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1484 	new_mode = MIN_MODE_VECTOR_UFRACT;
1485       else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1486 	new_mode = MIN_MODE_VECTOR_ACCUM;
1487       else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1488 	new_mode = MIN_MODE_VECTOR_UACCUM;
1489       else
1490 	new_mode = MIN_MODE_VECTOR_INT;
1491 
1492       for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
1493 	if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1494 	    && targetm.vector_mode_supported_p (new_mode))
1495 	  break;
1496       if (new_mode != VOIDmode)
1497 	op0 = gen_lowpart (new_mode, op0);
1498     }
1499 
1500   /* Use vec_extract patterns for extracting parts of vectors whenever
1501      available.  */
1502   if (VECTOR_MODE_P (GET_MODE (op0))
1503       && !MEM_P (op0)
1504       && optab_handler (vec_extract_optab, GET_MODE (op0)) != CODE_FOR_nothing
1505       && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
1506 	  == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
1507     {
1508       struct expand_operand ops[3];
1509       machine_mode outermode = GET_MODE (op0);
1510       machine_mode innermode = GET_MODE_INNER (outermode);
1511       enum insn_code icode = optab_handler (vec_extract_optab, outermode);
1512       unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1513 
1514       create_output_operand (&ops[0], target, innermode);
1515       create_input_operand (&ops[1], op0, outermode);
1516       create_integer_operand (&ops[2], pos);
1517       if (maybe_expand_insn (icode, 3, ops))
1518 	{
1519 	  target = ops[0].value;
1520 	  if (GET_MODE (target) != mode)
1521 	    return gen_lowpart (tmode, target);
1522 	  return target;
1523 	}
1524     }
1525 
1526   /* Make sure we are playing with integral modes.  Pun with subregs
1527      if we aren't.  */
1528   {
1529     machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1530     if (imode != GET_MODE (op0))
1531       {
1532 	if (MEM_P (op0))
1533 	  op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
1534 	else if (imode != BLKmode)
1535 	  {
1536 	    op0 = gen_lowpart (imode, op0);
1537 
1538 	    /* If we got a SUBREG, force it into a register since we
1539 	       aren't going to be able to do another SUBREG on it.  */
1540 	    if (GET_CODE (op0) == SUBREG)
1541 	      op0 = force_reg (imode, op0);
1542 	  }
1543 	else if (REG_P (op0))
1544 	  {
1545 	    rtx reg, subreg;
1546 	    imode = smallest_mode_for_size (GET_MODE_BITSIZE (GET_MODE (op0)),
1547 					    MODE_INT);
1548 	    reg = gen_reg_rtx (imode);
1549 	    subreg = gen_lowpart_SUBREG (GET_MODE (op0), reg);
1550 	    emit_move_insn (subreg, op0);
1551 	    op0 = reg;
1552 	    bitnum += SUBREG_BYTE (subreg) * BITS_PER_UNIT;
1553 	  }
1554 	else
1555 	  {
1556 	    HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (op0));
1557 	    rtx mem = assign_stack_temp (GET_MODE (op0), size);
1558 	    emit_move_insn (mem, op0);
1559 	    op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size);
1560 	  }
1561       }
1562   }
1563 
1564   /* ??? We currently assume TARGET is at least as big as BITSIZE.
1565      If that's wrong, the solution is to test for it and set TARGET to 0
1566      if needed.  */
1567 
1568   /* Get the mode of the field to use for atomic access or subreg
1569      conversion.  */
1570   mode1 = mode;
1571   if (SCALAR_INT_MODE_P (tmode))
1572     {
1573       machine_mode try_mode = mode_for_size (bitsize,
1574 						  GET_MODE_CLASS (tmode), 0);
1575       if (try_mode != BLKmode)
1576 	mode1 = try_mode;
1577     }
1578   gcc_assert (mode1 != BLKmode);
1579 
1580   /* Extraction of a full MODE1 value can be done with a subreg as long
1581      as the least significant bit of the value is the least significant
1582      bit of either OP0 or a word of OP0.  */
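  /* For example (illustrative, little-endian): the SImode value in bits
     32..63 of a DImode register is just (subreg:SI (reg:DI) 4); the byte
     offset passed below is BITNUM / BITS_PER_UNIT == 32 / 8 == 4, and no
     shifting or masking is needed.  */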
1583   if (!MEM_P (op0)
1584       && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
1585       && bitsize == GET_MODE_BITSIZE (mode1)
1586       && TRULY_NOOP_TRUNCATION_MODES_P (mode1, GET_MODE (op0)))
1587     {
1588       rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1589 				     bitnum / BITS_PER_UNIT);
1590       if (sub)
1591 	return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1592     }
1593 
1594   /* Extraction of a full MODE1 value can be done with a load as long as
1595      the field is on a byte boundary and is sufficiently aligned.  */
1596   if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1))
1597     {
1598       op0 = adjust_bitfield_address (op0, mode1, bitnum / BITS_PER_UNIT);
1599       return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1600     }
1601 
1602   /* Handle fields bigger than a word.  */
1603 
1604   if (bitsize > BITS_PER_WORD)
1605     {
1606       /* Here we transfer the words of the field
1607 	 in the order least significant first.
1608 	 This is because the most significant word is the one which may
1609 	 be less than full.  */
1610 
1611       unsigned int backwards = WORDS_BIG_ENDIAN;
1612       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1613       unsigned int i;
1614       rtx_insn *last;
1615 
1616       if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1617 	target = gen_reg_rtx (mode);
1618 
1619       /* Indicate for flow that the entire target reg is being set.  */
1620       emit_clobber (target);
1621 
1622       last = get_last_insn ();
1623       for (i = 0; i < nwords; i++)
1624 	{
1625 	  /* If I is 0, use the low-order word in both field and target;
1626 	     if I is 1, use the next to lowest word; and so on.  */
1627 	  /* Word number in TARGET to use.  */
1628 	  unsigned int wordnum
1629 	    = (backwards
1630 	       ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1631 	       : i);
1632 	  /* Offset from start of field in OP0.  */
1633 	  unsigned int bit_offset = (backwards
1634 				     ? MAX ((int) bitsize - ((int) i + 1)
1635 					    * BITS_PER_WORD,
1636 					    0)
1637 				     : (int) i * BITS_PER_WORD);
1638 	  rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1639 	  rtx result_part
1640 	    = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1641 					     bitsize - i * BITS_PER_WORD),
1642 				   bitnum + bit_offset, 1, target_part,
1643 				   mode, word_mode, fallback_p);
1644 
1645 	  gcc_assert (target_part);
1646 	  if (!result_part)
1647 	    {
1648 	      delete_insns_since (last);
1649 	      return NULL;
1650 	    }
1651 
1652 	  if (result_part != target_part)
1653 	    emit_move_insn (target_part, result_part);
1654 	}
1655 
1656       if (unsignedp)
1657 	{
1658 	  /* Unless we've filled TARGET, the upper regs in a multi-reg value
1659 	     need to be zeroed out.  */
1660 	  if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1661 	    {
1662 	      unsigned int i, total_words;
1663 
1664 	      total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1665 	      for (i = nwords; i < total_words; i++)
1666 		emit_move_insn
1667 		  (operand_subword (target,
1668 				    backwards ? total_words - i - 1 : i,
1669 				    1, VOIDmode),
1670 		   const0_rtx);
1671 	    }
1672 	  return target;
1673 	}
1674 
1675       /* Signed bit field: sign-extend with two arithmetic shifts.  */
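      /* For example (illustrative): a signed 12-bit field sitting in the
         low bits of a 32-bit TARGET is sign-extended by shifting left by
         32 - 12 == 20 and arithmetic-shifting right by 20, replicating
         bit 11 through bits 12..31.  */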
1676       target = expand_shift (LSHIFT_EXPR, mode, target,
1677 			     GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1678       return expand_shift (RSHIFT_EXPR, mode, target,
1679 			   GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1680     }
1681 
1682   /* If OP0 is a multi-word register, narrow it to the affected word.
1683      If the region spans two words, defer to extract_split_bit_field.  */
1684   if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1685     {
1686       op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
1687 				 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1688       bitnum %= BITS_PER_WORD;
1689       if (bitnum + bitsize > BITS_PER_WORD)
1690 	{
1691 	  if (!fallback_p)
1692 	    return NULL_RTX;
1693 	  target = extract_split_bit_field (op0, bitsize, bitnum, unsignedp);
1694 	  return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1695 	}
1696     }
1697 
1698   /* From here on we know the desired field is smaller than a word.
1699      If OP0 is a register, it too fits within a word.  */
1700   enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv;
1701   extraction_insn extv;
1702   if (!MEM_P (op0)
1703       /* ??? We could limit the structure size to the part of OP0 that
1704 	 contains the field, with appropriate checks for endianness
1705 	 and TRULY_NOOP_TRUNCATION.  */
1706       && get_best_reg_extraction_insn (&extv, pattern,
1707 				       GET_MODE_BITSIZE (GET_MODE (op0)),
1708 				       tmode))
1709     {
1710       rtx result = extract_bit_field_using_extv (&extv, op0, bitsize, bitnum,
1711 						 unsignedp, target, mode,
1712 						 tmode);
1713       if (result)
1714 	return result;
1715     }
1716 
1717   /* If OP0 is a memory, try copying it to a register and seeing if a
1718      cheap register alternative is available.  */
1719   if (MEM_P (op0))
1720     {
1721       if (get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum,
1722 					tmode))
1723 	{
1724 	  rtx result = extract_bit_field_using_extv (&extv, op0, bitsize,
1725 						     bitnum, unsignedp,
1726 						     target, mode,
1727 						     tmode);
1728 	  if (result)
1729 	    return result;
1730 	}
1731 
1732       rtx_insn *last = get_last_insn ();
1733 
1734       /* Try loading part of OP0 into a register and extracting the
1735 	 bitfield from that.  */
1736       unsigned HOST_WIDE_INT bitpos;
1737       rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum,
1738 					       0, 0, tmode, &bitpos);
1739       if (xop0)
1740 	{
1741 	  xop0 = copy_to_reg (xop0);
1742 	  rtx result = extract_bit_field_1 (xop0, bitsize, bitpos,
1743 					    unsignedp, target,
1744 					    mode, tmode, false);
1745 	  if (result)
1746 	    return result;
1747 	  delete_insns_since (last);
1748 	}
1749     }
1750 
1751   if (!fallback_p)
1752     return NULL;
1753 
1754   /* Find a correspondingly-sized integer field, so we can apply
1755      shifts and masks to it.  */
1756   int_mode = int_mode_for_mode (tmode);
1757   if (int_mode == BLKmode)
1758     int_mode = int_mode_for_mode (mode);
1759   /* Should probably push op0 out to memory and then do a load.  */
1760   gcc_assert (int_mode != BLKmode);
1761 
1762   target = extract_fixed_bit_field (int_mode, op0, bitsize, bitnum,
1763 				    target, unsignedp);
1764   return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1765 }
1766 
1767 /* Generate code to extract a bit-field from STR_RTX
1768    containing BITSIZE bits, starting at BITNUM,
1769    and put it in TARGET if possible (if TARGET is nonzero).
1770    Regardless of TARGET, we return the rtx for where the value is placed.
1771 
1772    STR_RTX is the structure containing the field (a REG or MEM).
1773    UNSIGNEDP is nonzero if this is an unsigned bit field.
1774    MODE is the natural mode of the field value once extracted.
1775    TMODE is the mode the caller would like the value to have;
1776    but the value may be returned with mode MODE instead.
1777 
1778    If a TARGET is specified and we can store in it at no extra cost,
1779    we do so, and return TARGET.
1780    Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1781    if they are equally easy.  */
1782 
1783 rtx
1784 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1785 		   unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1786 		   machine_mode mode, machine_mode tmode)
1787 {
1788   machine_mode mode1;
1789 
1790   /* Handle -fstrict-volatile-bitfields in the cases where it applies.  */
1791   if (GET_MODE_BITSIZE (GET_MODE (str_rtx)) > 0)
1792     mode1 = GET_MODE (str_rtx);
1793   else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
1794     mode1 = GET_MODE (target);
1795   else
1796     mode1 = tmode;
1797 
1798   if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, mode1, 0, 0))
1799     {
1800       /* Extraction of a full MODE1 value can be done with a simple load.
1801 	 We know here that the field can be accessed with one single
1802 	 instruction.  For targets that support unaligned memory,
1803 	 an unaligned access may be necessary.  */
1804       if (bitsize == GET_MODE_BITSIZE (mode1))
1805 	{
1806 	  rtx result = adjust_bitfield_address (str_rtx, mode1,
1807 						bitnum / BITS_PER_UNIT);
1808 	  gcc_assert (bitnum % BITS_PER_UNIT == 0);
1809 	  return convert_extracted_bit_field (result, mode, tmode, unsignedp);
1810 	}
1811 
1812       str_rtx = narrow_bit_field_mem (str_rtx, mode1, bitsize, bitnum,
1813 				      &bitnum);
1814       gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (mode1));
1815       str_rtx = copy_to_reg (str_rtx);
1816     }
1817 
1818   return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
1819 			      target, mode, tmode, true);
1820 }
1821 
1822 /* Use shifts and boolean operations to extract a field of BITSIZE bits
1823    from bit BITNUM of OP0.
1824 
1825    UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1826    If TARGET is nonzero, we attempt to store the value there
1827    and return TARGET, but this is not guaranteed.
1828    If TARGET is not used, create a pseudo-reg of mode TMODE for the value.  */
1829 
1830 static rtx
1831 extract_fixed_bit_field (machine_mode tmode, rtx op0,
1832 			 unsigned HOST_WIDE_INT bitsize,
1833 			 unsigned HOST_WIDE_INT bitnum, rtx target,
1834 			 int unsignedp)
1835 {
1836   if (MEM_P (op0))
1837     {
1838       machine_mode mode
1839 	= get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0), word_mode,
1840 			 MEM_VOLATILE_P (op0));
1841 
1842       if (mode == VOIDmode)
1843 	/* The only way this should occur is if the field spans word
1844 	   boundaries.  */
1845 	return extract_split_bit_field (op0, bitsize, bitnum, unsignedp);
1846 
1847       op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
1848     }
1849 
1850   return extract_fixed_bit_field_1 (tmode, op0, bitsize, bitnum,
1851 				    target, unsignedp);
1852 }
1853 
1854 /* Helper function for extract_fixed_bit_field, extracts
1855    the bit field always using the MODE of OP0.  */
1856 
1857 static rtx
1858 extract_fixed_bit_field_1 (machine_mode tmode, rtx op0,
1859 			   unsigned HOST_WIDE_INT bitsize,
1860 			   unsigned HOST_WIDE_INT bitnum, rtx target,
1861 			   int unsignedp)
1862 {
1863   machine_mode mode = GET_MODE (op0);
1864   gcc_assert (SCALAR_INT_MODE_P (mode));
1865 
1866   /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1867      for invalid input, such as extracting the equivalent of f5 from
1868      gcc.dg/pr48335-2.c.  */
1869 
1870   if (BYTES_BIG_ENDIAN)
1871     /* BITNUM is the distance between our msb and that of OP0.
1872        Convert it to the distance from the lsb.  */
1873     bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1874 
1875   /* Now BITNUM is always the distance between the field's lsb and that of OP0.
1876      We have reduced the big-endian case to the little-endian case.  */
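  /* For example (illustrative): with a 32-bit MODE, BITSIZE == 8 and a
     big-endian BITNUM of 0 (field at the msb), the conversion above
     yields 32 - 8 - 0 == 24, the field's distance from the lsb.  */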
1877 
1878   if (unsignedp)
1879     {
1880       if (bitnum)
1881 	{
1882 	  /* If the field does not already start at the lsb,
1883 	     shift it so it does.  */
1884 	  /* Maybe propagate the target for the shift.  */
1885 	  rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1886 	  if (tmode != mode)
1887 	    subtarget = 0;
1888 	  op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
1889 	}
1890       /* Convert the value to the desired mode.  */
1891       if (mode != tmode)
1892 	op0 = convert_to_mode (tmode, op0, 1);
1893 
1894       /* Unless the msb of the field used to be the msb when we shifted,
1895 	 mask out the upper bits.  */
1896 
1897       if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
1898 	return expand_binop (GET_MODE (op0), and_optab, op0,
1899 			     mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1900 			     target, 1, OPTAB_LIB_WIDEN);
1901       return op0;
1902     }
1903 
1904   /* To extract a signed bit-field, first shift its msb to the msb of the word,
1905      then arithmetic-shift its lsb to the lsb of the word.  */
1906   op0 = force_reg (mode, op0);
1907 
1908   /* Find the narrowest integer mode that contains the field.  */
1909 
1910   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1911        mode = GET_MODE_WIDER_MODE (mode))
1912     if (GET_MODE_BITSIZE (mode) >= bitsize + bitnum)
1913       {
1914 	op0 = convert_to_mode (mode, op0, 0);
1915 	break;
1916       }
1917 
1918   if (mode != tmode)
1919     target = 0;
1920 
1921   if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
1922     {
1923       int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
1924       /* Maybe propagate the target for the shift.  */
1925       rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1926       op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1927     }
1928 
1929   return expand_shift (RSHIFT_EXPR, mode, op0,
1930 		       GET_MODE_BITSIZE (mode) - bitsize, target, 0);
1931 }
1932 
1933 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1934    VALUE << BITPOS.  */
1935 
1936 static rtx
1937 lshift_value (machine_mode mode, unsigned HOST_WIDE_INT value,
1938 	      int bitpos)
1939 {
1940   return immed_wide_int_const (wi::lshift (value, bitpos), mode);
1941 }
1942 
1943 /* Extract a bit field that is split across two words
1944    and return an RTX for the result.
1945 
1946    OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1947    BITSIZE is the field width; BITPOS is the position of its first bit in the word.
1948    UNSIGNEDP is 1 if we should zero-extend the contents; else sign-extend.  */
1949 
1950 static rtx
1951 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1952 			 unsigned HOST_WIDE_INT bitpos, int unsignedp)
1953 {
1954   unsigned int unit;
1955   unsigned int bitsdone = 0;
1956   rtx result = NULL_RTX;
1957   int first = 1;
1958 
1959   /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle that
1960      much at a time.  */
1961   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1962     unit = BITS_PER_WORD;
1963   else
1964     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1965 
1966   while (bitsdone < bitsize)
1967     {
1968       unsigned HOST_WIDE_INT thissize;
1969       rtx part, word;
1970       unsigned HOST_WIDE_INT thispos;
1971       unsigned HOST_WIDE_INT offset;
1972 
1973       offset = (bitpos + bitsdone) / unit;
1974       thispos = (bitpos + bitsdone) % unit;
1975 
1976       /* THISSIZE must not overrun a word boundary.  Otherwise,
1977 	 extract_fixed_bit_field will call us again, and we will mutually
1978 	 recurse forever.  */
1979       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1980       thissize = MIN (thissize, unit - thispos);
1981 
1982       /* If OP0 is a register, then handle OFFSET here.
1983 
1984 	 When handling multiword bitfields, extract_bit_field may pass
1985 	 down a word_mode SUBREG of a larger REG for a bitfield that actually
1986 	 crosses a word boundary.  Thus, for a SUBREG, we must find
1987 	 the current word starting from the base register.  */
1988       if (GET_CODE (op0) == SUBREG)
1989 	{
1990 	  int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1991 	  word = operand_subword_force (SUBREG_REG (op0), word_offset,
1992 					GET_MODE (SUBREG_REG (op0)));
1993 	  offset = 0;
1994 	}
1995       else if (REG_P (op0))
1996 	{
1997 	  word = operand_subword_force (op0, offset, GET_MODE (op0));
1998 	  offset = 0;
1999 	}
2000       else
2001 	word = op0;
2002 
2003       /* Extract the parts in bit-counting order,
2004 	 whose meaning is determined by BYTES_BIG_ENDIAN.
2005 	 OFFSET is in UNITs, and UNIT is in bits.  */
2006       part = extract_fixed_bit_field (word_mode, word, thissize,
2007 				      offset * unit + thispos, 0, 1);
2008       bitsdone += thissize;
2009 
2010       /* Shift this part into place for the result.  */
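      /* For example (illustrative, little-endian): a 12-bit field split
         5 + 7 yields a 5-bit part that needs no shift (BITSDONE ==
         THISSIZE == 5) and then a 7-bit part shifted left by
         BITSDONE - THISSIZE == 12 - 7 == 5 before being IORed in.  */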
2011       if (BYTES_BIG_ENDIAN)
2012 	{
2013 	  if (bitsize != bitsdone)
2014 	    part = expand_shift (LSHIFT_EXPR, word_mode, part,
2015 				 bitsize - bitsdone, 0, 1);
2016 	}
2017       else
2018 	{
2019 	  if (bitsdone != thissize)
2020 	    part = expand_shift (LSHIFT_EXPR, word_mode, part,
2021 				 bitsdone - thissize, 0, 1);
2022 	}
2023 
2024       if (first)
2025 	result = part;
2026       else
2027 	/* Combine the parts with bitwise or.  This works
2028 	   because we extracted each part as an unsigned bit field.  */
2029 	result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2030 			       OPTAB_LIB_WIDEN);
2031 
2032       first = 0;
2033     }
2034 
2035   /* Unsigned bit field: we are done.  */
2036   if (unsignedp)
2037     return result;
2038   /* Signed bit field: sign-extend with two arithmetic shifts.  */
2039   result = expand_shift (LSHIFT_EXPR, word_mode, result,
2040 			 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2041   return expand_shift (RSHIFT_EXPR, word_mode, result,
2042 		       BITS_PER_WORD - bitsize, NULL_RTX, 0);
2043 }
2044 
2045 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2046    the bit pattern.  SRC_MODE is the mode of SRC; if this is smaller than
2047    MODE, fill the upper bits with zeros.  Fail if the layout of either
2048    mode is unknown (as for CC modes) or if the extraction would involve
2049    unprofitable mode punning.  Return the value on success, otherwise
2050    return null.
2051 
2052    This is different from gen_lowpart* in these respects:
2053 
2054      - the returned value must always be considered an rvalue
2055 
2056      - when MODE is wider than SRC_MODE, the extraction involves
2057        a zero extension
2058 
2059      - when MODE is smaller than SRC_MODE, the extraction involves
2060        a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
2061 
2062    In other words, this routine performs a computation, whereas the
2063    gen_lowpart* routines are conceptually lvalue or rvalue subreg
2064    operations.  */
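/* For example (illustrative): extract_low_bits (SImode, DFmode, x)
   reinterprets the low 32 bits of a 64-bit double: DFmode is punned to
   DImode, truncated to SImode, and the result is returned as an rvalue,
   provided the target allows tying those mode pairs.  */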
2065 
2066 rtx
2067 extract_low_bits (machine_mode mode, machine_mode src_mode, rtx src)
2068 {
2069   machine_mode int_mode, src_int_mode;
2070 
2071   if (mode == src_mode)
2072     return src;
2073 
2074   if (CONSTANT_P (src))
2075     {
2076       /* simplify_gen_subreg can't be used here, as if simplify_subreg
2077 	 fails, it will happily create (subreg (symbol_ref)) or similar
2078 	 invalid SUBREGs.  */
2079       unsigned int byte = subreg_lowpart_offset (mode, src_mode);
2080       rtx ret = simplify_subreg (mode, src, src_mode, byte);
2081       if (ret)
2082 	return ret;
2083 
2084       if (GET_MODE (src) == VOIDmode
2085 	  || !validate_subreg (mode, src_mode, src, byte))
2086 	return NULL_RTX;
2087 
2088       src = force_reg (GET_MODE (src), src);
2089       return gen_rtx_SUBREG (mode, src, byte);
2090     }
2091 
2092   if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2093     return NULL_RTX;
2094 
2095   if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2096       && MODES_TIEABLE_P (mode, src_mode))
2097     {
2098       rtx x = gen_lowpart_common (mode, src);
2099       if (x)
2100         return x;
2101     }
2102 
2103   src_int_mode = int_mode_for_mode (src_mode);
2104   int_mode = int_mode_for_mode (mode);
2105   if (src_int_mode == BLKmode || int_mode == BLKmode)
2106     return NULL_RTX;
2107 
2108   if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2109     return NULL_RTX;
2110   if (!MODES_TIEABLE_P (int_mode, mode))
2111     return NULL_RTX;
2112 
2113   src = gen_lowpart (src_int_mode, src);
2114   src = convert_modes (int_mode, src_int_mode, src, true);
2115   src = gen_lowpart (mode, src);
2116   return src;
2117 }
2118 
2119 /* Add INC into TARGET.  */
2120 
2121 void
2122 expand_inc (rtx target, rtx inc)
2123 {
2124   rtx value = expand_binop (GET_MODE (target), add_optab,
2125 			    target, inc,
2126 			    target, 0, OPTAB_LIB_WIDEN);
2127   if (value != target)
2128     emit_move_insn (target, value);
2129 }
2130 
2131 /* Subtract DEC from TARGET.  */
2132 
2133 void
2134 expand_dec (rtx target, rtx dec)
2135 {
2136   rtx value = expand_binop (GET_MODE (target), sub_optab,
2137 			    target, dec,
2138 			    target, 0, OPTAB_LIB_WIDEN);
2139   if (value != target)
2140     emit_move_insn (target, value);
2141 }
2142 
2143 /* Output a shift instruction for expression code CODE,
2144    with SHIFTED being the rtx for the value to shift,
2145    and AMOUNT the rtx for the amount to shift by.
2146    Store the result in the rtx TARGET, if that is convenient.
2147    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2148    Return the rtx for where the value is.  */
2149 
2150 static rtx
2151 expand_shift_1 (enum tree_code code, machine_mode mode, rtx shifted,
2152 		rtx amount, rtx target, int unsignedp)
2153 {
2154   rtx op1, temp = 0;
2155   int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2156   int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2157   optab lshift_optab = ashl_optab;
2158   optab rshift_arith_optab = ashr_optab;
2159   optab rshift_uns_optab = lshr_optab;
2160   optab lrotate_optab = rotl_optab;
2161   optab rrotate_optab = rotr_optab;
2162   machine_mode op1_mode;
2163   machine_mode scalar_mode = mode;
2164   int attempt;
2165   bool speed = optimize_insn_for_speed_p ();
2166 
2167   if (VECTOR_MODE_P (mode))
2168     scalar_mode = GET_MODE_INNER (mode);
2169   op1 = amount;
2170   op1_mode = GET_MODE (op1);
2171 
2172   /* Determine whether the shift/rotate amount is a vector or a scalar.  If the
2173      shift amount is a vector, use the vector/vector shift patterns.  */
2174   if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2175     {
2176       lshift_optab = vashl_optab;
2177       rshift_arith_optab = vashr_optab;
2178       rshift_uns_optab = vlshr_optab;
2179       lrotate_optab = vrotl_optab;
2180       rrotate_optab = vrotr_optab;
2181     }
2182 
2183   /* We used to detect shift counts computed by NEGATE_EXPR and shift
2184      in the other direction, but that does not work on all machines.  */
2186 
2187   if (SHIFT_COUNT_TRUNCATED)
2188     {
2189       if (CONST_INT_P (op1)
2190 	  && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2191 	      (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (scalar_mode)))
2192 	op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2193 		       % GET_MODE_BITSIZE (scalar_mode));
2194       else if (GET_CODE (op1) == SUBREG
2195 	       && subreg_lowpart_p (op1)
2196 	       && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1)))
2197 	       && SCALAR_INT_MODE_P (GET_MODE (op1)))
2198 	op1 = SUBREG_REG (op1);
2199     }
2200 
2201   /* Canonicalize rotates by constant amount.  If op1 is bitsize / 2,
2202      prefer left rotation; if op1 is from bitsize / 2 + 1 to
2203      bitsize - 1, use the other direction of rotate with an amount of
2204      1 .. bitsize / 2 - 1 instead.  */
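  /* For example (illustrative): in a 32-bit mode, a left rotate by 25
     becomes a right rotate by 32 - 25 == 7.  */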
2205   if (rotate
2206       && CONST_INT_P (op1)
2207       && IN_RANGE (INTVAL (op1), GET_MODE_BITSIZE (scalar_mode) / 2 + left,
2208 		   GET_MODE_BITSIZE (scalar_mode) - 1))
2209     {
2210       op1 = GEN_INT (GET_MODE_BITSIZE (scalar_mode) - INTVAL (op1));
2211       left = !left;
2212       code = left ? LROTATE_EXPR : RROTATE_EXPR;
2213     }
2214 
2215   /* Rotation of 16-bit values by 8 bits is effectively equivalent to a bswaphi.
2216      Note that this is not the case for bigger values.  For instance a rotation
2217      of 0x01020304 by 16 bits gives 0x03040102 which is different from
2218      0x04030201 (bswapsi).  */
2219   if (rotate
2220       && CONST_INT_P (op1)
2221       && INTVAL (op1) == BITS_PER_UNIT
2222       && GET_MODE_SIZE (scalar_mode) == 2
2223       && optab_handler (bswap_optab, HImode) != CODE_FOR_nothing)
2224     return expand_unop (HImode, bswap_optab, shifted, NULL_RTX,
2225 				  unsignedp);
2226 
2227   if (op1 == const0_rtx)
2228     return shifted;
2229 
2230   /* Check whether it's cheaper to implement a left shift by a constant
2231      bit count as a sequence of additions.  */
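  /* For example (illustrative): X << 2 can be computed as T = X + X;
     T = T + T; -- one self-addition per bit of shift -- which wins when
     shift_cost exceeds INTVAL (op1) * add_cost.  */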
2232   if (code == LSHIFT_EXPR
2233       && CONST_INT_P (op1)
2234       && INTVAL (op1) > 0
2235       && INTVAL (op1) < GET_MODE_PRECISION (scalar_mode)
2236       && INTVAL (op1) < MAX_BITS_PER_WORD
2237       && (shift_cost (speed, mode, INTVAL (op1))
2238 	  > INTVAL (op1) * add_cost (speed, mode))
2239       && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2240     {
2241       int i;
2242       for (i = 0; i < INTVAL (op1); i++)
2243 	{
2244 	  temp = force_reg (mode, shifted);
2245 	  shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2246 				  unsignedp, OPTAB_LIB_WIDEN);
2247 	}
2248       return shifted;
2249     }
2250 
2251   for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2252     {
2253       enum optab_methods methods;
2254 
2255       if (attempt == 0)
2256 	methods = OPTAB_DIRECT;
2257       else if (attempt == 1)
2258 	methods = OPTAB_WIDEN;
2259       else
2260 	methods = OPTAB_LIB_WIDEN;
2261 
2262       if (rotate)
2263 	{
2264 	  /* Widening does not work for rotation.  */
2265 	  if (methods == OPTAB_WIDEN)
2266 	    continue;
2267 	  else if (methods == OPTAB_LIB_WIDEN)
2268 	    {
2269 	      /* If we have been unable to open-code this by a rotation,
2270 		 do it as the IOR of two shifts.  I.e., to rotate A
2271 		 by N bits, compute
2272 		 (A << N) | ((unsigned) A >> ((-N) & (C - 1)))
2273 		 where C is the bitsize of A.
2274 
2275 		 It is theoretically possible that the target machine might
2276 		 not be able to perform either shift and hence we would
2277 		 be making two libcalls rather than just the one for the
2278 		 shift (similarly if IOR could not be done).  We will allow
2279 		 this extremely unlikely lossage to avoid complicating the
2280 		 code below.  */
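	      /* For example (illustrative): in a 32-bit mode, rotating A
		 left by 13 becomes (A << 13) | ((unsigned) A >> 19),
		 since (-13) & 31 == 19.  */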
2281 
2282 	      rtx subtarget = target == shifted ? 0 : target;
2283 	      rtx new_amount, other_amount;
2284 	      rtx temp1;
2285 
2286 	      new_amount = op1;
2287 	      if (op1 == const0_rtx)
2288 		return shifted;
2289 	      else if (CONST_INT_P (op1))
2290 		other_amount = GEN_INT (GET_MODE_BITSIZE (scalar_mode)
2291 					- INTVAL (op1));
2292 	      else
2293 		{
2294 		  other_amount
2295 		    = simplify_gen_unary (NEG, GET_MODE (op1),
2296 					  op1, GET_MODE (op1));
2297 		  HOST_WIDE_INT mask = GET_MODE_PRECISION (scalar_mode) - 1;
2298 		  other_amount
2299 		    = simplify_gen_binary (AND, GET_MODE (op1), other_amount,
2300 					   gen_int_mode (mask, GET_MODE (op1)));
2301 		}
2302 
2303 	      shifted = force_reg (mode, shifted);
2304 
2305 	      temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2306 				     mode, shifted, new_amount, 0, 1);
2307 	      temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2308 				      mode, shifted, other_amount,
2309 				      subtarget, 1);
2310 	      return expand_binop (mode, ior_optab, temp, temp1, target,
2311 				   unsignedp, methods);
2312 	    }
2313 
2314 	  temp = expand_binop (mode,
2315 			       left ? lrotate_optab : rrotate_optab,
2316 			       shifted, op1, target, unsignedp, methods);
2317 	}
2318       else if (unsignedp)
2319 	temp = expand_binop (mode,
2320 			     left ? lshift_optab : rshift_uns_optab,
2321 			     shifted, op1, target, unsignedp, methods);
2322 
2323       /* Do arithmetic shifts.
2324 	 Also, if we are going to widen the operand, we can just as well
2325 	 use an arithmetic right-shift instead of a logical one.  */
2326       if (temp == 0 && ! rotate
2327 	  && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2328 	{
2329 	  enum optab_methods methods1 = methods;
2330 
2331 	  /* If trying to widen a log shift to an arithmetic shift,
2332 	     don't accept an arithmetic shift of the same size.  */
2333 	  if (unsignedp)
2334 	    methods1 = OPTAB_MUST_WIDEN;
2335 
2336 	  /* Arithmetic shift */
2337 
2338 	  temp = expand_binop (mode,
2339 			       left ? lshift_optab : rshift_arith_optab,
2340 			       shifted, op1, target, unsignedp, methods1);
2341 	}
2342 
2343       /* We used to try extzv here for logical right shifts, but that was
2344 	 only useful for one machine, the VAX, and caused poor code
2345 	 generation there for lshrdi3, so the code was deleted and a
2346 	 define_expand for lshrsi3 was added to vax.md.  */
2347     }
2348 
2349   gcc_assert (temp);
2350   return temp;
2351 }
2352 
2353 /* Output a shift instruction for expression code CODE,
2354    with SHIFTED being the rtx for the value to shift,
2355    and AMOUNT the amount to shift by.
2356    Store the result in the rtx TARGET, if that is convenient.
2357    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2358    Return the rtx for where the value is.  */
2359 
2360 rtx
2361 expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2362 	      int amount, rtx target, int unsignedp)
2363 {
2364   return expand_shift_1 (code, mode,
2365 			 shifted, GEN_INT (amount), target, unsignedp);
2366 }
2367 
2368 /* Output a shift instruction for expression code CODE,
2369    with SHIFTED being the rtx for the value to shift,
2370    and AMOUNT the tree for the amount to shift by.
2371    Store the result in the rtx TARGET, if that is convenient.
2372    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2373    Return the rtx for where the value is.  */
2374 
2375 rtx
2376 expand_variable_shift (enum tree_code code, machine_mode mode, rtx shifted,
2377 		       tree amount, rtx target, int unsignedp)
2378 {
2379   return expand_shift_1 (code, mode,
2380 			 shifted, expand_normal (amount), target, unsignedp);
2381 }
2382 
2383 
2384 /* Indicates the type of fixup needed after a constant multiplication.
2385    BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2386    the result should be negated, and ADD_VARIANT means that the
2387    multiplicand should be added to the result.  */
2388 enum mult_variant {basic_variant, negate_variant, add_variant};
2389 
2390 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2391 			const struct mult_cost *, machine_mode mode);
2392 static bool choose_mult_variant (machine_mode, HOST_WIDE_INT,
2393 				 struct algorithm *, enum mult_variant *, int);
2394 static rtx expand_mult_const (machine_mode, rtx, HOST_WIDE_INT, rtx,
2395 			      const struct algorithm *, enum mult_variant);
2396 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2397 static rtx extract_high_half (machine_mode, rtx);
2398 static rtx expmed_mult_highpart (machine_mode, rtx, rtx, rtx, int, int);
2399 static rtx expmed_mult_highpart_optab (machine_mode, rtx, rtx, rtx,
2400 				       int, int);
2401 /* Compute and return the best algorithm for multiplying by T.
2402    The algorithm must cost less than COST_LIMIT.
2403    If retval.cost >= COST_LIMIT, no algorithm was found and all
2404    other fields of the returned struct are undefined.
2405    MODE is the machine mode of the multiplication.  */
2406 
2407 static void
2408 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2409 	    const struct mult_cost *cost_limit, machine_mode mode)
2410 {
2411   int m;
2412   struct algorithm *alg_in, *best_alg;
2413   struct mult_cost best_cost;
2414   struct mult_cost new_limit;
2415   int op_cost, op_latency;
2416   unsigned HOST_WIDE_INT orig_t = t;
2417   unsigned HOST_WIDE_INT q;
2418   int maxm, hash_index;
2419   bool cache_hit = false;
2420   enum alg_code cache_alg = alg_zero;
2421   bool speed = optimize_insn_for_speed_p ();
2422   machine_mode imode;
2423   struct alg_hash_entry *entry_ptr;
2424 
2425   /* Indicate that no algorithm is yet found.  If no algorithm
2426      is found, this value will be returned and indicate failure.  */
2427   alg_out->cost.cost = cost_limit->cost + 1;
2428   alg_out->cost.latency = cost_limit->latency + 1;
2429 
2430   if (cost_limit->cost < 0
2431       || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2432     return;
2433 
2434   /* Be prepared for vector modes.  */
2435   imode = GET_MODE_INNER (mode);
2436   if (imode == VOIDmode)
2437     imode = mode;
2438 
2439   maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2440 
2441   /* Restrict the bits of "t" to the multiplication's mode.  */
2442   t &= GET_MODE_MASK (imode);
2443 
2444   /* t == 1 can be done at zero cost.  */
2445   if (t == 1)
2446     {
2447       alg_out->ops = 1;
2448       alg_out->cost.cost = 0;
2449       alg_out->cost.latency = 0;
2450       alg_out->op[0] = alg_m;
2451       return;
2452     }
2453 
2454   /* t == 0 sometimes has a cost.  If it does and it exceeds our limit,
2455      fail now.  */
2456   if (t == 0)
2457     {
2458       if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2459 	return;
2460       else
2461 	{
2462 	  alg_out->ops = 1;
2463 	  alg_out->cost.cost = zero_cost (speed);
2464 	  alg_out->cost.latency = zero_cost (speed);
2465 	  alg_out->op[0] = alg_zero;
2466 	  return;
2467 	}
2468     }
2469 
2470   /* We'll be needing a couple extra algorithm structures now.  */
2471 
2472   alg_in = XALLOCA (struct algorithm);
2473   best_alg = XALLOCA (struct algorithm);
2474   best_cost = *cost_limit;
2475 
2476   /* Compute the hash index.  */
2477   hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2478 
2479   /* See if we already know what to do for T.  */
2480   entry_ptr = alg_hash_entry_ptr (hash_index);
2481   if (entry_ptr->t == t
2482       && entry_ptr->mode == mode
2484       && entry_ptr->speed == speed
2485       && entry_ptr->alg != alg_unknown)
2486     {
2487       cache_alg = entry_ptr->alg;
2488 
2489       if (cache_alg == alg_impossible)
2490 	{
2491 	  /* The cache tells us that it's impossible to synthesize
2492 	     multiplication by T within entry_ptr->cost.  */
2493 	  if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2494 	    /* COST_LIMIT is at least as restrictive as the one
2495 	       recorded in the hash table, in which case we have no
2496 	       hope of synthesizing a multiplication.  Just
2497 	       return.  */
2498 	    return;
2499 
2500 	  /* If we get here, COST_LIMIT is less restrictive than the
2501 	     one recorded in the hash table, so we may be able to
2502 	     synthesize a multiplication.  Proceed as if we didn't
2503 	     have the cache entry.  */
2504 	}
2505       else
2506 	{
2507 	  if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2508 	    /* The cached algorithm shows that this multiplication
2509 	       requires more cost than COST_LIMIT.  Just return.  This
2510 	       way, we don't clobber this cache entry with
2511 	       alg_impossible but retain useful information.  */
2512 	    return;
2513 
2514 	  cache_hit = true;
2515 
2516 	  switch (cache_alg)
2517 	    {
2518 	    case alg_shift:
2519 	      goto do_alg_shift;
2520 
2521 	    case alg_add_t_m2:
2522 	    case alg_sub_t_m2:
2523 	      goto do_alg_addsub_t_m2;
2524 
2525 	    case alg_add_factor:
2526 	    case alg_sub_factor:
2527 	      goto do_alg_addsub_factor;
2528 
2529 	    case alg_add_t2_m:
2530 	      goto do_alg_add_t2_m;
2531 
2532 	    case alg_sub_t2_m:
2533 	      goto do_alg_sub_t2_m;
2534 
2535 	    default:
2536 	      gcc_unreachable ();
2537 	    }
2538 	}
2539     }
2540 
2541   /* If we have a group of zero bits at the low-order part of T, try
2542      multiplying by the remaining bits and then doing a shift.  */
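  /* For example (illustrative): T == 20 == 5 << 2, so synthesize A * 5
     (itself a shift-add) and shift the result left by 2.  */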
2543 
2544   if ((t & 1) == 0)
2545     {
2546     do_alg_shift:
2547       m = floor_log2 (t & -t);	/* m = number of low zero bits */
2548       if (m < maxm)
2549 	{
2550 	  q = t >> m;
2551 	  /* The function expand_shift will choose between a shift and
2552 	     a sequence of additions, so the observed cost is given as
2553 	     MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)).  */
2554 	  op_cost = m * add_cost (speed, mode);
2555 	  if (shift_cost (speed, mode, m) < op_cost)
2556 	    op_cost = shift_cost (speed, mode, m);
2557 	  new_limit.cost = best_cost.cost - op_cost;
2558 	  new_limit.latency = best_cost.latency - op_cost;
2559 	  synth_mult (alg_in, q, &new_limit, mode);
2560 
2561 	  alg_in->cost.cost += op_cost;
2562 	  alg_in->cost.latency += op_cost;
2563 	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2564 	    {
2565 	      best_cost = alg_in->cost;
2566 	      std::swap (alg_in, best_alg);
2567 	      best_alg->log[best_alg->ops] = m;
2568 	      best_alg->op[best_alg->ops] = alg_shift;
2569 	    }
2570 
2571 	  /* See if treating ORIG_T as a signed number yields a better
2572 	     sequence.  Try this sequence only for a negative ORIG_T
2573 	     as it would be useless for a non-negative ORIG_T.  */
2574 	  if ((HOST_WIDE_INT) orig_t < 0)
2575 	    {
2576 	      /* Shift ORIG_T as follows because a right shift of a
2577 		 negative-valued signed type is implementation
2578 		 defined.  */
2579 	      q = ~(~orig_t >> m);
2580 	      /* The function expand_shift will choose between a shift
2581 		 and a sequence of additions, so the observed cost is
2582 		 given as MIN (m * add_cost(speed, mode),
2583 		 shift_cost(speed, mode, m)).  */
2584 	      op_cost = m * add_cost (speed, mode);
2585 	      if (shift_cost (speed, mode, m) < op_cost)
2586 		op_cost = shift_cost (speed, mode, m);
2587 	      new_limit.cost = best_cost.cost - op_cost;
2588 	      new_limit.latency = best_cost.latency - op_cost;
2589 	      synth_mult (alg_in, q, &new_limit, mode);
2590 
2591 	      alg_in->cost.cost += op_cost;
2592 	      alg_in->cost.latency += op_cost;
2593 	      if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2594 		{
2595 		  best_cost = alg_in->cost;
2596 		  std::swap (alg_in, best_alg);
2597 		  best_alg->log[best_alg->ops] = m;
2598 		  best_alg->op[best_alg->ops] = alg_shift;
2599 		}
2600 	    }
2601 	}
2602       if (cache_hit)
2603 	goto done;
2604     }
2605 
2606   /* If we have an odd number, add or subtract one.  */
2607   if ((t & 1) != 0)
2608     {
2609       unsigned HOST_WIDE_INT w;
2610 
2611     do_alg_addsub_t_m2:
2612       for (w = 1; (w & t) != 0; w <<= 1)
2613 	;
2614       /* If T was -1, then W will be zero after the loop.  This is another
2615 	 case where T ends with ...111.  Handling it as (T + 1) - 1
2616 	 produces slightly better code and makes algorithm selection much
2617 	 faster than treating it like the ...0111 case below.  */
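      /* For example (illustrative): T == 7 gives W == 8 here, so we
         synthesize A * 8 == A << 3 and subtract A once:
         A * 7 == (A << 3) - A.  */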
2619       if (w == 0
2620 	  || (w > 2
2621 	      /* Reject the case where t is 3.
2622 		 Thus we prefer addition in that case.  */
2623 	      && t != 3))
2624 	{
2625 	  /* T ends with ...111.  Multiply by (T + 1) and subtract 1.  */
2626 
2627 	  op_cost = add_cost (speed, mode);
2628 	  new_limit.cost = best_cost.cost - op_cost;
2629 	  new_limit.latency = best_cost.latency - op_cost;
2630 	  synth_mult (alg_in, t + 1, &new_limit, mode);
2631 
2632 	  alg_in->cost.cost += op_cost;
2633 	  alg_in->cost.latency += op_cost;
2634 	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2635 	    {
2636 	      best_cost = alg_in->cost;
2637 	      std::swap (alg_in, best_alg);
2638 	      best_alg->log[best_alg->ops] = 0;
2639 	      best_alg->op[best_alg->ops] = alg_sub_t_m2;
2640 	    }
2641 	}
2642       else
2643 	{
2644 	  /* T ends with ...01 or ...011.  Multiply by (T - 1) and add 1.  */
2645 
2646 	  op_cost = add_cost (speed, mode);
2647 	  new_limit.cost = best_cost.cost - op_cost;
2648 	  new_limit.latency = best_cost.latency - op_cost;
2649 	  synth_mult (alg_in, t - 1, &new_limit, mode);
2650 
2651 	  alg_in->cost.cost += op_cost;
2652 	  alg_in->cost.latency += op_cost;
2653 	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2654 	    {
2655 	      best_cost = alg_in->cost;
2656 	      std::swap (alg_in, best_alg);
2657 	      best_alg->log[best_alg->ops] = 0;
2658 	      best_alg->op[best_alg->ops] = alg_add_t_m2;
2659 	    }
2660 	}
2661 
2662       /* We may be able to calculate a * -7, a * -15, a * -31, etc.
2663 	 quickly with a - a * n for some appropriate constant n.  */
2664       m = exact_log2 (-orig_t + 1);
2665       if (m >= 0 && m < maxm)
2666 	{
2667 	  op_cost = shiftsub1_cost (speed, mode, m);
2668 	  new_limit.cost = best_cost.cost - op_cost;
2669 	  new_limit.latency = best_cost.latency - op_cost;
2670 	  synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
2671 		      &new_limit, mode);
2672 
2673 	  alg_in->cost.cost += op_cost;
2674 	  alg_in->cost.latency += op_cost;
2675 	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2676 	    {
2677 	      best_cost = alg_in->cost;
2678 	      std::swap (alg_in, best_alg);
2679 	      best_alg->log[best_alg->ops] = m;
2680 	      best_alg->op[best_alg->ops] = alg_sub_t_m2;
2681 	    }
2682 	}
2683 
2684       if (cache_hit)
2685 	goto done;
2686     }
2687 
2688   /* Look for factors of t of the form
2689      t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2690      If we find such a factor, we can multiply by t using an algorithm that
2691      multiplies by q, shifts the result by m and adds/subtracts it to/from itself.
2692 
2693      We search for large factors first and loop down, even if large factors
2694      are less probable than small ones; if we find a large factor we will
2695      find a good sequence quickly, and therefore be able to prune the
2696      search (by decreasing COST_LIMIT).  */
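  /* For example (illustrative): T == 45 is divisible by 15 == 2**4 - 1,
     so A * 45 becomes TEM = A * 3 == (A << 1) + A followed by
     ACCUM = (TEM << 4) - TEM; searching downward finds the large
     factor 15 before the factor 9.  */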
2697 
2698  do_alg_addsub_factor:
2699   for (m = floor_log2 (t - 1); m >= 2; m--)
2700     {
2701       unsigned HOST_WIDE_INT d;
2702 
2703       d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2704       if (t % d == 0 && t > d && m < maxm
2705 	  && (!cache_hit || cache_alg == alg_add_factor))
2706 	{
2707 	  /* If the target has a cheap shift-and-add instruction use
2708 	     that in preference to a shift insn followed by an add insn.
2709 	     Assume that the shift-and-add is "atomic" with a latency
2710 	     equal to its cost, otherwise assume that on superscalar
2711 	     hardware the shift may be executed concurrently with the
2712 	     earlier steps in the algorithm.  */
2713 	  op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2714 	  if (shiftadd_cost (speed, mode, m) < op_cost)
2715 	    {
2716 	      op_cost = shiftadd_cost (speed, mode, m);
2717 	      op_latency = op_cost;
2718 	    }
2719 	  else
2720 	    op_latency = add_cost (speed, mode);
2721 
2722 	  new_limit.cost = best_cost.cost - op_cost;
2723 	  new_limit.latency = best_cost.latency - op_latency;
2724 	  synth_mult (alg_in, t / d, &new_limit, mode);
2725 
2726 	  alg_in->cost.cost += op_cost;
2727 	  alg_in->cost.latency += op_latency;
2728 	  if (alg_in->cost.latency < op_cost)
2729 	    alg_in->cost.latency = op_cost;
2730 	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2731 	    {
2732 	      best_cost = alg_in->cost;
2733 	      std::swap (alg_in, best_alg);
2734 	      best_alg->log[best_alg->ops] = m;
2735 	      best_alg->op[best_alg->ops] = alg_add_factor;
2736 	    }
2737 	  /* Other factors will have been taken care of in the recursion.  */
2738 	  break;
2739 	}
2740 
2741       d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2742       if (t % d == 0 && t > d && m < maxm
2743 	  && (!cache_hit || cache_alg == alg_sub_factor))
2744 	{
2745 	  /* If the target has a cheap shift-and-subtract insn use
2746 	     that in preference to a shift insn followed by a sub insn.
2747 	     Assume that the shift-and-sub is "atomic" with a latency
2748 	     equal to its cost, otherwise assume that on superscalar
2749 	     hardware the shift may be executed concurrently with the
2750 	     earlier steps in the algorithm.  */
2751 	  op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2752 	  if (shiftsub0_cost (speed, mode, m) < op_cost)
2753 	    {
2754 	      op_cost = shiftsub0_cost (speed, mode, m);
2755 	      op_latency = op_cost;
2756 	    }
2757 	  else
2758 	    op_latency = add_cost (speed, mode);
2759 
2760 	  new_limit.cost = best_cost.cost - op_cost;
2761 	  new_limit.latency = best_cost.latency - op_latency;
2762 	  synth_mult (alg_in, t / d, &new_limit, mode);
2763 
2764 	  alg_in->cost.cost += op_cost;
2765 	  alg_in->cost.latency += op_latency;
2766 	  if (alg_in->cost.latency < op_cost)
2767 	    alg_in->cost.latency = op_cost;
2768 	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2769 	    {
2770 	      best_cost = alg_in->cost;
2771 	      std::swap (alg_in, best_alg);
2772 	      best_alg->log[best_alg->ops] = m;
2773 	      best_alg->op[best_alg->ops] = alg_sub_factor;
2774 	    }
2775 	  break;
2776 	}
2777     }
2778   if (cache_hit)
2779     goto done;
2780 
2781   /* Try shift-and-add (load effective address) instructions,
2782      i.e. do a*3, a*5, a*9.  */
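  /* For example (illustrative): T == 5 gives Q == 4 and M == 2, so
     A * 5 is the single shift-and-add (A << 2) + A -- one instruction
     on targets with a scaled-index address mode such as x86's lea.  */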
2783   if ((t & 1) != 0)
2784     {
2785     do_alg_add_t2_m:
2786       q = t - 1;
2787       q = q & -q;
2788       m = exact_log2 (q);
2789       if (m >= 0 && m < maxm)
2790 	{
2791 	  op_cost = shiftadd_cost (speed, mode, m);
2792 	  new_limit.cost = best_cost.cost - op_cost;
2793 	  new_limit.latency = best_cost.latency - op_cost;
2794 	  synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2795 
2796 	  alg_in->cost.cost += op_cost;
2797 	  alg_in->cost.latency += op_cost;
2798 	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2799 	    {
2800 	      best_cost = alg_in->cost;
2801 	      std::swap (alg_in, best_alg);
2802 	      best_alg->log[best_alg->ops] = m;
2803 	      best_alg->op[best_alg->ops] = alg_add_t2_m;
2804 	    }
2805 	}
2806       if (cache_hit)
2807 	goto done;
2808 
2809     do_alg_sub_t2_m:
2810       q = t + 1;
2811       q = q & -q;
2812       m = exact_log2 (q);
2813       if (m >= 0 && m < maxm)
2814 	{
2815 	  op_cost = shiftsub0_cost (speed, mode, m);
2816 	  new_limit.cost = best_cost.cost - op_cost;
2817 	  new_limit.latency = best_cost.latency - op_cost;
2818 	  synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2819 
2820 	  alg_in->cost.cost += op_cost;
2821 	  alg_in->cost.latency += op_cost;
2822 	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2823 	    {
2824 	      best_cost = alg_in->cost;
2825 	      std::swap (alg_in, best_alg);
2826 	      best_alg->log[best_alg->ops] = m;
2827 	      best_alg->op[best_alg->ops] = alg_sub_t2_m;
2828 	    }
2829 	}
2830       if (cache_hit)
2831 	goto done;
2832     }
2833 
2834  done:
2835   /* If best_cost has not decreased, we have not found any algorithm.  */
2836   if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2837     {
2838       /* We failed to find an algorithm.  Record alg_impossible for
2839 	 this case (that is, <T, MODE, COST_LIMIT>) so that next time
2840 	 we are asked to find an algorithm for T within the same or
2841 	 lower COST_LIMIT, we can immediately return to the
2842 	 caller.  */
2843       entry_ptr->t = t;
2844       entry_ptr->mode = mode;
2845       entry_ptr->speed = speed;
2846       entry_ptr->alg = alg_impossible;
2847       entry_ptr->cost = *cost_limit;
2848       return;
2849     }
2850 
2851   /* Cache the result.  */
2852   if (!cache_hit)
2853     {
2854       entry_ptr->t = t;
2855       entry_ptr->mode = mode;
2856       entry_ptr->speed = speed;
2857       entry_ptr->alg = best_alg->op[best_alg->ops];
2858       entry_ptr->cost.cost = best_cost.cost;
2859       entry_ptr->cost.latency = best_cost.latency;
2860     }
2861 
2862   /* If we are getting too long a sequence for `struct algorithm'
2863      to record, make this search fail.  */
2864   if (best_alg->ops == MAX_BITS_PER_WORD)
2865     return;
2866 
2867   /* Copy the algorithm from temporary space to the space at alg_out.
2868      We avoid using structure assignment because the majority of
2869      best_alg is normally undefined, and this is a critical function.  */
2870   alg_out->ops = best_alg->ops + 1;
2871   alg_out->cost = best_cost;
2872   memcpy (alg_out->op, best_alg->op,
2873 	  alg_out->ops * sizeof *alg_out->op);
2874   memcpy (alg_out->log, best_alg->log,
2875 	  alg_out->ops * sizeof *alg_out->log);
2876 }
2877 
2878 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2879    Try three variations:
2880 
2881        - a shift/add sequence based on VAL itself
2882        - a shift/add sequence based on -VAL, followed by a negation
2883        - a shift/add sequence based on VAL - 1, followed by an addition.
2884 
2885    Return true if the cheapest of these costs less than MULT_COST,
2886    describing the algorithm in *ALG and final fixup in *VARIANT.  */
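/* For example (illustrative): for VAL == -5 the negate variant is
   typically chosen: synthesize A * 5 as (A << 2) + A and negate the
   result, -5 itself having no comparably cheap shift/add form.  */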
2887 
2888 static bool
2889 choose_mult_variant (machine_mode mode, HOST_WIDE_INT val,
2890 		     struct algorithm *alg, enum mult_variant *variant,
2891 		     int mult_cost)
2892 {
2893   struct algorithm alg2;
2894   struct mult_cost limit;
2895   int op_cost;
2896   bool speed = optimize_insn_for_speed_p ();
2897 
2898   /* Fail quickly for impossible bounds.  */
2899   if (mult_cost < 0)
2900     return false;
2901 
2902   /* Ensure that mult_cost provides a reasonable upper bound.
2903      Any constant multiplication can be performed with fewer
2904      than 2 * bits additions.  */
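  /* For example (illustrative): the binary method computes A * 13
     (binary 1101) as ((A*2 + A)*2)*2 + A -- three doublings plus two
     additions, five adds in all, below the 2 * 4 bound for a 4-bit
     value.  */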
2905   op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
2906   if (mult_cost > op_cost)
2907     mult_cost = op_cost;
2908 
2909   *variant = basic_variant;
2910   limit.cost = mult_cost;
2911   limit.latency = mult_cost;
2912   synth_mult (alg, val, &limit, mode);
2913 
2914   /* This works only if the inverted value actually fits in an
2915      `unsigned int'.  */
2916   if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
2917     {
2918       op_cost = neg_cost (speed, mode);
2919       if (MULT_COST_LESS (&alg->cost, mult_cost))
2920 	{
2921 	  limit.cost = alg->cost.cost - op_cost;
2922 	  limit.latency = alg->cost.latency - op_cost;
2923 	}
2924       else
2925 	{
2926 	  limit.cost = mult_cost - op_cost;
2927 	  limit.latency = mult_cost - op_cost;
2928 	}
2929 
2930       synth_mult (&alg2, -val, &limit, mode);
2931       alg2.cost.cost += op_cost;
2932       alg2.cost.latency += op_cost;
2933       if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2934 	*alg = alg2, *variant = negate_variant;
2935     }
2936 
2937   /* This proves very useful for division-by-constant.  */
2938   op_cost = add_cost (speed, mode);
2939   if (MULT_COST_LESS (&alg->cost, mult_cost))
2940     {
2941       limit.cost = alg->cost.cost - op_cost;
2942       limit.latency = alg->cost.latency - op_cost;
2943     }
2944   else
2945     {
2946       limit.cost = mult_cost - op_cost;
2947       limit.latency = mult_cost - op_cost;
2948     }
2949 
2950   synth_mult (&alg2, val - 1, &limit, mode);
2951   alg2.cost.cost += op_cost;
2952   alg2.cost.latency += op_cost;
2953   if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2954     *alg = alg2, *variant = add_variant;
2955 
2956   return MULT_COST_LESS (&alg->cost, mult_cost);
2957 }
2958 
2959 /* A subroutine of expand_mult, used for constant multiplications.
2960    Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2961    convenient.  Use the shift/add sequence described by ALG and apply
2962    the final fixup specified by VARIANT.  */
2963 
2964 static rtx
2965 expand_mult_const (machine_mode mode, rtx op0, HOST_WIDE_INT val,
2966 		   rtx target, const struct algorithm *alg,
2967 		   enum mult_variant variant)
2968 {
2969   HOST_WIDE_INT val_so_far;
2970   rtx_insn *insn;
2971   rtx accum, tem;
2972   int opno;
2973   machine_mode nmode;
2974 
2975   /* Avoid referencing memory over and over, and invalid sharing
2976      on SUBREGs.  */
2977   op0 = force_reg (mode, op0);
2978 
2979   /* ACCUM starts out either as OP0 or as a zero, depending on
2980      the first operation.  */
2981 
2982   if (alg->op[0] == alg_zero)
2983     {
2984       accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
2985       val_so_far = 0;
2986     }
2987   else if (alg->op[0] == alg_m)
2988     {
2989       accum = copy_to_mode_reg (mode, op0);
2990       val_so_far = 1;
2991     }
2992   else
2993     gcc_unreachable ();
2994 
2995   for (opno = 1; opno < alg->ops; opno++)
2996     {
2997       int log = alg->log[opno];
2998       rtx shift_subtarget = optimize ? 0 : accum;
2999       rtx add_target
3000 	= (opno == alg->ops - 1 && target != 0 && variant != add_variant
3001 	   && !optimize)
3002 	  ? target : 0;
3003       rtx accum_target = optimize ? 0 : accum;
3004       rtx accum_inner;
3005 
3006       switch (alg->op[opno])
3007 	{
3008 	case alg_shift:
3009 	  tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3010 	  /* REG_EQUAL note will be attached to the following insn.  */
3011 	  emit_move_insn (accum, tem);
3012 	  val_so_far <<= log;
3013 	  break;
3014 
3015 	case alg_add_t_m2:
3016 	  tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3017 	  accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3018 				 add_target ? add_target : accum_target);
3019 	  val_so_far += (HOST_WIDE_INT) 1 << log;
3020 	  break;
3021 
3022 	case alg_sub_t_m2:
3023 	  tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3024 	  accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
3025 				 add_target ? add_target : accum_target);
3026 	  val_so_far -= (HOST_WIDE_INT) 1 << log;
3027 	  break;
3028 
3029 	case alg_add_t2_m:
3030 	  accum = expand_shift (LSHIFT_EXPR, mode, accum,
3031 				log, shift_subtarget, 0);
3032 	  accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
3033 				 add_target ? add_target : accum_target);
3034 	  val_so_far = (val_so_far << log) + 1;
3035 	  break;
3036 
3037 	case alg_sub_t2_m:
3038 	  accum = expand_shift (LSHIFT_EXPR, mode, accum,
3039 				log, shift_subtarget, 0);
3040 	  accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3041 				 add_target ? add_target : accum_target);
3042 	  val_so_far = (val_so_far << log) - 1;
3043 	  break;
3044 
3045 	case alg_add_factor:
3046 	  tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3047 	  accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3048 				 add_target ? add_target : accum_target);
3049 	  val_so_far += val_so_far << log;
3050 	  break;
3051 
3052 	case alg_sub_factor:
3053 	  tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3054 	  accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3055 				 (add_target
3056 				  ? add_target : (optimize ? 0 : tem)));
3057 	  val_so_far = (val_so_far << log) - val_so_far;
3058 	  break;
3059 
3060 	default:
3061 	  gcc_unreachable ();
3062 	}
3063 
3064       if (SCALAR_INT_MODE_P (mode))
3065 	{
3066 	  /* Write a REG_EQUAL note on the last insn so that we can cse
3067 	     multiplication sequences.  Note that if ACCUM is a SUBREG,
3068 	     we've set the inner register and must properly indicate that.  */
3069           tem = op0, nmode = mode;
3070           accum_inner = accum;
3071           if (GET_CODE (accum) == SUBREG)
3072 	    {
3073 	      accum_inner = SUBREG_REG (accum);
3074 	      nmode = GET_MODE (accum_inner);
3075 	      tem = gen_lowpart (nmode, op0);
3076 	    }
3077 
3078           insn = get_last_insn ();
3079           set_dst_reg_note (insn, REG_EQUAL,
3080 			    gen_rtx_MULT (nmode, tem,
3081 					  gen_int_mode (val_so_far, nmode)),
3082 			    accum_inner);
3083 	}
3084     }
3085 
3086   if (variant == negate_variant)
3087     {
3088       val_so_far = -val_so_far;
3089       accum = expand_unop (mode, neg_optab, accum, target, 0);
3090     }
3091   else if (variant == add_variant)
3092     {
3093       val_so_far = val_so_far + 1;
3094       accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3095     }
3096 
3097   /* Compare only the bits of val and val_so_far that are significant
3098      in the result mode, to avoid sign-/zero-extension confusion.  */
3099   nmode = GET_MODE_INNER (mode);
3100   if (nmode == VOIDmode)
3101     nmode = mode;
3102   val &= GET_MODE_MASK (nmode);
3103   val_so_far &= GET_MODE_MASK (nmode);
3104   gcc_assert (val == val_so_far);
3105 
3106   return accum;
3107 }
3108 
3109 /* Perform a multiplication and return an rtx for the result.
3110    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3111    TARGET is a suggestion for where to store the result (an rtx).
3112 
3113    We check specially for a constant integer as OP1.
3114    If you want this check for OP0 as well, then before calling
3115    you should swap the two operands if OP0 would be constant.  */
3116 
3117 rtx
3118 expand_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3119 	     int unsignedp)
3120 {
3121   enum mult_variant variant;
3122   struct algorithm algorithm;
3123   rtx scalar_op1;
3124   int max_cost;
3125   bool speed = optimize_insn_for_speed_p ();
3126   bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
3127 
3128   if (CONSTANT_P (op0))
3129     std::swap (op0, op1);
3130 
3131   /* For vectors, there are several simplifications that can be made if
3132      all elements of the vector constant are identical.  */
3133   scalar_op1 = op1;
3134   if (GET_CODE (op1) == CONST_VECTOR)
3135     {
3136       int i, n = CONST_VECTOR_NUNITS (op1);
3137       scalar_op1 = CONST_VECTOR_ELT (op1, 0);
3138       for (i = 1; i < n; ++i)
3139 	if (!rtx_equal_p (scalar_op1, CONST_VECTOR_ELT (op1, i)))
3140 	  goto skip_scalar;
3141     }
3142 
3143   if (INTEGRAL_MODE_P (mode))
3144     {
3145       rtx fake_reg;
3146       HOST_WIDE_INT coeff;
3147       bool is_neg;
3148       int mode_bitsize;
3149 
3150       if (op1 == CONST0_RTX (mode))
3151 	return op1;
3152       if (op1 == CONST1_RTX (mode))
3153 	return op0;
3154       if (op1 == CONSTM1_RTX (mode))
3155 	return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3156 			    op0, target, 0);
3157 
3158       if (do_trapv)
3159 	goto skip_synth;
3160 
3161       /* If mode is an integer vector mode, check whether the backend
3162 	 supports vector lshift (by scalar or vector) at all.  If not, we
3163 	 can't use a synthesized multiply.  */
3164       if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
3165 	  && optab_handler (vashl_optab, mode) == CODE_FOR_nothing
3166 	  && optab_handler (ashl_optab, mode) == CODE_FOR_nothing)
3167 	goto skip_synth;
3168 
3169       /* These are the operations that are potentially turned into
3170 	 a sequence of shifts and additions.  */
3171       mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
3172 
3173       /* synth_mult does an `unsigned int' multiply.  As long as the mode is
3174 	 less than or equal in size to `unsigned int' this doesn't matter.
3175 	 If the mode is larger than `unsigned int', then synth_mult works
3176 	 only if the constant value exactly fits in an `unsigned int' without
3177 	 any truncation.  This means that multiplying by negative values does
3178 	 not work; results are off by 2^32 on a 32-bit machine.  */
3179       if (CONST_INT_P (scalar_op1))
3180 	{
3181 	  coeff = INTVAL (scalar_op1);
3182 	  is_neg = coeff < 0;
3183 	}
3184 #if TARGET_SUPPORTS_WIDE_INT
3185       else if (CONST_WIDE_INT_P (scalar_op1))
3186 #else
3187       else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
3188 #endif
3189 	{
3190 	  int shift = wi::exact_log2 (std::make_pair (scalar_op1, mode));
3191 	  /* Perfect power of 2 (other than 1, which is handled above).  */
3192 	  if (shift > 0)
3193 	    return expand_shift (LSHIFT_EXPR, mode, op0,
3194 				 shift, target, unsignedp);
3195 	  else
3196 	    goto skip_synth;
3197 	}
3198       else
3199 	goto skip_synth;
3200 
3201       /* We used to test optimize here, on the grounds that it's better to
3202 	 produce a smaller program when -O is not used.  But this causes
3203 	 such a terrible slowdown sometimes that it seems better to always
3204 	 use synth_mult.  */
3205 
3206       /* Special case powers of two.  */
3207       if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)
3208 	  && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT))
3209 	return expand_shift (LSHIFT_EXPR, mode, op0,
3210 			     floor_log2 (coeff), target, unsignedp);
3211 
3212       fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3213 
3214       /* Attempt to handle multiplication of DImode values by negative
3215 	 coefficients, by performing the multiplication by a positive
3216 	 multiplier and then inverting the result.  */
3217       if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3218 	{
3219 	  /* It's safe to use -coeff even for INT_MIN, as the
3220 	     result is interpreted as an unsigned coefficient.
3221 	     Exclude cost of op0 from max_cost to match the cost
3222 	     calculation of the synth_mult.  */
3223 	  coeff = -(unsigned HOST_WIDE_INT) coeff;
3224 	  max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed)
3225 		      - neg_cost (speed, mode));
3226 	  if (max_cost <= 0)
3227 	    goto skip_synth;
3228 
3229 	  /* Special case powers of two.  */
3230 	  if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3231 	    {
3232 	      rtx temp = expand_shift (LSHIFT_EXPR, mode, op0,
3233 				       floor_log2 (coeff), target, unsignedp);
3234 	      return expand_unop (mode, neg_optab, temp, target, 0);
3235 	    }
3236 
3237 	  if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3238 				   max_cost))
3239 	    {
3240 	      rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX,
3241 					    &algorithm, variant);
3242 	      return expand_unop (mode, neg_optab, temp, target, 0);
3243 	    }
3244 	  goto skip_synth;
3245 	}
3246 
3247       /* Exclude cost of op0 from max_cost to match the cost
3248 	 calculation of the synth_mult.  */
3249       max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed);
3250       if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3251 	return expand_mult_const (mode, op0, coeff, target,
3252 				  &algorithm, variant);
3253     }
3254  skip_synth:
3255 
3256   /* Expand x*2.0 as x+x.  */
3257   if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1))
3258     {
3259       REAL_VALUE_TYPE d;
3260       REAL_VALUE_FROM_CONST_DOUBLE (d, scalar_op1);
3261 
3262       if (REAL_VALUES_EQUAL (d, dconst2))
3263 	{
3264 	  op0 = force_reg (GET_MODE (op0), op0);
3265 	  return expand_binop (mode, add_optab, op0, op0,
3266 			       target, unsignedp, OPTAB_LIB_WIDEN);
3267 	}
3268     }
3269  skip_scalar:
3270 
3271   /* This used to use umul_optab if unsigned, but for non-widening multiply
3272      there is no difference between signed and unsigned.  */
3273   op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
3274 		      op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3275   gcc_assert (op0);
3276   return op0;
3277 }
3278 
3279 /* Return a cost estimate for multiplying a register by the given
3280    COEFFicient in the given MODE and SPEED.  */
3281 
3282 int
3283 mult_by_coeff_cost (HOST_WIDE_INT coeff, machine_mode mode, bool speed)
3284 {
3285   int max_cost;
3286   struct algorithm algorithm;
3287   enum mult_variant variant;
3288 
3289   rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3290   max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg), speed);
3291   if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3292     return algorithm.cost.cost;
3293   else
3294     return max_cost;
3295 }
3296 
3297 /* Perform a widening multiplication and return an rtx for the result.
3298    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3299    TARGET is a suggestion for where to store the result (an rtx).
3300    THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3301    or smul_widen_optab.
3302 
3303    We check specially for a constant integer as OP1, comparing the
3304    cost of a widening multiply against the cost of a sequence of shifts
3305    and adds.  */
3306 
3307 rtx
3308 expand_widening_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3309 		      int unsignedp, optab this_optab)
3310 {
3311   bool speed = optimize_insn_for_speed_p ();
3312   rtx cop1;
3313 
3314   if (CONST_INT_P (op1)
3315       && GET_MODE (op0) != VOIDmode
3316       && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3317 				this_optab == umul_widen_optab))
3318       && CONST_INT_P (cop1)
3319       && (INTVAL (cop1) >= 0
3320 	  || HWI_COMPUTABLE_MODE_P (mode)))
3321     {
3322       HOST_WIDE_INT coeff = INTVAL (cop1);
3323       int max_cost;
3324       enum mult_variant variant;
3325       struct algorithm algorithm;
3326 
3327       if (coeff == 0)
3328 	return CONST0_RTX (mode);
3329 
3330       /* Special case powers of two.  */
3331       if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3332 	{
3333 	  op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3334 	  return expand_shift (LSHIFT_EXPR, mode, op0,
3335 			       floor_log2 (coeff), target, unsignedp);
3336 	}
3337 
3338       /* Exclude cost of op0 from max_cost to match the cost
3339 	 calculation of the synth_mult.  */
3340       max_cost = mul_widen_cost (speed, mode);
3341       if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3342 			       max_cost))
3343 	{
3344 	  op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3345 	  return expand_mult_const (mode, op0, coeff, target,
3346 				    &algorithm, variant);
3347 	}
3348     }
3349   return expand_binop (mode, this_optab, op0, op1, target,
3350 		       unsignedp, OPTAB_LIB_WIDEN);
3351 }
3352 
3353 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3354    replace division by D, and put the least significant N bits of the result
3355    in *MULTIPLIER_PTR and return the most significant bit.
3356 
3357    The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3358    needed precision is in PRECISION (should be <= N).
3359 
3360    PRECISION should be as small as possible so this function can choose
3361    multiplier more freely.
3362 
3363    The rounded-up logarithm of D is placed in *LGUP_PTR.  A shift count that
3364    is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3365 
3366    Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3367    where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier.  */
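
     /* For instance (illustrative, with N == 32): for D == 7 this returns 1
        with *MULTIPLIER_PTR == 0x24924925 and *POST_SHIFT_PTR == 3, while
        for D == 10 it returns 0 with *MULTIPLIER_PTR == 0xCCCCCCCD and
        *POST_SHIFT_PTR == 3, so that x / 10 == (x * 0xCCCCCCCD) >> 35 for
        every 32-bit unsigned x.  */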
3368 
3369 unsigned HOST_WIDE_INT
3370 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3371 		   unsigned HOST_WIDE_INT *multiplier_ptr,
3372 		   int *post_shift_ptr, int *lgup_ptr)
3373 {
3374   int lgup, post_shift;
3375   int pow, pow2;
3376 
3377   /* lgup = ceil(log2(divisor)); */
3378   lgup = ceil_log2 (d);
3379 
3380   gcc_assert (lgup <= n);
3381 
3382   pow = n + lgup;
3383   pow2 = n + lgup - precision;
3384 
3385   /* mlow = 2^(N + lgup)/d */
3386   wide_int val = wi::set_bit_in_zero (pow, HOST_BITS_PER_DOUBLE_INT);
3387   wide_int mlow = wi::udiv_trunc (val, d);
3388 
3389   /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3390   val |= wi::set_bit_in_zero (pow2, HOST_BITS_PER_DOUBLE_INT);
3391   wide_int mhigh = wi::udiv_trunc (val, d);
3392 
3393   /* If precision == N, then mlow, mhigh exceed 2^N
3394      (but they do not exceed 2^(N+1)).  */
3395 
3396   /* Reduce to lowest terms.  */
3397   for (post_shift = lgup; post_shift > 0; post_shift--)
3398     {
3399       unsigned HOST_WIDE_INT ml_lo = wi::extract_uhwi (mlow, 1,
3400 						       HOST_BITS_PER_WIDE_INT);
3401       unsigned HOST_WIDE_INT mh_lo = wi::extract_uhwi (mhigh, 1,
3402 						       HOST_BITS_PER_WIDE_INT);
3403       if (ml_lo >= mh_lo)
3404 	break;
3405 
3406       mlow = wi::uhwi (ml_lo, HOST_BITS_PER_DOUBLE_INT);
3407       mhigh = wi::uhwi (mh_lo, HOST_BITS_PER_DOUBLE_INT);
3408     }
3409 
3410   *post_shift_ptr = post_shift;
3411   *lgup_ptr = lgup;
3412   if (n < HOST_BITS_PER_WIDE_INT)
3413     {
3414       unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3415       *multiplier_ptr = mhigh.to_uhwi () & mask;
3416       return mhigh.to_uhwi () >= mask;
3417     }
3418   else
3419     {
3420       *multiplier_ptr = mhigh.to_uhwi ();
3421       return wi::extract_uhwi (mhigh, HOST_BITS_PER_WIDE_INT, 1);
3422     }
3423 }
3424 
3425 /* Compute the inverse of X mod 2**N, i.e., find Y such that X * Y is
3426    congruent to 1 (mod 2**N).  */
3427 
3428 static unsigned HOST_WIDE_INT
3429 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3430 {
3431   /* Solve x*y == 1 (mod 2^n), where x is odd.  Return y.  */
3432 
3433   /* The algorithm notes that the choice y = x satisfies
3434      x*y == 1 mod 2^3, since x is assumed odd.
3435      Each iteration doubles the number of bits of significance in y.  */
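
       /* Worked example (illustrative): for x == 7, n == 8 one iteration
          yields y == 7 * (2 - 49) mod 256 == 183 and the next leaves it
          unchanged; indeed 7 * 183 == 1281 == 5 * 256 + 1.  */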
3436 
3437   unsigned HOST_WIDE_INT mask;
3438   unsigned HOST_WIDE_INT y = x;
3439   int nbit = 3;
3440 
3441   mask = (n == HOST_BITS_PER_WIDE_INT
3442 	  ? ~(unsigned HOST_WIDE_INT) 0
3443 	  : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3444 
3445   while (nbit < n)
3446     {
3447       y = y * (2 - x*y) & mask;		/* Modulo 2^N */
3448       nbit *= 2;
3449     }
3450   return y;
3451 }
3452 
3453 /* Emit code to adjust ADJ_OPERAND after a multiplication of OP0 and OP1
3454    that was performed with the wrong signedness flavor.  ADJ_OPERAND is
3455    already the high half of the product OP0 x OP1.  If UNSIGNEDP is
3456    nonzero, adjust the signed product to become unsigned; if UNSIGNEDP is
3457    zero, adjust the unsigned product to become signed.
3458 
3459    The result is put in TARGET if that is convenient.
3460 
3461    MODE is the mode of operation.  */
3462 
3463 rtx
3464 expand_mult_highpart_adjust (machine_mode mode, rtx adj_operand, rtx op0,
3465 			     rtx op1, rtx target, int unsignedp)
3466 {
3467   rtx tem;
3468   enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3469 
3470   tem = expand_shift (RSHIFT_EXPR, mode, op0,
3471 		      GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3472   tem = expand_and (mode, tem, op1, NULL_RTX);
3473   adj_operand
3474     = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3475 		     adj_operand);
3476 
3477   tem = expand_shift (RSHIFT_EXPR, mode, op1,
3478 		      GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3479   tem = expand_and (mode, tem, op0, NULL_RTX);
3480   target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3481 			  target);
3482 
3483   return target;
3484 }
3485 
3486 /* Subroutine of expmed_mult_highpart.  Return the MODE high part of OP.  */
3487 
3488 static rtx
3489 extract_high_half (machine_mode mode, rtx op)
3490 {
3491   machine_mode wider_mode;
3492 
3493   if (mode == word_mode)
3494     return gen_highpart (mode, op);
3495 
3496   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3497 
3498   wider_mode = GET_MODE_WIDER_MODE (mode);
3499   op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3500 		     GET_MODE_BITSIZE (mode), 0, 1);
3501   return convert_modes (mode, wider_mode, op, 0);
3502 }
3503 
3504 /* Like expmed_mult_highpart, but only consider using a multiplication
3505    optab.  OP1 is an rtx for the constant operand.  */
3506 
3507 static rtx
3508 expmed_mult_highpart_optab (machine_mode mode, rtx op0, rtx op1,
3509 			    rtx target, int unsignedp, int max_cost)
3510 {
3511   rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3512   machine_mode wider_mode;
3513   optab moptab;
3514   rtx tem;
3515   int size;
3516   bool speed = optimize_insn_for_speed_p ();
3517 
3518   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3519 
3520   wider_mode = GET_MODE_WIDER_MODE (mode);
3521   size = GET_MODE_BITSIZE (mode);
3522 
3523   /* Firstly, try using a multiplication insn that only generates the needed
3524      high part of the product, and in the sign flavor of unsignedp.  */
3525   if (mul_highpart_cost (speed, mode) < max_cost)
3526     {
3527       moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3528       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3529 			  unsignedp, OPTAB_DIRECT);
3530       if (tem)
3531 	return tem;
3532     }
3533 
3534   /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3535      Need to adjust the result after the multiplication.  */
3536   if (size - 1 < BITS_PER_WORD
3537       && (mul_highpart_cost (speed, mode)
3538 	  + 2 * shift_cost (speed, mode, size-1)
3539 	  + 4 * add_cost (speed, mode) < max_cost))
3540     {
3541       moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3542       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3543 			  unsignedp, OPTAB_DIRECT);
3544       if (tem)
3545 	/* We used the wrong signedness.  Adjust the result.  */
3546 	return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3547 					    tem, unsignedp);
3548     }
3549 
3550   /* Try widening multiplication.  */
3551   moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3552   if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3553       && mul_widen_cost (speed, wider_mode) < max_cost)
3554     {
3555       tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3556 			  unsignedp, OPTAB_WIDEN);
3557       if (tem)
3558 	return extract_high_half (mode, tem);
3559     }
3560 
3561   /* Try widening the mode and performing a non-widening multiplication.  */
3562   if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3563       && size - 1 < BITS_PER_WORD
3564       && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3565 	  < max_cost))
3566     {
3567       rtx_insn *insns;
3568       rtx wop0, wop1;
3569 
3570       /* We need to widen the operands, for example to ensure the
3571 	 constant multiplier is correctly sign or zero extended.
3572 	 Use a sequence to clean up any instructions emitted by
3573 	 the conversions if things don't work out.  */
3574       start_sequence ();
3575       wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3576       wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3577       tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3578 			  unsignedp, OPTAB_WIDEN);
3579       insns = get_insns ();
3580       end_sequence ();
3581 
3582       if (tem)
3583 	{
3584 	  emit_insn (insns);
3585 	  return extract_high_half (mode, tem);
3586 	}
3587     }
3588 
3589   /* Try widening multiplication of opposite signedness, and adjust.  */
3590   moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3591   if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3592       && size - 1 < BITS_PER_WORD
3593       && (mul_widen_cost (speed, wider_mode)
3594 	  + 2 * shift_cost (speed, mode, size-1)
3595 	  + 4 * add_cost (speed, mode) < max_cost))
3596     {
3597       tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3598 			  NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3599       if (tem != 0)
3600 	{
3601 	  tem = extract_high_half (mode, tem);
3602 	  /* We used the wrong signedness.  Adjust the result.  */
3603 	  return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3604 					      target, unsignedp);
3605 	}
3606     }
3607 
3608   return 0;
3609 }
3610 
3611 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3612    putting the high half of the result in TARGET if that is convenient,
3613    and return where the result is.  If the operation cannot be performed,
3614    0 is returned.
3615 
3616    MODE is the mode of operation and result.
3617 
3618    UNSIGNEDP nonzero means unsigned multiply.
3619 
3620    MAX_COST is the total allowed cost for the expanded RTL.  */
3621 
3622 static rtx
3623 expmed_mult_highpart (machine_mode mode, rtx op0, rtx op1,
3624 		      rtx target, int unsignedp, int max_cost)
3625 {
3626   machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3627   unsigned HOST_WIDE_INT cnst1;
3628   int extra_cost;
3629   bool sign_adjust = false;
3630   enum mult_variant variant;
3631   struct algorithm alg;
3632   rtx tem;
3633   bool speed = optimize_insn_for_speed_p ();
3634 
3635   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3636   /* We can't support modes wider than HOST_BITS_PER_WIDE_INT.  */
3637   gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3638 
3639   cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3640 
3641   /* We can't optimize modes wider than BITS_PER_WORD.
3642      ??? We might be able to perform double-word arithmetic if
3643      mode == word_mode, however all the cost calculations in
3644      synth_mult etc. assume single-word operations.  */
3645   if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3646     return expmed_mult_highpart_optab (mode, op0, op1, target,
3647 				       unsignedp, max_cost);
3648 
3649   extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3650 
3651   /* Check whether we try to multiply by a negative constant.  */
3652   if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3653     {
3654       sign_adjust = true;
3655       extra_cost += add_cost (speed, mode);
3656     }
3657 
3658   /* See whether shift/add multiplication is cheap enough.  */
3659   if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3660 			   max_cost - extra_cost))
3661     {
3662       /* See whether the specialized multiplication optabs are
3663 	 cheaper than the shift/add version.  */
3664       tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3665 					alg.cost.cost + extra_cost);
3666       if (tem)
3667 	return tem;
3668 
3669       tem = convert_to_mode (wider_mode, op0, unsignedp);
3670       tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3671       tem = extract_high_half (mode, tem);
3672 
3673       /* Adjust result for signedness.  */
3674       if (sign_adjust)
3675 	tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3676 
3677       return tem;
3678     }
3679   return expmed_mult_highpart_optab (mode, op0, op1, target,
3680 				     unsignedp, max_cost);
3681 }
3682 
3683 
3684 /* Expand signed modulus of OP0 by a power of two D in mode MODE.  */
3685 
3686 static rtx
3687 expand_smod_pow2 (machine_mode mode, rtx op0, HOST_WIDE_INT d)
3688 {
3689   rtx result, temp, shift;
3690   rtx_code_label *label;
3691   int logd;
3692   int prec = GET_MODE_PRECISION (mode);
3693 
3694   logd = floor_log2 (d);
3695   result = gen_reg_rtx (mode);
3696 
3697   /* Avoid conditional branches when they're expensive.  */
3698   if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3699       && optimize_insn_for_speed_p ())
3700     {
3701       rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3702 				      mode, 0, -1);
3703       if (signmask)
3704 	{
3705 	  HOST_WIDE_INT masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3706 	  signmask = force_reg (mode, signmask);
3707 	  shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3708 
3709 	  /* Use the rtx_cost of a LSHIFTRT instruction to determine
3710 	     which instruction sequence to use.  If logical right shifts
3711 	     are expensive then use 2 XORs, 2 SUBs and an AND, otherwise
3712 	     use a LSHIFTRT, 1 ADD, 1 SUB and an AND.  */
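
     	  /* Illustration for D == 4 (logd == 2), writing m for the
     	     -(op0 < 0) mask and >>u for a logical right shift: the XOR
     	     variant computes ((((op0 ^ m) - m) & 3) ^ m) - m, the shift
     	     variant ((op0 + (m >>u (N - 2))) & 3) - (m >>u (N - 2));
     	     e.g. both map -5 to -1, matching truncating division.  */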
3713 
3714 	  temp = gen_rtx_LSHIFTRT (mode, result, shift);
3715 	  if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
3716 	      || (set_src_cost (temp, optimize_insn_for_speed_p ())
3717 		  > COSTS_N_INSNS (2)))
3718 	    {
3719 	      temp = expand_binop (mode, xor_optab, op0, signmask,
3720 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
3721 	      temp = expand_binop (mode, sub_optab, temp, signmask,
3722 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
3723 	      temp = expand_binop (mode, and_optab, temp,
3724 				   gen_int_mode (masklow, mode),
3725 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
3726 	      temp = expand_binop (mode, xor_optab, temp, signmask,
3727 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
3728 	      temp = expand_binop (mode, sub_optab, temp, signmask,
3729 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
3730 	    }
3731 	  else
3732 	    {
3733 	      signmask = expand_binop (mode, lshr_optab, signmask, shift,
3734 				       NULL_RTX, 1, OPTAB_LIB_WIDEN);
3735 	      signmask = force_reg (mode, signmask);
3736 
3737 	      temp = expand_binop (mode, add_optab, op0, signmask,
3738 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
3739 	      temp = expand_binop (mode, and_optab, temp,
3740 				   gen_int_mode (masklow, mode),
3741 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
3742 	      temp = expand_binop (mode, sub_optab, temp, signmask,
3743 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
3744 	    }
3745 	  return temp;
3746 	}
3747     }
3748 
3749   /* Mask contains the mode's signbit and the significant bits of the
3750      modulus.  By including the signbit in the operation, many targets
3751      can avoid an explicit compare operation in the following comparison
3752      against zero.  */
3753   wide_int mask = wi::mask (logd, false, prec);
3754   mask = wi::set_bit (mask, prec - 1);
3755 
3756   temp = expand_binop (mode, and_optab, op0,
3757 		       immed_wide_int_const (mask, mode),
3758 		       result, 1, OPTAB_LIB_WIDEN);
3759   if (temp != result)
3760     emit_move_insn (result, temp);
3761 
3762   label = gen_label_rtx ();
3763   do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3764 
3765   temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3766 		       0, OPTAB_LIB_WIDEN);
3767 
3768   mask = wi::mask (logd, true, prec);
3769   temp = expand_binop (mode, ior_optab, temp,
3770 		       immed_wide_int_const (mask, mode),
3771 		       result, 1, OPTAB_LIB_WIDEN);
3772   temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3773 		       0, OPTAB_LIB_WIDEN);
3774   if (temp != result)
3775     emit_move_insn (result, temp);
3776   emit_label (label);
3777   return result;
3778 }
3779 
3780 /* Expand signed division of OP0 by a power of two D in mode MODE.
3781    This routine is only called for positive values of D.  */
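
     /* All of the variants below implement the identity (for D == 2**logd)

          op0 / D == (op0 + (op0 < 0 ? D - 1 : 0)) >> logd

        which rounds toward zero, e.g. -5 / 4 == (-5 + 3) >> 2 == -1; they
        differ only in how the conditional addend is produced.  */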
3782 
3783 static rtx
3784 expand_sdiv_pow2 (machine_mode mode, rtx op0, HOST_WIDE_INT d)
3785 {
3786   rtx temp;
3787   rtx_code_label *label;
3788   int logd;
3789 
3790   logd = floor_log2 (d);
3791 
3792   if (d == 2
3793       && BRANCH_COST (optimize_insn_for_speed_p (),
3794 		      false) >= 1)
3795     {
3796       temp = gen_reg_rtx (mode);
3797       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3798       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3799 			   0, OPTAB_LIB_WIDEN);
3800       return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3801     }
3802 
3803 #ifdef HAVE_conditional_move
3804   if (BRANCH_COST (optimize_insn_for_speed_p (), false)
3805       >= 2)
3806     {
3807       rtx temp2;
3808 
3809       start_sequence ();
3810       temp2 = copy_to_mode_reg (mode, op0);
3811       temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode),
3812 			   NULL_RTX, 0, OPTAB_LIB_WIDEN);
3813       temp = force_reg (mode, temp);
3814 
3815       /* Construct "temp2 = (temp2 < 0) ? temp : temp2".  */
3816       temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3817 				     mode, temp, temp2, mode, 0);
3818       if (temp2)
3819 	{
3820 	  rtx_insn *seq = get_insns ();
3821 	  end_sequence ();
3822 	  emit_insn (seq);
3823 	  return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
3824 	}
3825       end_sequence ();
3826     }
3827 #endif
3828 
3829   if (BRANCH_COST (optimize_insn_for_speed_p (),
3830 		   false) >= 2)
3831     {
3832       int ushift = GET_MODE_BITSIZE (mode) - logd;
3833 
3834       temp = gen_reg_rtx (mode);
3835       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3836       if (GET_MODE_BITSIZE (mode) >= BITS_PER_WORD
3837 	  || shift_cost (optimize_insn_for_speed_p (), mode, ushift)
3838 	     > COSTS_N_INSNS (1))
3839 	temp = expand_binop (mode, and_optab, temp, gen_int_mode (d - 1, mode),
3840 			     NULL_RTX, 0, OPTAB_LIB_WIDEN);
3841       else
3842 	temp = expand_shift (RSHIFT_EXPR, mode, temp,
3843 			     ushift, NULL_RTX, 1);
3844       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3845 			   0, OPTAB_LIB_WIDEN);
3846       return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3847     }
3848 
3849   label = gen_label_rtx ();
3850   temp = copy_to_mode_reg (mode, op0);
3851   do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3852   expand_inc (temp, gen_int_mode (d - 1, mode));
3853   emit_label (label);
3854   return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3855 }
3856 
3857 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3858    if that is convenient, and returning where the result is.
3859    You may request either the quotient or the remainder as the result;
3860    specify REM_FLAG nonzero to get the remainder.
3861 
3862    CODE is the expression code for which kind of division this is;
3863    it controls how rounding is done.  MODE is the machine mode to use.
3864    UNSIGNEDP nonzero means do unsigned division.  */
3865 
3866 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3867    and then correct it by or'ing in missing high bits
3868    if result of ANDI is nonzero.
3869    For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3870    This could optimize to a bfexts instruction.
3871    But C doesn't use these operations, so their optimizations are
3872    left for later.  */
3873 /* ??? For modulo, we don't actually need the highpart of the first product,
3874    the low part will do nicely.  And for small divisors, the second multiply
3875    can also be a low-part only multiply or even be completely left out.
3876    E.g. to calculate the remainder of a division by 3 with a 32-bit
3877    multiply, multiply with 0x55555556 and extract the upper two bits;
3878    the result is exact for inputs up to 0x1fffffff.
3879    The input range can be reduced by using cross-sum rules.
3880    For odd divisors >= 3, the following table gives right shift counts
3881    so that if a number is shifted by an integer multiple of the given
3882    amount, the remainder stays the same:
3883    2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3884    14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3885    0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3886    20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3887    0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3888 
3889    Cross-sum rules for even numbers can be derived by leaving as many bits
3890    to the right alone as the divisor has zeros to the right.
3891    E.g. if x is an unsigned 32-bit number:
3892    (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3893    */
3894 
3895 rtx
3896 expand_divmod (int rem_flag, enum tree_code code, machine_mode mode,
3897 	       rtx op0, rtx op1, rtx target, int unsignedp)
3898 {
3899   machine_mode compute_mode;
3900   rtx tquotient;
3901   rtx quotient = 0, remainder = 0;
3902   rtx_insn *last;
3903   int size;
3904   rtx_insn *insn;
3905   optab optab1, optab2;
3906   int op1_is_constant, op1_is_pow2 = 0;
3907   int max_cost, extra_cost;
3908   static HOST_WIDE_INT last_div_const = 0;
3909   bool speed = optimize_insn_for_speed_p ();
3910 
3911   op1_is_constant = CONST_INT_P (op1);
3912   if (op1_is_constant)
3913     {
3914       wide_int ext_op1 = rtx_mode_t (op1, mode);
3915       op1_is_pow2 = (wi::popcount (ext_op1) == 1
3916 		     || (! unsignedp
3917 			 && wi::popcount (wi::neg (ext_op1)) == 1));
3918     }
3919 
3920   /*
3921      This is the structure of expand_divmod:
3922 
3923      First comes code to fix up the operands so we can perform the operations
3924      correctly and efficiently.
3925 
3926      Second comes a switch statement with code specific for each rounding mode.
3927      For some special operands this code emits all RTL for the desired
3928      operation, for other cases, it generates only a quotient and stores it in
3929      QUOTIENT.  The case for trunc division/remainder might leave quotient = 0,
3930      to indicate that it has not done anything.
3931 
3932      Last comes code that finishes the operation.  If QUOTIENT is set and
3933      REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1.  If
3934      QUOTIENT is not set, it is computed using trunc rounding.
3935 
3936      We try to generate special code for division and remainder when OP1 is a
3937      constant.  If |OP1| = 2**n we can use shifts and some other fast
3938      operations.  For other values of OP1, we compute a carefully selected
3939      fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3940      by m.
3941 
3942      In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3943      half of the product.  Different strategies for generating the product are
3944      implemented in expmed_mult_highpart.
3945 
3946      If what we actually want is the remainder, we generate that by another
3947      by-constant multiplication and a subtraction.  */
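
       /* As a concrete (illustrative) example, on a 32-bit target an
          unsigned division by 10 typically becomes

              quotient  = high32 (op0 * 0xCCCCCCCD) >> 3;
              remainder = op0 - quotient * 10;

          where high32 is the upper half of the 32 x 32 -> 64 bit product.  */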
3948 
3949   /* We shouldn't be called with OP1 == const1_rtx, but some of the
3950      code below will malfunction if we are, so check here and handle
3951      the special case if so.  */
3952   if (op1 == const1_rtx)
3953     return rem_flag ? const0_rtx : op0;
3954 
3955   /* When dividing by -1, we could get an overflow.
3956      negv_optab can handle overflows.  */
3957   if (! unsignedp && op1 == constm1_rtx)
3958     {
3959       if (rem_flag)
3960 	return const0_rtx;
3961       return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT
3962 			  ? negv_optab : neg_optab, op0, target, 0);
3963     }
3964 
3965   if (target
3966       /* Don't use the function value register as a target
3967 	 since we have to read it as well as write it,
3968 	 and function-inlining gets confused by this.  */
3969       && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3970 	  /* Don't clobber an operand while doing a multi-step calculation.  */
3971 	  || ((rem_flag || op1_is_constant)
3972 	      && (reg_mentioned_p (target, op0)
3973 		  || (MEM_P (op0) && MEM_P (target))))
3974 	  || reg_mentioned_p (target, op1)
3975 	  || (MEM_P (op1) && MEM_P (target))))
3976     target = 0;
3977 
3978   /* Get the mode in which to perform this computation.  Normally it will
3979      be MODE, but sometimes we can't do the desired operation in MODE.
3980      If so, pick a wider mode in which we can do the operation.  Convert
3981      to that mode at the start to avoid repeated conversions.
3982 
3983      First see what operations we need.  These depend on the expression
3984      we are evaluating.  (We assume that divxx3 insns exist under the
3985      same conditions as modxx3 insns do, and that these insns don't normally
3986      fail.  If these assumptions are not correct, we may generate less
3987      efficient code in some cases.)
3988 
3989      Then see if we find a mode in which we can open-code that operation
3990      (either a division, modulus, or shift).  Finally, check for the smallest
3991      mode for which we can do the operation with a library call.  */
3992 
3993   /* We might want to refine this now that we have division-by-constant
3994      optimization.  Since expmed_mult_highpart tries so many variants, it is
3995      not straightforward to generalize this.  Maybe we should make an array
3996      of possible modes in init_expmed?  Save this for GCC 2.7.  */
3997 
3998   optab1 = (op1_is_pow2
3999 	    ? (unsignedp ? lshr_optab : ashr_optab)
4000 	    : (unsignedp ? udiv_optab : sdiv_optab));
4001   optab2 = (op1_is_pow2 ? optab1
4002 	    : (unsignedp ? udivmod_optab : sdivmod_optab));
4003 
4004   for (compute_mode = mode; compute_mode != VOIDmode;
4005        compute_mode = GET_MODE_WIDER_MODE (compute_mode))
4006     if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
4007 	|| optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
4008       break;
4009 
4010   if (compute_mode == VOIDmode)
4011     for (compute_mode = mode; compute_mode != VOIDmode;
4012 	 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
4013       if (optab_libfunc (optab1, compute_mode)
4014 	  || optab_libfunc (optab2, compute_mode))
4015 	break;
4016 
4017   /* If we still couldn't find a mode, use MODE, but expand_binop will
4018      probably die.  */
4019   if (compute_mode == VOIDmode)
4020     compute_mode = mode;
4021 
4022   if (target && GET_MODE (target) == compute_mode)
4023     tquotient = target;
4024   else
4025     tquotient = gen_reg_rtx (compute_mode);
4026 
4027   size = GET_MODE_BITSIZE (compute_mode);
4028 #if 0
4029   /* It should be possible to restrict the precision to GET_MODE_BITSIZE
4030      (mode), and thereby get better code when OP1 is a constant.  Do that
4031      later.  It will require going over all usages of SIZE below.  */
4032   size = GET_MODE_BITSIZE (mode);
4033 #endif
4034 
4035   /* Only deduct something for a REM if the last divide done was
4036      for a different constant.  Then set the constant of the last
4037      divide.  */
4038   max_cost = (unsignedp
4039 	      ? udiv_cost (speed, compute_mode)
4040 	      : sdiv_cost (speed, compute_mode));
4041   if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4042 		     && INTVAL (op1) == last_div_const))
4043     max_cost -= (mul_cost (speed, compute_mode)
4044 		 + add_cost (speed, compute_mode));
4045 
4046   last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
4047 
4048   /* Now convert to the best mode to use.  */
4049   if (compute_mode != mode)
4050     {
4051       op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4052       op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4053 
4054       /* convert_modes may have placed op1 into a register, so we
4055 	 must recompute the following.  */
4056       op1_is_constant = CONST_INT_P (op1);
4057       if (op1_is_constant)
4058 	{
4059 	  wide_int ext_op1 = rtx_mode_t (op1, compute_mode);
4060 	  op1_is_pow2 = (wi::popcount (ext_op1) == 1
4061 			 || (! unsignedp
4062 			     && wi::popcount (wi::neg (ext_op1)) == 1));
4063 	}
4064       else
4065 	op1_is_pow2 = 0;
4066     }
4067 
4068   /* If one of the operands is a volatile MEM, copy it into a register.  */
4069 
4070   if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4071     op0 = force_reg (compute_mode, op0);
4072   if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4073     op1 = force_reg (compute_mode, op1);
4074 
4075   /* If we need the remainder or if OP1 is constant, we need to
4076      put OP0 in a register in case it has any queued subexpressions.  */
4077   if (rem_flag || op1_is_constant)
4078     op0 = force_reg (compute_mode, op0);
4079 
4080   last = get_last_insn ();
4081 
4082   /* Promote floor rounding to trunc rounding for unsigned operations.  */
4083   if (unsignedp)
4084     {
4085       if (code == FLOOR_DIV_EXPR)
4086 	code = TRUNC_DIV_EXPR;
4087       if (code == FLOOR_MOD_EXPR)
4088 	code = TRUNC_MOD_EXPR;
4089       if (code == EXACT_DIV_EXPR && op1_is_pow2)
4090 	code = TRUNC_DIV_EXPR;
4091     }
4092 
4093   if (op1 != const0_rtx)
4094     switch (code)
4095       {
4096       case TRUNC_MOD_EXPR:
4097       case TRUNC_DIV_EXPR:
4098 	if (op1_is_constant)
4099 	  {
4100 	    if (unsignedp)
4101 	      {
4102 		unsigned HOST_WIDE_INT mh, ml;
4103 		int pre_shift, post_shift;
4104 		int dummy;
4105 		wide_int wd = rtx_mode_t (op1, compute_mode);
4106 		unsigned HOST_WIDE_INT d = wd.to_uhwi ();
4107 
4108 		if (wi::popcount (wd) == 1)
4109 		  {
4110 		    pre_shift = floor_log2 (d);
4111 		    if (rem_flag)
4112 		      {
4113 			unsigned HOST_WIDE_INT mask
4114 			  = ((unsigned HOST_WIDE_INT) 1 << pre_shift) - 1;
4115 			remainder
4116 			  = expand_binop (compute_mode, and_optab, op0,
4117 					  gen_int_mode (mask, compute_mode),
4118 					  remainder, 1,
4119 					  OPTAB_LIB_WIDEN);
4120 			if (remainder)
4121 			  return gen_lowpart (mode, remainder);
4122 		      }
4123 		    quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4124 					     pre_shift, tquotient, 1);
4125 		  }
4126 		else if (size <= HOST_BITS_PER_WIDE_INT)
4127 		  {
4128 		    if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
4129 		      {
4130 			/* Most significant bit of divisor is set; emit an scc
4131 			   insn.  */
4132 			quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4133 							  compute_mode, 1, 1);
4134 		      }
4135 		    else
4136 		      {
4137 			/* Find a suitable multiplier and right shift count
4138 			   instead of multiplying with D.  */
4139 
4140 			mh = choose_multiplier (d, size, size,
4141 						&ml, &post_shift, &dummy);
4142 
4143 			/* If the suggested multiplier is more than SIZE bits,
4144 			   we can do better for even divisors, using an
4145 			   initial right shift.  */
4146 			if (mh != 0 && (d & 1) == 0)
4147 			  {
4148 			    pre_shift = floor_log2 (d & -d);
4149 			    mh = choose_multiplier (d >> pre_shift, size,
4150 						    size - pre_shift,
4151 						    &ml, &post_shift, &dummy);
4152 			    gcc_assert (!mh);
4153 			  }
4154 			else
4155 			  pre_shift = 0;
4156 
4157 			if (mh != 0)
4158 			  {
4159 			    rtx t1, t2, t3, t4;
4160 
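     			    /* The full multiplier is SIZE + 1 bits wide, with
     			       ML its low SIZE bits; per Granlund/Montgomery,
     			       compute t1 = high (op0 * ml) and then the
     			       quotient as
     			       (t1 + ((op0 - t1) >> 1)) >> (post_shift - 1).  */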
4161 			    if (post_shift - 1 >= BITS_PER_WORD)
4162 			      goto fail1;
4163 
4164 			    extra_cost
4165 			      = (shift_cost (speed, compute_mode, post_shift - 1)
4166 				 + shift_cost (speed, compute_mode, 1)
4167 				 + 2 * add_cost (speed, compute_mode));
4168 			    t1 = expmed_mult_highpart
4169 			      (compute_mode, op0,
4170 			       gen_int_mode (ml, compute_mode),
4171 			       NULL_RTX, 1, max_cost - extra_cost);
4172 			    if (t1 == 0)
4173 			      goto fail1;
4174 			    t2 = force_operand (gen_rtx_MINUS (compute_mode,
4175 							       op0, t1),
4176 						NULL_RTX);
4177 			    t3 = expand_shift (RSHIFT_EXPR, compute_mode,
4178 					       t2, 1, NULL_RTX, 1);
4179 			    t4 = force_operand (gen_rtx_PLUS (compute_mode,
4180 							      t1, t3),
4181 						NULL_RTX);
4182 			    quotient = expand_shift
4183 			      (RSHIFT_EXPR, compute_mode, t4,
4184 			       post_shift - 1, tquotient, 1);
4185 			  }
4186 			else
4187 			  {
4188 			    rtx t1, t2;
4189 
4190 			    if (pre_shift >= BITS_PER_WORD
4191 				|| post_shift >= BITS_PER_WORD)
4192 			      goto fail1;
4193 
4194 			    t1 = expand_shift
4195 			      (RSHIFT_EXPR, compute_mode, op0,
4196 			       pre_shift, NULL_RTX, 1);
4197 			    extra_cost
4198 			      = (shift_cost (speed, compute_mode, pre_shift)
4199 				 + shift_cost (speed, compute_mode, post_shift));
4200 			    t2 = expmed_mult_highpart
4201 			      (compute_mode, t1,
4202 			       gen_int_mode (ml, compute_mode),
4203 			       NULL_RTX, 1, max_cost - extra_cost);
4204 			    if (t2 == 0)
4205 			      goto fail1;
4206 			    quotient = expand_shift
4207 			      (RSHIFT_EXPR, compute_mode, t2,
4208 			       post_shift, tquotient, 1);
4209 			  }
4210 		      }
4211 		  }
4212 		else		/* Too wide mode to use tricky code */
4213 		  break;
4214 
4215 		insn = get_last_insn ();
4216 		if (insn != last)
4217 		  set_dst_reg_note (insn, REG_EQUAL,
4218 				    gen_rtx_UDIV (compute_mode, op0, op1),
4219 				    quotient);
4220 	      }
4221 	    else		/* TRUNC_DIV, signed */
4222 	      {
4223 		unsigned HOST_WIDE_INT ml;
4224 		int lgup, post_shift;
4225 		rtx mlr;
4226 		HOST_WIDE_INT d = INTVAL (op1);
4227 		unsigned HOST_WIDE_INT abs_d;
4228 
4229 		/* Since d might be INT_MIN, we have to cast to
4230 		   unsigned HOST_WIDE_INT before negating to avoid
4231 		   undefined signed overflow.  */
4232 		abs_d = (d >= 0
4233 			 ? (unsigned HOST_WIDE_INT) d
4234 			 : - (unsigned HOST_WIDE_INT) d);
4235 
4236 		/* n rem d = n rem -d */
4237 		if (rem_flag && d < 0)
4238 		  {
4239 		    d = abs_d;
4240 		    op1 = gen_int_mode (abs_d, compute_mode);
4241 		  }
4242 
4243 		if (d == 1)
4244 		  quotient = op0;
4245 		else if (d == -1)
4246 		  quotient = expand_unop (compute_mode, neg_optab, op0,
4247 					  tquotient, 0);
4248 		else if (size <= HOST_BITS_PER_WIDE_INT
4249 			 && abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4250 		  {
4251 		    /* This case is not handled correctly below.  */
4252 		    quotient = emit_store_flag (tquotient, EQ, op0, op1,
4253 						compute_mode, 1, 1);
4254 		    if (quotient == 0)
4255 		      goto fail1;
4256 		  }
4257 		else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4258 			 && (size <= HOST_BITS_PER_WIDE_INT || d >= 0)
4259 			 && (rem_flag
4260 			     ? smod_pow2_cheap (speed, compute_mode)
4261 			     : sdiv_pow2_cheap (speed, compute_mode))
4262 			 /* We assume that the cheap metric is true if the
4263 			    optab has an expander for this mode.  */
4264 			 && ((optab_handler ((rem_flag ? smod_optab
4265 					      : sdiv_optab),
4266 					     compute_mode)
4267 			      != CODE_FOR_nothing)
4268 			     || (optab_handler (sdivmod_optab,
4269 						compute_mode)
4270 				 != CODE_FOR_nothing)))
4271 		  ;
4272 		else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d)
4273 			 && (size <= HOST_BITS_PER_WIDE_INT
4274 			     || abs_d != (unsigned HOST_WIDE_INT) d))
4275 		  {
4276 		    if (rem_flag)
4277 		      {
4278 			remainder = expand_smod_pow2 (compute_mode, op0, d);
4279 			if (remainder)
4280 			  return gen_lowpart (mode, remainder);
4281 		      }
4282 
4283 		    if (sdiv_pow2_cheap (speed, compute_mode)
4284 			&& ((optab_handler (sdiv_optab, compute_mode)
4285 			     != CODE_FOR_nothing)
4286 			    || (optab_handler (sdivmod_optab, compute_mode)
4287 				!= CODE_FOR_nothing)))
4288 		      quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4289 						compute_mode, op0,
4290 						gen_int_mode (abs_d,
4291 							      compute_mode),
4292 						NULL_RTX, 0);
4293 		    else
4294 		      quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4295 
4296 		    /* We have computed OP0 / abs(OP1).  If OP1 is negative,
4297 		       negate the quotient.  */
4298 		    if (d < 0)
4299 		      {
4300 			insn = get_last_insn ();
4301 			if (insn != last
4302 			    && abs_d < ((unsigned HOST_WIDE_INT) 1
4303 					<< (HOST_BITS_PER_WIDE_INT - 1)))
4304 			  set_dst_reg_note (insn, REG_EQUAL,
4305 					    gen_rtx_DIV (compute_mode, op0,
4306 							 gen_int_mode
4307 							   (abs_d,
4308 							    compute_mode)),
4309 					    quotient);
4310 
4311 			quotient = expand_unop (compute_mode, neg_optab,
4312 						quotient, quotient, 0);
4313 		      }
4314 		  }
4315 		else if (size <= HOST_BITS_PER_WIDE_INT)
4316 		  {
4317 		    choose_multiplier (abs_d, size, size - 1,
4318 				       &ml, &post_shift, &lgup);
4319 		    if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4320 		      {
4321 			rtx t1, t2, t3;
4322 
4323 			if (post_shift >= BITS_PER_WORD
4324 			    || size - 1 >= BITS_PER_WORD)
4325 			  goto fail1;
4326 
4327 			extra_cost = (shift_cost (speed, compute_mode, post_shift)
4328 				      + shift_cost (speed, compute_mode, size - 1)
4329 				      + add_cost (speed, compute_mode));
4330 			t1 = expmed_mult_highpart
4331 			  (compute_mode, op0, gen_int_mode (ml, compute_mode),
4332 			   NULL_RTX, 0, max_cost - extra_cost);
4333 			if (t1 == 0)
4334 			  goto fail1;
4335 			t2 = expand_shift
4336 			  (RSHIFT_EXPR, compute_mode, t1,
4337 			   post_shift, NULL_RTX, 0);
4338 			t3 = expand_shift
4339 			  (RSHIFT_EXPR, compute_mode, op0,
4340 			   size - 1, NULL_RTX, 0);
4341 			if (d < 0)
4342 			  quotient
4343 			    = force_operand (gen_rtx_MINUS (compute_mode,
4344 							    t3, t2),
4345 					     tquotient);
4346 			else
4347 			  quotient
4348 			    = force_operand (gen_rtx_MINUS (compute_mode,
4349 							    t2, t3),
4350 					     tquotient);
4351 		      }
4352 		    else
4353 		      {
4354 			rtx t1, t2, t3, t4;
4355 
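     			/* Here ML >= 2^(SIZE - 1): sign-extend the multiplier
     			   and add OP0 back in after the high multiply.  E.g.
     			   signed x / 7 on a 32-bit target becomes
     			   ((high32 (x * 0x92492493) + x) >> 2) - (x >> 31),
     			   the multiply being a signed high-part multiply.  */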
4356 			if (post_shift >= BITS_PER_WORD
4357 			    || size - 1 >= BITS_PER_WORD)
4358 			  goto fail1;
4359 
4360 			ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4361 			mlr = gen_int_mode (ml, compute_mode);
4362 			extra_cost = (shift_cost (speed, compute_mode, post_shift)
4363 				      + shift_cost (speed, compute_mode, size - 1)
4364 				      + 2 * add_cost (speed, compute_mode));
4365 			t1 = expmed_mult_highpart (compute_mode, op0, mlr,
4366 						   NULL_RTX, 0,
4367 						   max_cost - extra_cost);
4368 			if (t1 == 0)
4369 			  goto fail1;
4370 			t2 = force_operand (gen_rtx_PLUS (compute_mode,
4371 							  t1, op0),
4372 					    NULL_RTX);
4373 			t3 = expand_shift
4374 			  (RSHIFT_EXPR, compute_mode, t2,
4375 			   post_shift, NULL_RTX, 0);
4376 			t4 = expand_shift
4377 			  (RSHIFT_EXPR, compute_mode, op0,
4378 			   size - 1, NULL_RTX, 0);
4379 			if (d < 0)
4380 			  quotient
4381 			    = force_operand (gen_rtx_MINUS (compute_mode,
4382 							    t4, t3),
4383 					     tquotient);
4384 			else
4385 			  quotient
4386 			    = force_operand (gen_rtx_MINUS (compute_mode,
4387 							    t3, t4),
4388 					     tquotient);
4389 		      }
4390 		  }
4391 		else		/* Too wide mode to use tricky code */
4392 		  break;
4393 
4394 		insn = get_last_insn ();
4395 		if (insn != last)
4396 		  set_dst_reg_note (insn, REG_EQUAL,
4397 				    gen_rtx_DIV (compute_mode, op0, op1),
4398 				    quotient);
4399 	      }
4400 	    break;
4401 	  }
4402       fail1:
4403 	delete_insns_since (last);
4404 	break;
4405 
4406       case FLOOR_DIV_EXPR:
4407       case FLOOR_MOD_EXPR:
4408 	/* We will come here only for signed operations.  */
4409 	if (op1_is_constant && size <= HOST_BITS_PER_WIDE_INT)
4410 	  {
4411 	    unsigned HOST_WIDE_INT mh, ml;
4412 	    int pre_shift, lgup, post_shift;
4413 	    HOST_WIDE_INT d = INTVAL (op1);
4414 
4415 	    if (d > 0)
4416 	      {
4417 		/* We could just as easily deal with negative constants here,
4418 		   but it does not seem worth the trouble for GCC 2.6.  */
4419 		if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4420 		  {
4421 		    pre_shift = floor_log2 (d);
4422 		    if (rem_flag)
4423 		      {
4424 			unsigned HOST_WIDE_INT mask
4425 			  = ((unsigned HOST_WIDE_INT) 1 << pre_shift) - 1;
4426 			remainder = expand_binop
4427 			  (compute_mode, and_optab, op0,
4428 			   gen_int_mode (mask, compute_mode),
4429 			   remainder, 0, OPTAB_LIB_WIDEN);
4430 			if (remainder)
4431 			  return gen_lowpart (mode, remainder);
4432 		      }
4433 		    quotient = expand_shift
4434 		      (RSHIFT_EXPR, compute_mode, op0,
4435 		       pre_shift, tquotient, 0);
4436 		  }
4437 		else
4438 		  {
4439 		    rtx t1, t2, t3, t4;
4440 
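     		    /* Use floor (x / D) == ~(~x / D) for D > 0: flip
     		       negative OP0 to its one's complement via the sign
     		       mask T1, do an unsigned magic-multiply division, and
     		       flip the result back, e.g.
     		       floor (-5 / 4) == ~(4 / 4) == -2.  */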
4441 		    mh = choose_multiplier (d, size, size - 1,
4442 					    &ml, &post_shift, &lgup);
4443 		    gcc_assert (!mh);
4444 
4445 		    if (post_shift < BITS_PER_WORD
4446 			&& size - 1 < BITS_PER_WORD)
4447 		      {
4448 			t1 = expand_shift
4449 			  (RSHIFT_EXPR, compute_mode, op0,
4450 			   size - 1, NULL_RTX, 0);
4451 			t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4452 					   NULL_RTX, 0, OPTAB_WIDEN);
4453 			extra_cost = (shift_cost (speed, compute_mode, post_shift)
4454 				      + shift_cost (speed, compute_mode, size - 1)
4455 				      + 2 * add_cost (speed, compute_mode));
4456 			t3 = expmed_mult_highpart
4457 			  (compute_mode, t2, gen_int_mode (ml, compute_mode),
4458 			   NULL_RTX, 1, max_cost - extra_cost);
4459 			if (t3 != 0)
4460 			  {
4461 			    t4 = expand_shift
4462 			      (RSHIFT_EXPR, compute_mode, t3,
4463 			       post_shift, NULL_RTX, 1);
4464 			    quotient = expand_binop (compute_mode, xor_optab,
4465 						     t4, t1, tquotient, 0,
4466 						     OPTAB_WIDEN);
4467 			  }
4468 		      }
4469 		  }
4470 	      }
4471 	    else
4472 	      {
4473 		rtx nsign, t1, t2, t3, t4;
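     		/* D < 0 here.  NSIGN below is -1 iff OP0 <= 0, so T3 is
     		   OP0 - 1 for positive OP0 and OP0 otherwise; adding ~NSIGN
     		   to the truncating quotient T4 then implements
     		   floor (x / D) == trunc ((x - 1) / D) - 1 for x > 0,
     		   e.g. floor (5 / -4) == trunc (4 / -4) - 1 == -2.  */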
4474 		t1 = force_operand (gen_rtx_PLUS (compute_mode,
4475 						  op0, constm1_rtx), NULL_RTX);
4476 		t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4477 				   0, OPTAB_WIDEN);
4478 		nsign = expand_shift (RSHIFT_EXPR, compute_mode, t2,
4479 				      size - 1, NULL_RTX, 0);
4480 		t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4481 				    NULL_RTX);
4482 		t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4483 				    NULL_RTX, 0);
4484 		if (t4)
4485 		  {
4486 		    rtx t5;
4487 		    t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4488 				      NULL_RTX, 0);
4489 		    quotient = force_operand (gen_rtx_PLUS (compute_mode,
4490 							    t4, t5),
4491 					      tquotient);
4492 		  }
4493 	      }
4494 	  }
4495 
4496 	if (quotient != 0)
4497 	  break;
4498 	delete_insns_since (last);
4499 
4500 	/* Try using an instruction that produces both the quotient and
4501 	   remainder, using truncation.  We can easily compensate the quotient
4502 	   or remainder to get floor rounding, once we have the remainder.
4503 	   Notice that we also compute the final remainder value here,
4504 	   and return the result right away.  */
4505 	if (target == 0 || GET_MODE (target) != compute_mode)
4506 	  target = gen_reg_rtx (compute_mode);
4507 
4508 	if (rem_flag)
4509 	  {
4510 	    remainder
4511 	      = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4512 	    quotient = gen_reg_rtx (compute_mode);
4513 	  }
4514 	else
4515 	  {
4516 	    quotient
4517 	      = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4518 	    remainder = gen_reg_rtx (compute_mode);
4519 	  }
4520 
4521 	if (expand_twoval_binop (sdivmod_optab, op0, op1,
4522 				 quotient, remainder, 0))
4523 	  {
4524 	    /* This could be computed with a branch-less sequence.
4525 	       Save that for later.  */
4526 	    rtx tem;
4527 	    rtx_code_label *label = gen_label_rtx ();
4528 	    do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4529 	    tem = expand_binop (compute_mode, xor_optab, op0, op1,
4530 				NULL_RTX, 0, OPTAB_WIDEN);
4531 	    do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4532 	    expand_dec (quotient, const1_rtx);
4533 	    expand_inc (remainder, op1);
4534 	    emit_label (label);
4535 	    return gen_lowpart (mode, rem_flag ? remainder : quotient);
4536 	  }
4537 
4538 	/* No luck with division elimination or divmod.  Have to do it
4539 	   by conditionally adjusting op0 *and* the result.  */
4540 	{
4541 	  rtx_code_label *label1, *label2, *label3, *label4, *label5;
4542 	  rtx adjusted_op0;
4543 	  rtx tem;
4544 
4545 	  quotient = gen_reg_rtx (compute_mode);
4546 	  adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4547 	  label1 = gen_label_rtx ();
4548 	  label2 = gen_label_rtx ();
4549 	  label3 = gen_label_rtx ();
4550 	  label4 = gen_label_rtx ();
4551 	  label5 = gen_label_rtx ();
4552 	  do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4553 	  do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4554 	  tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4555 			      quotient, 0, OPTAB_LIB_WIDEN);
4556 	  if (tem != quotient)
4557 	    emit_move_insn (quotient, tem);
4558 	  emit_jump_insn (gen_jump (label5));
4559 	  emit_barrier ();
4560 	  emit_label (label1);
4561 	  expand_inc (adjusted_op0, const1_rtx);
4562 	  emit_jump_insn (gen_jump (label4));
4563 	  emit_barrier ();
4564 	  emit_label (label2);
4565 	  do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4566 	  tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4567 			      quotient, 0, OPTAB_LIB_WIDEN);
4568 	  if (tem != quotient)
4569 	    emit_move_insn (quotient, tem);
4570 	  emit_jump_insn (gen_jump (label5));
4571 	  emit_barrier ();
4572 	  emit_label (label3);
4573 	  expand_dec (adjusted_op0, const1_rtx);
4574 	  emit_label (label4);
4575 	  tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4576 			      quotient, 0, OPTAB_LIB_WIDEN);
4577 	  if (tem != quotient)
4578 	    emit_move_insn (quotient, tem);
4579 	  expand_dec (quotient, const1_rtx);
4580 	  emit_label (label5);
4581 	}
4582 	break;
4583 
4584       case CEIL_DIV_EXPR:
4585       case CEIL_MOD_EXPR:
4586 	if (unsignedp)
4587 	  {
4588 	    if (op1_is_constant
4589 		&& EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4590 		&& (size <= HOST_BITS_PER_WIDE_INT
4591 		    || INTVAL (op1) >= 0))
4592 	      {
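		/* For a power of 2, ceil (x / d) == (x >> log2 (d))
		   + ((x & (d - 1)) != 0).  E.g. with d == 8, x == 17
		   gives (17 >> 3) + 1 == 3 and x == 16 gives
		   (16 >> 3) + 0 == 2.  */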
4593 		rtx t1, t2, t3;
4594 		unsigned HOST_WIDE_INT d = INTVAL (op1);
4595 		t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4596 				   floor_log2 (d), tquotient, 1);
4597 		t2 = expand_binop (compute_mode, and_optab, op0,
4598 				   gen_int_mode (d - 1, compute_mode),
4599 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
4600 		t3 = gen_reg_rtx (compute_mode);
4601 		t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4602 				      compute_mode, 1, 1);
4603 		if (t3 == 0)
4604 		  {
4605 		    rtx_code_label *lab;
4606 		    lab = gen_label_rtx ();
4607 		    do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4608 		    expand_inc (t1, const1_rtx);
4609 		    emit_label (lab);
4610 		    quotient = t1;
4611 		  }
4612 		else
4613 		  quotient = force_operand (gen_rtx_PLUS (compute_mode,
4614 							  t1, t3),
4615 					    tquotient);
4616 		break;
4617 	      }
4618 
4619 	    /* Try using an instruction that produces both the quotient and
4620 	       remainder, using truncation.  We can easily compensate the
4621 	       quotient or remainder to get ceiling rounding, once we have the
4622 	       remainder.  Notice that we also compute the final remainder
4623 	       value here, and return the result right away.  */
4624 	    if (target == 0 || GET_MODE (target) != compute_mode)
4625 	      target = gen_reg_rtx (compute_mode);
4626 
4627 	    if (rem_flag)
4628 	      {
4629 		remainder = (REG_P (target)
4630 			     ? target : gen_reg_rtx (compute_mode));
4631 		quotient = gen_reg_rtx (compute_mode);
4632 	      }
4633 	    else
4634 	      {
4635 		quotient = (REG_P (target)
4636 			    ? target : gen_reg_rtx (compute_mode));
4637 		remainder = gen_reg_rtx (compute_mode);
4638 	      }
4639 
4640 	    if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4641 				     remainder, 1))
4642 	      {
4643 		/* This could be computed with a branch-less sequence.
4644 		   Save that for later.  */
4645 		rtx_code_label *label = gen_label_rtx ();
4646 		do_cmp_and_jump (remainder, const0_rtx, EQ,
4647 				 compute_mode, label);
4648 		expand_inc (quotient, const1_rtx);
4649 		expand_dec (remainder, op1);
4650 		emit_label (label);
4651 		return gen_lowpart (mode, rem_flag ? remainder : quotient);
4652 	      }
4653 
4654 	    /* No luck with division elimination or divmod.  Have to do it
4655 	       by conditionally adjusting op0 *and* the result.  */
4656 	    {
4657 	      rtx_code_label *label1, *label2;
4658 	      rtx adjusted_op0, tem;
4659 
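	      /* For x > 0, ceil (x / y) == (x - 1) / y + 1 under
		 truncating unsigned division; x == 0 needs the explicit
		 zero result below.  E.g. ceil (17 / 5) == 16 / 5 + 1 == 4.  */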
4660 	      quotient = gen_reg_rtx (compute_mode);
4661 	      adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4662 	      label1 = gen_label_rtx ();
4663 	      label2 = gen_label_rtx ();
4664 	      do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4665 			       compute_mode, label1);
4666 	      emit_move_insn (quotient, const0_rtx);
4667 	      emit_jump_insn (gen_jump (label2));
4668 	      emit_barrier ();
4669 	      emit_label (label1);
4670 	      expand_dec (adjusted_op0, const1_rtx);
4671 	      tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4672 				  quotient, 1, OPTAB_LIB_WIDEN);
4673 	      if (tem != quotient)
4674 		emit_move_insn (quotient, tem);
4675 	      expand_inc (quotient, const1_rtx);
4676 	      emit_label (label2);
4677 	    }
4678 	  }
4679 	else /* signed */
4680 	  {
4681 	    if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4682 		&& INTVAL (op1) >= 0)
4683 	      {
4684 		/* This is extremely similar to the code for the unsigned case
4685 		   above.  For 2.7 we should merge these variants, but for
4686 		   2.6.1 I don't want to touch the code for unsigned since that
4687 		   gets used in C.  The signed case will only be used by other
4688 		   languages (Ada).  */
4689 
4690 		rtx t1, t2, t3;
4691 		unsigned HOST_WIDE_INT d = INTVAL (op1);
4692 		t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4693 				   floor_log2 (d), tquotient, 0);
4694 		t2 = expand_binop (compute_mode, and_optab, op0,
4695 				   gen_int_mode (d - 1, compute_mode),
4696 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
4697 		t3 = gen_reg_rtx (compute_mode);
4698 		t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4699 				      compute_mode, 1, 1);
4700 		if (t3 == 0)
4701 		  {
4702 		    rtx_code_label *lab;
4703 		    lab = gen_label_rtx ();
4704 		    do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4705 		    expand_inc (t1, const1_rtx);
4706 		    emit_label (lab);
4707 		    quotient = t1;
4708 		  }
4709 		else
4710 		  quotient = force_operand (gen_rtx_PLUS (compute_mode,
4711 							  t1, t3),
4712 					    tquotient);
4713 		break;
4714 	      }
4715 
4716 	    /* Try using an instruction that produces both the quotient and
4717 	       remainder, using truncation.  We can easily compensate the
4718 	       quotient or remainder to get ceiling rounding, once we have the
4719 	       remainder.  Notice that we also compute the final remainder
4720 	       value here, and return the result right away.  */
4721 	    if (target == 0 || GET_MODE (target) != compute_mode)
4722 	      target = gen_reg_rtx (compute_mode);
4723 	    if (rem_flag)
4724 	      {
4725 		remainder = (REG_P (target)
4726 			    ? target : gen_reg_rtx (compute_mode));
4727 		quotient = gen_reg_rtx (compute_mode);
4728 	      }
4729 	    else
4730 	      {
4731 		quotient = (REG_P (target)
4732 			    ? target : gen_reg_rtx (compute_mode));
4733 		remainder = gen_reg_rtx (compute_mode);
4734 	      }
4735 
4736 	    if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4737 				     remainder, 0))
4738 	      {
4739 		/* This could be computed with a branch-less sequence.
4740 		   Save that for later.  */
4741 		rtx tem;
4742 		rtx_code_label *label = gen_label_rtx ();
4743 		do_cmp_and_jump (remainder, const0_rtx, EQ,
4744 				 compute_mode, label);
4745 		tem = expand_binop (compute_mode, xor_optab, op0, op1,
4746 				    NULL_RTX, 0, OPTAB_WIDEN);
4747 		do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4748 		expand_inc (quotient, const1_rtx);
4749 		expand_dec (remainder, op1);
4750 		emit_label (label);
4751 		return gen_lowpart (mode, rem_flag ? remainder : quotient);
4752 	      }
4753 
4754 	    /* No luck with division elimination or divmod.  Have to do it
4755 	       by conditionally adjusting op0 *and* the result.  */
4756 	    {
4757 	      rtx_code_label *label1, *label2, *label3, *label4, *label5;
4758 	      rtx adjusted_op0;
4759 	      rtx tem;
4760 
4761 	      quotient = gen_reg_rtx (compute_mode);
4762 	      adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4763 	      label1 = gen_label_rtx ();
4764 	      label2 = gen_label_rtx ();
4765 	      label3 = gen_label_rtx ();
4766 	      label4 = gen_label_rtx ();
4767 	      label5 = gen_label_rtx ();
4768 	      do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4769 	      do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4770 			       compute_mode, label1);
4771 	      tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4772 				  quotient, 0, OPTAB_LIB_WIDEN);
4773 	      if (tem != quotient)
4774 		emit_move_insn (quotient, tem);
4775 	      emit_jump_insn (gen_jump (label5));
4776 	      emit_barrier ();
4777 	      emit_label (label1);
4778 	      expand_dec (adjusted_op0, const1_rtx);
4779 	      emit_jump_insn (gen_jump (label4));
4780 	      emit_barrier ();
4781 	      emit_label (label2);
4782 	      do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4783 			       compute_mode, label3);
4784 	      tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4785 				  quotient, 0, OPTAB_LIB_WIDEN);
4786 	      if (tem != quotient)
4787 		emit_move_insn (quotient, tem);
4788 	      emit_jump_insn (gen_jump (label5));
4789 	      emit_barrier ();
4790 	      emit_label (label3);
4791 	      expand_inc (adjusted_op0, const1_rtx);
4792 	      emit_label (label4);
4793 	      tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4794 				  quotient, 0, OPTAB_LIB_WIDEN);
4795 	      if (tem != quotient)
4796 		emit_move_insn (quotient, tem);
4797 	      expand_inc (quotient, const1_rtx);
4798 	      emit_label (label5);
4799 	    }
4800 	  }
4801 	break;
4802 
4803       case EXACT_DIV_EXPR:
4804 	if (op1_is_constant && size <= HOST_BITS_PER_WIDE_INT)
4805 	  {
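	    /* An exact division by d == 2^k * d' (d' odd) is a right
	       shift by k followed by a multiply with the inverse of d'
	       modulo 2^size, d' odd being a unit there.  E.g. with
	       size == 8 and d == 6: pre_shift == 1, ml == 171
	       (3 * 171 == 513 == 1 mod 256), and
	       42 / 6 == ((42 >> 1) * 171) mod 256 == 7.  */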
4806 	    HOST_WIDE_INT d = INTVAL (op1);
4807 	    unsigned HOST_WIDE_INT ml;
4808 	    int pre_shift;
4809 	    rtx t1;
4810 
4811 	    pre_shift = floor_log2 (d & -d);
4812 	    ml = invert_mod2n (d >> pre_shift, size);
4813 	    t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4814 			       pre_shift, NULL_RTX, unsignedp);
4815 	    quotient = expand_mult (compute_mode, t1,
4816 				    gen_int_mode (ml, compute_mode),
4817 				    NULL_RTX, 1);
4818 
4819 	    insn = get_last_insn ();
4820 	    set_dst_reg_note (insn, REG_EQUAL,
4821 			      gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4822 					      compute_mode, op0, op1),
4823 			      quotient);
4824 	  }
4825 	break;
4826 
4827       case ROUND_DIV_EXPR:
4828       case ROUND_MOD_EXPR:
4829 	if (unsignedp)
4830 	  {
4831 	    rtx tem;
4832 	    rtx_code_label *label;
4833 	    label = gen_label_rtx ();
4834 	    quotient = gen_reg_rtx (compute_mode);
4835 	    remainder = gen_reg_rtx (compute_mode);
4836 	    if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4837 	      {
4838 		rtx tem;
4839 		quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4840 					 quotient, 1, OPTAB_LIB_WIDEN);
4841 		tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4842 		remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4843 					  remainder, 1, OPTAB_LIB_WIDEN);
4844 	      }
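	    /* Round to nearest, ties away from zero: bump the quotient
	       exactly when remainder > floor ((op1 - 1) / 2).  E.g.
	       25 / 10 gives q == 2, r == 5; 5 > 4, so q becomes 3.  */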
4845 	    tem = plus_constant (compute_mode, op1, -1);
4846 	    tem = expand_shift (RSHIFT_EXPR, compute_mode, tem, 1, NULL_RTX, 1);
4847 	    do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4848 	    expand_inc (quotient, const1_rtx);
4849 	    expand_dec (remainder, op1);
4850 	    emit_label (label);
4851 	  }
4852 	else
4853 	  {
4854 	    rtx abs_rem, abs_op1, tem, mask;
4855 	    rtx_code_label *label;
4856 	    label = gen_label_rtx ();
4857 	    quotient = gen_reg_rtx (compute_mode);
4858 	    remainder = gen_reg_rtx (compute_mode);
4859 	    if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4860 	      {
4861 		rtx tem;
4862 		quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4863 					 quotient, 0, OPTAB_LIB_WIDEN);
4864 		tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4865 		remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4866 					  remainder, 0, OPTAB_LIB_WIDEN);
4867 	      }
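	    /* Adjust when 2 * |remainder| >= |op1|.  MASK is the sign bit
	       of op0 ^ op1 replicated (0 or -1), so (MASK ^ 1) - MASK is
	       +1 or -1 (the sign of the quotient) and (MASK ^ op1) - MASK
	       is op1 or -op1.  */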
4868 	    abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4869 	    abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4870 	    tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4871 				1, NULL_RTX, 1);
4872 	    do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4873 	    tem = expand_binop (compute_mode, xor_optab, op0, op1,
4874 				NULL_RTX, 0, OPTAB_WIDEN);
4875 	    mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4876 				 size - 1, NULL_RTX, 0);
4877 	    tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4878 				NULL_RTX, 0, OPTAB_WIDEN);
4879 	    tem = expand_binop (compute_mode, sub_optab, tem, mask,
4880 				NULL_RTX, 0, OPTAB_WIDEN);
4881 	    expand_inc (quotient, tem);
4882 	    tem = expand_binop (compute_mode, xor_optab, mask, op1,
4883 				NULL_RTX, 0, OPTAB_WIDEN);
4884 	    tem = expand_binop (compute_mode, sub_optab, tem, mask,
4885 				NULL_RTX, 0, OPTAB_WIDEN);
4886 	    expand_dec (remainder, tem);
4887 	    emit_label (label);
4888 	  }
4889 	return gen_lowpart (mode, rem_flag ? remainder : quotient);
4890 
4891       default:
4892 	gcc_unreachable ();
4893       }
4894 
4895   if (quotient == 0)
4896     {
4897       if (target && GET_MODE (target) != compute_mode)
4898 	target = 0;
4899 
4900       if (rem_flag)
4901 	{
4902 	  /* Try to produce the remainder without producing the quotient.
4903 	     If we seem to have a divmod pattern that does not require widening,
4904 	     don't try widening here.  We should really have a WIDEN argument
4905 	     to expand_twoval_binop, since what we'd really like to do here is
4906 	     1) try a mod insn in compute_mode
4907 	     2) try a divmod insn in compute_mode
4908 	     3) try a div insn in compute_mode and multiply-subtract to get
4909 	        remainder
4910 	     4) try the same things with widening allowed.  */
4911 	  remainder
4912 	    = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4913 				 op0, op1, target,
4914 				 unsignedp,
4915 				 ((optab_handler (optab2, compute_mode)
4916 				   != CODE_FOR_nothing)
4917 				  ? OPTAB_DIRECT : OPTAB_WIDEN));
4918 	  if (remainder == 0)
4919 	    {
4920 	      /* No luck there.  Can we do remainder and divide at once
4921 		 without a library call?  */
4922 	      remainder = gen_reg_rtx (compute_mode);
4923 	      if (! expand_twoval_binop ((unsignedp
4924 					  ? udivmod_optab
4925 					  : sdivmod_optab),
4926 					 op0, op1,
4927 					 NULL_RTX, remainder, unsignedp))
4928 		remainder = 0;
4929 	    }
4930 
4931 	  if (remainder)
4932 	    return gen_lowpart (mode, remainder);
4933 	}
4934 
4935       /* Produce the quotient.  Try a quotient insn, but not a library call.
4936 	 If we have a divmod in this mode, use it in preference to widening
4937 	 the div (for this test we assume it will not fail).  Note that optab2
4938 	 is set to whichever of the two optabs the call below will use.  */
4939       quotient
4940 	= sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4941 			     op0, op1, rem_flag ? NULL_RTX : target,
4942 			     unsignedp,
4943 			     ((optab_handler (optab2, compute_mode)
4944 			       != CODE_FOR_nothing)
4945 			      ? OPTAB_DIRECT : OPTAB_WIDEN));
4946 
4947       if (quotient == 0)
4948 	{
4949 	  /* No luck there.  Try a quotient-and-remainder insn,
4950 	     keeping the quotient alone.  */
4951 	  quotient = gen_reg_rtx (compute_mode);
4952 	  if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4953 				     op0, op1,
4954 				     quotient, NULL_RTX, unsignedp))
4955 	    {
4956 	      quotient = 0;
4957 	      if (! rem_flag)
4958 		/* Still no luck.  If we are not computing the remainder,
4959 		   use a library call for the quotient.  */
4960 		quotient = sign_expand_binop (compute_mode,
4961 					      udiv_optab, sdiv_optab,
4962 					      op0, op1, target,
4963 					      unsignedp, OPTAB_LIB_WIDEN);
4964 	    }
4965 	}
4966     }
4967 
4968   if (rem_flag)
4969     {
4970       if (target && GET_MODE (target) != compute_mode)
4971 	target = 0;
4972 
4973       if (quotient == 0)
4974 	{
4975 	  /* No divide instruction either.  Use library for remainder.  */
4976 	  remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4977 					 op0, op1, target,
4978 					 unsignedp, OPTAB_LIB_WIDEN);
4979 	  /* No remainder function.  Try a quotient-and-remainder
4980 	     function, keeping the remainder.  */
4981 	  if (!remainder)
4982 	    {
4983 	      remainder = gen_reg_rtx (compute_mode);
4984 	      if (!expand_twoval_binop_libfunc
4985 		  (unsignedp ? udivmod_optab : sdivmod_optab,
4986 		   op0, op1,
4987 		   NULL_RTX, remainder,
4988 		   unsignedp ? UMOD : MOD))
4989 		remainder = NULL_RTX;
4990 	    }
4991 	}
4992       else
4993 	{
4994 	  /* We divided.  Now finish doing X - Y * (X / Y).  */
4995 	  remainder = expand_mult (compute_mode, quotient, op1,
4996 				   NULL_RTX, unsignedp);
4997 	  remainder = expand_binop (compute_mode, sub_optab, op0,
4998 				    remainder, target, unsignedp,
4999 				    OPTAB_LIB_WIDEN);
5000 	}
5001     }
5002 
5003   return gen_lowpart (mode, rem_flag ? remainder : quotient);
5004 }
5005 
5006 /* Return a tree node with data type TYPE, describing the value of X.
5007    Usually this is a VAR_DECL, if there is no obvious better choice.
5008    X may be an expression; however, we only support those expressions
5009    generated by loop.c.  */
5010 
5011 tree
5012 make_tree (tree type, rtx x)
5013 {
5014   tree t;
5015 
5016   switch (GET_CODE (x))
5017     {
5018     case CONST_INT:
5019     case CONST_WIDE_INT:
5020       t = wide_int_to_tree (type, std::make_pair (x, TYPE_MODE (type)));
5021       return t;
5022 
5023     case CONST_DOUBLE:
5024       STATIC_ASSERT (HOST_BITS_PER_WIDE_INT * 2 <= MAX_BITSIZE_MODE_ANY_INT);
5025       if (TARGET_SUPPORTS_WIDE_INT == 0 && GET_MODE (x) == VOIDmode)
5026 	t = wide_int_to_tree (type,
5027 			      wide_int::from_array (&CONST_DOUBLE_LOW (x), 2,
5028 						    HOST_BITS_PER_WIDE_INT * 2));
5029       else
5030 	{
5031 	  REAL_VALUE_TYPE d;
5032 
5033 	  REAL_VALUE_FROM_CONST_DOUBLE (d, x);
5034 	  t = build_real (type, d);
5035 	}
5036 
5037       return t;
5038 
5039     case CONST_VECTOR:
5040       {
5041 	int units = CONST_VECTOR_NUNITS (x);
5042 	tree itype = TREE_TYPE (type);
5043 	tree *elts;
5044 	int i;
5045 
5046 	/* Build a tree with vector elements.  */
5047 	elts = XALLOCAVEC (tree, units);
5048 	for (i = units - 1; i >= 0; --i)
5049 	  {
5050 	    rtx elt = CONST_VECTOR_ELT (x, i);
5051 	    elts[i] = make_tree (itype, elt);
5052 	  }
5053 
5054 	return build_vector (type, elts);
5055       }
5056 
5057     case PLUS:
5058       return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5059 			  make_tree (type, XEXP (x, 1)));
5060 
5061     case MINUS:
5062       return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5063 			  make_tree (type, XEXP (x, 1)));
5064 
5065     case NEG:
5066       return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5067 
5068     case MULT:
5069       return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5070 			  make_tree (type, XEXP (x, 1)));
5071 
5072     case ASHIFT:
5073       return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5074 			  make_tree (type, XEXP (x, 1)));
5075 
5076     case LSHIFTRT:
5077       t = unsigned_type_for (type);
5078       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5079 			    		 make_tree (t, XEXP (x, 0)),
5080 				    	 make_tree (type, XEXP (x, 1))));
5081 
5082     case ASHIFTRT:
5083       t = signed_type_for (type);
5084       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5085 					 make_tree (t, XEXP (x, 0)),
5086 				    	 make_tree (type, XEXP (x, 1))));
5087 
5088     case DIV:
5089       if (TREE_CODE (type) != REAL_TYPE)
5090 	t = signed_type_for (type);
5091       else
5092 	t = type;
5093 
5094       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5095 				    	 make_tree (t, XEXP (x, 0)),
5096 				    	 make_tree (t, XEXP (x, 1))));
5097     case UDIV:
5098       t = unsigned_type_for (type);
5099       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5100 				    	 make_tree (t, XEXP (x, 0)),
5101 				    	 make_tree (t, XEXP (x, 1))));
5102 
5103     case SIGN_EXTEND:
5104     case ZERO_EXTEND:
5105       t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5106 					  GET_CODE (x) == ZERO_EXTEND);
5107       return fold_convert (type, make_tree (t, XEXP (x, 0)));
5108 
5109     case CONST:
5110       return make_tree (type, XEXP (x, 0));
5111 
5112     case SYMBOL_REF:
5113       t = SYMBOL_REF_DECL (x);
5114       if (t)
5115 	return fold_convert (type, build_fold_addr_expr (t));
5116       /* else fall through.  */
5117 
5118     default:
5119       t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5120 
5121       /* If TYPE is a POINTER_TYPE, we might need to convert X from
5122 	 address mode to pointer mode.  */
5123       if (POINTER_TYPE_P (type))
5124 	x = convert_memory_address_addr_space
5125 	      (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5126 
5127       /* Note that we do *not* use SET_DECL_RTL here, because we do not
5128 	 want set_decl_rtl to go adjusting REG_ATTRS for this temporary.  */
5129       t->decl_with_rtl.rtl = x;
5130 
5131       return t;
5132     }
5133 }
5134 
5135 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5136    and returning TARGET.
5137 
5138    If TARGET is 0, a pseudo-register or constant is returned.  */
5139 
5140 rtx
5141 expand_and (machine_mode mode, rtx op0, rtx op1, rtx target)
5142 {
5143   rtx tem = 0;
5144 
5145   if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5146     tem = simplify_binary_operation (AND, mode, op0, op1);
5147   if (tem == 0)
5148     tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5149 
5150   if (target == 0)
5151     target = tem;
5152   else if (tem != target)
5153     emit_move_insn (target, tem);
5154   return target;
5155 }
5156 
5157 /* Helper function for emit_store_flag.  */
5158 rtx
5159 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5160 	     machine_mode mode, machine_mode compare_mode,
5161 	     int unsignedp, rtx x, rtx y, int normalizep,
5162 	     machine_mode target_mode)
5163 {
5164   struct expand_operand ops[4];
5165   rtx op0, comparison, subtarget;
5166   rtx_insn *last;
5167   machine_mode result_mode = targetm.cstore_mode (icode);
5168 
5169   last = get_last_insn ();
5170   x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5171   y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5172   if (!x || !y)
5173     {
5174       delete_insns_since (last);
5175       return NULL_RTX;
5176     }
5177 
5178   if (target_mode == VOIDmode)
5179     target_mode = result_mode;
5180   if (!target)
5181     target = gen_reg_rtx (target_mode);
5182 
5183   comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5184 
5185   create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5186   create_fixed_operand (&ops[1], comparison);
5187   create_fixed_operand (&ops[2], x);
5188   create_fixed_operand (&ops[3], y);
5189   if (!maybe_expand_insn (icode, 4, ops))
5190     {
5191       delete_insns_since (last);
5192       return NULL_RTX;
5193     }
5194   subtarget = ops[0].value;
5195 
5196   /* If we are converting to a wider mode, first convert to
5197      TARGET_MODE, then normalize.  This produces better combining
5198      opportunities on machines that have a SIGN_EXTRACT when we are
5199      testing a single bit.  This mostly benefits the 68k.
5200 
5201      If STORE_FLAG_VALUE does not have the sign bit set when
5202      interpreted in MODE, we can do this conversion as unsigned, which
5203      is usually more efficient.  */
5204   if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
5205     {
5206       convert_move (target, subtarget,
5207 		    val_signbit_known_clear_p (result_mode,
5208 					       STORE_FLAG_VALUE));
5209       op0 = target;
5210       result_mode = target_mode;
5211     }
5212   else
5213     op0 = subtarget;
5214 
5215   /* If we want to keep subexpressions around, don't reuse our last
5216      target.  */
5217   if (optimize)
5218     subtarget = 0;
5219 
5220   /* Now normalize to the proper value in MODE.  Sometimes we don't
5221      have to do anything.  */
5222   if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5223     ;
5224   /* STORE_FLAG_VALUE might be the most negative number, so write
5225      the comparison this way to avoid a compiler-time warning.  */
5226   else if (- normalizep == STORE_FLAG_VALUE)
5227     op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5228 
5229   /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5230      it hard to use a value of just the sign bit due to ANSI integer
5231      constant typing rules.  */
5232   else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5233     op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5234 			GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5235 			normalizep == 1);
5236   else
5237     {
5238       gcc_assert (STORE_FLAG_VALUE & 1);
5239 
5240       op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5241       if (normalizep == -1)
5242 	op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5243     }
5244 
5245   /* If we were converting to a smaller mode, do the conversion now.  */
5246   if (target_mode != result_mode)
5247     {
5248       convert_move (target, op0, 0);
5249       return target;
5250     }
5251   else
5252     return op0;
5253 }
5254 
5255 
5256 /* A subroutine of emit_store_flag only including "tricks" that do not
5257    need a recursive call.  These are kept separate to avoid infinite
5258    loops.  */
5259 
5260 static rtx
5261 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5262 		   machine_mode mode, int unsignedp, int normalizep,
5263 		   machine_mode target_mode)
5264 {
5265   rtx subtarget;
5266   enum insn_code icode;
5267   machine_mode compare_mode;
5268   enum mode_class mclass;
5269   enum rtx_code scode;
5270   rtx tem;
5271 
5272   if (unsignedp)
5273     code = unsigned_condition (code);
5274   scode = swap_condition (code);
5275 
5276   /* If one operand is constant, make it the second one.  Only do this
5277      if the other operand is not constant as well.  */
5278 
5279   if (swap_commutative_operands_p (op0, op1))
5280     {
5281       tem = op0;
5282       op0 = op1;
5283       op1 = tem;
5284       code = swap_condition (code);
5285     }
5286 
5287   if (mode == VOIDmode)
5288     mode = GET_MODE (op0);
5289 
5290   /* For some comparisons with 1 and -1, we can convert this to
5291      comparisons with zero.  This will often produce more opportunities for
5292      store-flag insns.  */
5293 
5294   switch (code)
5295     {
5296     case LT:
5297       if (op1 == const1_rtx)
5298 	op1 = const0_rtx, code = LE;
5299       break;
5300     case LE:
5301       if (op1 == constm1_rtx)
5302 	op1 = const0_rtx, code = LT;
5303       break;
5304     case GE:
5305       if (op1 == const1_rtx)
5306 	op1 = const0_rtx, code = GT;
5307       break;
5308     case GT:
5309       if (op1 == constm1_rtx)
5310 	op1 = const0_rtx, code = GE;
5311       break;
5312     case GEU:
5313       if (op1 == const1_rtx)
5314 	op1 = const0_rtx, code = NE;
5315       break;
5316     case LTU:
5317       if (op1 == const1_rtx)
5318 	op1 = const0_rtx, code = EQ;
5319       break;
5320     default:
5321       break;
5322     }
5323 
5324   /* If we are comparing a double-word integer with zero or -1, we can
5325      convert the comparison into one involving a single word.  */
5326   if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5327       && GET_MODE_CLASS (mode) == MODE_INT
5328       && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5329     {
5330       if ((code == EQ || code == NE)
5331 	  && (op1 == const0_rtx || op1 == constm1_rtx))
5332 	{
5333 	  rtx op00, op01;
5334 
5335 	  /* Do a logical OR or AND of the two words and compare the
5336 	     result.  */
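	  /* (lo | hi) == 0 exactly when the double word is 0, and
	     (lo & hi) == -1 exactly when it is -1, so one word-mode
	     comparison against the same constant decides EQ/NE.  */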
5337 	  op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5338 	  op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5339 	  tem = expand_binop (word_mode,
5340 			      op1 == const0_rtx ? ior_optab : and_optab,
5341 			      op00, op01, NULL_RTX, unsignedp,
5342 			      OPTAB_DIRECT);
5343 
5344 	  if (tem != 0)
5345 	    tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5346 				   unsignedp, normalizep);
5347 	}
5348       else if ((code == LT || code == GE) && op1 == const0_rtx)
5349 	{
5350 	  rtx op0h;
5351 
5352 	  /* If testing the sign bit, can just test on high word.  */
5353 	  op0h = simplify_gen_subreg (word_mode, op0, mode,
5354 				      subreg_highpart_offset (word_mode,
5355 							      mode));
5356 	  tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5357 				 unsignedp, normalizep);
5358 	}
5359       else
5360 	tem = NULL_RTX;
5361 
5362       if (tem)
5363 	{
5364 	  if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5365 	    return tem;
5366 	  if (!target)
5367 	    target = gen_reg_rtx (target_mode);
5368 
5369 	  convert_move (target, tem,
5370 			!val_signbit_known_set_p (word_mode,
5371 						  (normalizep ? normalizep
5372 						   : STORE_FLAG_VALUE)));
5373 	  return target;
5374 	}
5375     }
5376 
5377   /* If this is A < 0 or A >= 0, we can do this by taking the one's
5378      complement of A (for GE) and shifting the sign bit to the low bit.  */
5379   if (op1 == const0_rtx && (code == LT || code == GE)
5380       && GET_MODE_CLASS (mode) == MODE_INT
5381       && (normalizep || STORE_FLAG_VALUE == 1
5382 	  || val_signbit_p (mode, STORE_FLAG_VALUE)))
5383     {
5384       subtarget = target;
5385 
5386       if (!target)
5387 	target_mode = mode;
5388 
5389       /* If the result is to be wider than OP0, it is best to convert it
5390 	 first.  If it is to be narrower, it is *incorrect* to convert it
5391 	 first.  */
5392       else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5393 	{
5394 	  op0 = convert_modes (target_mode, mode, op0, 0);
5395 	  mode = target_mode;
5396 	}
5397 
5398       if (target_mode != mode)
5399 	subtarget = 0;
5400 
5401       if (code == GE)
5402 	op0 = expand_unop (mode, one_cmpl_optab, op0,
5403 			   ((STORE_FLAG_VALUE == 1 || normalizep)
5404 			    ? 0 : subtarget), 0);
5405 
5406       if (STORE_FLAG_VALUE == 1 || normalizep)
5407 	/* If we are supposed to produce a 0/1 value, we want to do
5408 	   a logical shift from the sign bit to the low-order bit; for
5409 	   a -1/0 value, we do an arithmetic shift.  */
5410 	op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5411 			    GET_MODE_BITSIZE (mode) - 1,
5412 			    subtarget, normalizep != -1);
5413 
5414       if (mode != target_mode)
5415 	op0 = convert_modes (target_mode, mode, op0, 0);
5416 
5417       return op0;
5418     }
5419 
5420   mclass = GET_MODE_CLASS (mode);
5421   for (compare_mode = mode; compare_mode != VOIDmode;
5422        compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5423     {
5424      machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5425      icode = optab_handler (cstore_optab, optab_mode);
5426      if (icode != CODE_FOR_nothing)
5427 	{
5428 	  do_pending_stack_adjust ();
5429 	  tem = emit_cstore (target, icode, code, mode, compare_mode,
5430 			     unsignedp, op0, op1, normalizep, target_mode);
5431 	  if (tem)
5432 	    return tem;
5433 
5434 	  if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5435 	    {
5436 	      tem = emit_cstore (target, icode, scode, mode, compare_mode,
5437 				 unsignedp, op1, op0, normalizep, target_mode);
5438 	      if (tem)
5439 	        return tem;
5440 	    }
5441 	  break;
5442 	}
5443     }
5444 
5445   return 0;
5446 }
5447 
5448 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5449    and storing in TARGET.  Normally return TARGET.
5450    Return 0 if that cannot be done.
5451 
5452    MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
5453    it is VOIDmode, they cannot both be CONST_INT.
5454 
5455    UNSIGNEDP is for the case where we have to widen the operands
5456    to perform the operation.  It says to use zero-extension.
5457 
5458    NORMALIZEP is 1 if we should convert the result to be either zero
5459    or one.  NORMALIZEP is -1 if we should convert the result to be
5460    either zero or -1.  If NORMALIZEP is zero, the result will be left
5461    "raw" out of the scc insn.  */
5462 
5463 rtx
5464 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5465 		 machine_mode mode, int unsignedp, int normalizep)
5466 {
5467   machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5468   enum rtx_code rcode;
5469   rtx subtarget;
5470   rtx tem, trueval;
5471   rtx_insn *last;
5472 
5473   /* If we compare constants, we shouldn't use a store-flag operation,
5474      but a constant load.  We can get there via the vanilla route that
5475      usually generates a compare-branch sequence, but will in this case
5476      fold the comparison to a constant, and thus elide the branch.  */
5477   if (CONSTANT_P (op0) && CONSTANT_P (op1))
5478     return NULL_RTX;
5479 
5480   tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5481 			   target_mode);
5482   if (tem)
5483     return tem;
5484 
5485   /* If we reached here, we can't do this with a scc insn, however there
5486      are some comparisons that can be done in other ways.  Don't do any
5487      of these cases if branches are very cheap.  */
5488   if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5489     return 0;
5490 
5491   /* See what we need to return.  We can only return a 1, -1, or the
5492      sign bit.  */
5493 
5494   if (normalizep == 0)
5495     {
5496       if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5497 	normalizep = STORE_FLAG_VALUE;
5498 
5499       else if (val_signbit_p (mode, STORE_FLAG_VALUE))
5500 	;
5501       else
5502 	return 0;
5503     }
5504 
5505   last = get_last_insn ();
5506 
5507   /* If optimizing, use different pseudo registers for each insn, instead
5508      of reusing the same pseudo.  This leads to better CSE, but slows
5509      down the compiler, since there are more pseudos.  */
5510   subtarget = (!optimize
5511 	       && (target_mode == mode)) ? target : NULL_RTX;
5512   trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5513 
5514   /* For floating-point comparisons, try the reverse comparison or try
5515      changing the "orderedness" of the comparison.  */
5516   if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5517     {
5518       enum rtx_code first_code;
5519       bool and_them;
5520 
5521       rcode = reverse_condition_maybe_unordered (code);
5522       if (can_compare_p (rcode, mode, ccp_store_flag)
5523           && (code == ORDERED || code == UNORDERED
5524 	      || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5525 	      || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5526 	{
5527           int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5528 		          || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5529 
5530 	  /* For the reverse comparison, use either an addition or a XOR.  */
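	  /* Adding NORMALIZEP maps the reversed result STORE_FLAG_VALUE
	     to 0 and 0 to NORMALIZEP; XORing with TRUEVAL likewise swaps
	     0 and TRUEVAL.  Either undoes the reversal when the constant
	     operation is free.  */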
5531           if (want_add
5532 	      && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5533 			   optimize_insn_for_speed_p ()) == 0)
5534 	    {
5535 	      tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5536 				       STORE_FLAG_VALUE, target_mode);
5537 	      if (tem)
5538                 return expand_binop (target_mode, add_optab, tem,
5539 				     gen_int_mode (normalizep, target_mode),
5540 				     target, 0, OPTAB_WIDEN);
5541 	    }
5542           else if (!want_add
5543 	           && rtx_cost (trueval, XOR, 1,
5544 			        optimize_insn_for_speed_p ()) == 0)
5545 	    {
5546 	      tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5547 				       normalizep, target_mode);
5548 	      if (tem)
5549                 return expand_binop (target_mode, xor_optab, tem, trueval,
5550 				     target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
5551 	    }
5552 	}
5553 
5554       delete_insns_since (last);
5555 
5556       /* Cannot split ORDERED and UNORDERED; only try the above trick.  */
5557       if (code == ORDERED || code == UNORDERED)
5558 	return 0;
5559 
5560       and_them = split_comparison (code, mode, &first_code, &code);
5561 
5562       /* If there are no NaNs, the first comparison should always fall through.
5563          Effectively change the comparison to the other one.  */
5564       if (!HONOR_NANS (mode))
5565 	{
5566           gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5567 	  return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5568 				    target_mode);
5569 	}
5570 
5571 #ifdef HAVE_conditional_move
5572       /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5573 	 conditional move.  */
5574       tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5575 			       normalizep, target_mode);
5576       if (tem == 0)
5577 	return 0;
5578 
5579       if (and_them)
5580         tem = emit_conditional_move (target, code, op0, op1, mode,
5581 				     tem, const0_rtx, GET_MODE (tem), 0);
5582       else
5583         tem = emit_conditional_move (target, code, op0, op1, mode,
5584 				     trueval, tem, GET_MODE (tem), 0);
5585 
5586       if (tem == 0)
5587         delete_insns_since (last);
5588       return tem;
5589 #else
5590       return 0;
5591 #endif
5592     }
5593 
5594   /* The remaining tricks only apply to integer comparisons.  */
5595 
5596   if (GET_MODE_CLASS (mode) != MODE_INT)
5597     return 0;
5598 
5599   /* If this is an equality comparison of integers, we can try to exclusive-or
5600      (or subtract) the two operands and use a recursive call to try the
5601      comparison with zero.  Don't do any of these cases if branches are
5602      very cheap.  */
5603 
5604   if ((code == EQ || code == NE) && op1 != const0_rtx)
5605     {
5606       tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5607 			  OPTAB_WIDEN);
5608 
5609       if (tem == 0)
5610 	tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5611 			    OPTAB_WIDEN);
5612       if (tem != 0)
5613 	tem = emit_store_flag (target, code, tem, const0_rtx,
5614 			       mode, unsignedp, normalizep);
5615       if (tem != 0)
5616 	return tem;
5617 
5618       delete_insns_since (last);
5619     }
5620 
5621   /* For integer comparisons, try the reverse comparison.  However, for
5622      small X, and if we'd have to extend anyway, implementing "X != 0"
5623      as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0".  */
5624   rcode = reverse_condition (code);
5625   if (can_compare_p (rcode, mode, ccp_store_flag)
5626       && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5627 	    && code == NE
5628 	    && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5629 	    && op1 == const0_rtx))
5630     {
5631       int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5632 		      || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5633 
5634       /* Again, for the reverse comparison, use either an addition or a XOR.  */
5635       if (want_add
5636 	  && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5637 		       optimize_insn_for_speed_p ()) == 0)
5638 	{
5639 	  tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5640 				   STORE_FLAG_VALUE, target_mode);
5641 	  if (tem != 0)
5642             tem = expand_binop (target_mode, add_optab, tem,
5643 				gen_int_mode (normalizep, target_mode),
5644 				target, 0, OPTAB_WIDEN);
5645 	}
5646       else if (!want_add
5647 	       && rtx_cost (trueval, XOR, 1,
5648 			    optimize_insn_for_speed_p ()) == 0)
5649 	{
5650 	  tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5651 				   normalizep, target_mode);
5652 	  if (tem != 0)
5653             tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5654 				INTVAL (trueval) >= 0, OPTAB_WIDEN);
5655 	}
5656 
5657       if (tem != 0)
5658 	return tem;
5659       delete_insns_since (last);
5660     }
5661 
5662   /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5663      the constant zero.  Reject all other comparisons at this point.  Only
5664      do LE and GT if branches are expensive since they are expensive on
5665      2-operand machines.  */
5666 
5667   if (op1 != const0_rtx
5668       || (code != EQ && code != NE
5669 	  && (BRANCH_COST (optimize_insn_for_speed_p (),
5670 			   false) <= 1 || (code != LE && code != GT))))
5671     return 0;
5672 
5673   /* Try to put the result of the comparison in the sign bit.  Assume we can't
5674      do the necessary operation below.  */
5675 
5676   tem = 0;
5677 
5678   /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
5679      the sign bit set.  */
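  /* (If A < 0 the sign bit comes from A itself; if A == 0 it comes from
     A - 1 == -1; if A > 0 neither term has it set.)  */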
5680 
5681   if (code == LE)
5682     {
5683       /* This is destructive, so SUBTARGET can't be OP0.  */
5684       if (rtx_equal_p (subtarget, op0))
5685 	subtarget = 0;
5686 
5687       tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5688 			  OPTAB_WIDEN);
5689       if (tem)
5690 	tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5691 			    OPTAB_WIDEN);
5692     }
5693 
5694   /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5695      number of bits in the mode of OP0, minus one.  */
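  /* (The arithmetic shift gives 0 for A >= 0 and -1 for A < 0; the
     subtraction then yields -A, negative exactly when A > 0, or
     -(A + 1), which is never negative.)  */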
5696 
5697   if (code == GT)
5698     {
5699       if (rtx_equal_p (subtarget, op0))
5700 	subtarget = 0;
5701 
5702       tem = expand_shift (RSHIFT_EXPR, mode, op0,
5703 			  GET_MODE_BITSIZE (mode) - 1,
5704 			  subtarget, 0);
5705       tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5706 			  OPTAB_WIDEN);
5707     }
5708 
5709   if (code == EQ || code == NE)
5710     {
5711       /* For EQ or NE, one way to do the comparison is to apply an operation
5712 	 that converts the operand into a positive number if it is nonzero
5713 	 or zero if it was originally zero.  Then, for EQ, we subtract 1 and
5714 	 for NE we negate.  This puts the result in the sign bit.  Then we
5715 	 normalize with a shift, if needed.
5716 
5717 	 Two operations that can do the above actions are ABS and FFS, so try
5718 	 them.  If that doesn't work, and MODE is smaller than a full word,
5719 	 we can use zero-extension to the wider mode (an unsigned conversion)
5720 	 as the operation.  */
5721 
5722       /* Note that ABS doesn't yield a positive number for INT_MIN, but
5723 	 that is compensated by the subsequent overflow when subtracting
5724 	 one / negating.  */
5725 
5726       if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5727 	tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5728       else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5729 	tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5730       else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5731 	{
5732 	  tem = convert_modes (word_mode, mode, op0, 1);
5733 	  mode = word_mode;
5734 	}
5735 
5736       if (tem != 0)
5737 	{
5738 	  if (code == EQ)
5739 	    tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5740 				0, OPTAB_WIDEN);
5741 	  else
5742 	    tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5743 	}
5744 
5745       /* If we couldn't do it that way, for NE we can "or" the two's complement
5746 	 of the value with itself.  For EQ, we take the one's complement of
5747 	 that "or", which is an extra insn, so we only handle EQ if branches
5748 	 are expensive.  */
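      /* ((-A) | A) has the sign bit set exactly when A != 0: for A > 0
	 it comes from -A, for A < 0 from A itself (including
	 A == INT_MIN, where -A == A), and both terms are 0 for A == 0.  */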
5749 
5750       if (tem == 0
5751 	  && (code == NE
5752 	      || BRANCH_COST (optimize_insn_for_speed_p (),
5753 		      	      false) > 1))
5754 	{
5755 	  if (rtx_equal_p (subtarget, op0))
5756 	    subtarget = 0;
5757 
5758 	  tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5759 	  tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5760 			      OPTAB_WIDEN);
5761 
5762 	  if (tem && code == EQ)
5763 	    tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5764 	}
5765     }
5766 
5767   if (tem && normalizep)
5768     tem = expand_shift (RSHIFT_EXPR, mode, tem,
5769 			GET_MODE_BITSIZE (mode) - 1,
5770 			subtarget, normalizep == 1);
5771 
5772   if (tem)
5773     {
5774       if (!target)
5775         ;
5776       else if (GET_MODE (tem) != target_mode)
5777 	{
5778 	  convert_move (target, tem, 0);
5779 	  tem = target;
5780 	}
5781       else if (!subtarget)
5782 	{
5783 	  emit_move_insn (target, tem);
5784 	  tem = target;
5785 	}
5786     }
5787   else
5788     delete_insns_since (last);
5789 
5790   return tem;
5791 }
5792 
5793 /* Like emit_store_flag, but always succeeds.  */
5794 
5795 rtx
5796 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5797 		       machine_mode mode, int unsignedp, int normalizep)
5798 {
5799   rtx tem;
5800   rtx_code_label *label;
5801   rtx trueval, falseval;
5802 
5803   /* First see if emit_store_flag can do the job.  */
5804   tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5805   if (tem != 0)
5806     return tem;
5807 
5808   if (!target)
5809     target = gen_reg_rtx (word_mode);
5810 
5811   /* If this failed, we have to do this with set/compare/jump/set code.
5812      For foo != 0, if foo is in OP0, just replace it with 1 if nonzero.  */
5813   trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
5814   if (code == NE
5815       && GET_MODE_CLASS (mode) == MODE_INT
5816       && REG_P (target)
5817       && op0 == target
5818       && op1 == const0_rtx)
5819     {
5820       label = gen_label_rtx ();
5821       do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp,
5822 			       mode, NULL_RTX, NULL_RTX, label, -1);
5823       emit_move_insn (target, trueval);
5824       emit_label (label);
5825       return target;
5826     }
5827 
5828   if (!REG_P (target)
5829       || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5830     target = gen_reg_rtx (GET_MODE (target));
5831 
5832   /* Jump in the right direction if the target cannot implement CODE
5833      but can jump on its reverse condition.  */
5834   falseval = const0_rtx;
5835   if (! can_compare_p (code, mode, ccp_jump)
5836       && (! FLOAT_MODE_P (mode)
5837           || code == ORDERED || code == UNORDERED
5838           || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5839           || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5840     {
5841       enum rtx_code rcode;
5842       if (FLOAT_MODE_P (mode))
5843         rcode = reverse_condition_maybe_unordered (code);
5844       else
5845         rcode = reverse_condition (code);
5846 
5847       /* Canonicalize to UNORDERED for the libcall.  */
5848       if (can_compare_p (rcode, mode, ccp_jump)
5849           || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
5850 	{
5851 	  falseval = trueval;
5852 	  trueval = const0_rtx;
5853 	  code = rcode;
5854 	}
5855     }
5856 
5857   emit_move_insn (target, trueval);
5858   label = gen_label_rtx ();
5859   do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5860 			   NULL_RTX, label, -1);
5861 
5862   emit_move_insn (target, falseval);
5863   emit_label (label);
5864 
5865   return target;
5866 }
5867 
5868 /* Perform possibly multi-word comparison and conditional jump to LABEL
5869    if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE.  This is
5870    if ARG1 OP ARG2 is true, where ARG1 and ARG2 are of mode MODE.  This is
5871 
5872 static void
5873 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, machine_mode mode,
5874 		 rtx_code_label *label)
5875 {
5876   int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
5877   do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode,
5878 			   NULL_RTX, NULL_RTX, label, -1);
5879 }
5880