xref: /netbsd-src/external/gpl3/gcc.old/dist/gcc/expmed.c (revision 8ecbf5f02b752fcb7debe1a8fab1dc82602bc760)
1 /* Medium-level subroutines: convert bit-field store and extract
2    and shifts, multiplies and divides to rtl instructions.
3    Copyright (C) 1987-2018 Free Software Foundation, Inc.
4 
5 This file is part of GCC.
6 
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11 
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3.  If not see
19 <http://www.gnu.org/licenses/>.  */
20 
21 
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "predict.h"
30 #include "memmodel.h"
31 #include "tm_p.h"
32 #include "expmed.h"
33 #include "optabs.h"
34 #include "regs.h"
35 #include "emit-rtl.h"
36 #include "diagnostic-core.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "dojump.h"
40 #include "explow.h"
41 #include "expr.h"
42 #include "langhooks.h"
43 #include "tree-vector-builder.h"
44 
45 struct target_expmed default_target_expmed;
46 #if SWITCHABLE_TARGET
47 struct target_expmed *this_target_expmed = &default_target_expmed;
48 #endif
49 
50 static bool store_integral_bit_field (rtx, opt_scalar_int_mode,
51 				      unsigned HOST_WIDE_INT,
52 				      unsigned HOST_WIDE_INT,
53 				      poly_uint64, poly_uint64,
54 				      machine_mode, rtx, bool, bool);
55 static void store_fixed_bit_field (rtx, opt_scalar_int_mode,
56 				   unsigned HOST_WIDE_INT,
57 				   unsigned HOST_WIDE_INT,
58 				   poly_uint64, poly_uint64,
59 				   rtx, scalar_int_mode, bool);
60 static void store_fixed_bit_field_1 (rtx, scalar_int_mode,
61 				     unsigned HOST_WIDE_INT,
62 				     unsigned HOST_WIDE_INT,
63 				     rtx, scalar_int_mode, bool);
64 static void store_split_bit_field (rtx, opt_scalar_int_mode,
65 				   unsigned HOST_WIDE_INT,
66 				   unsigned HOST_WIDE_INT,
67 				   poly_uint64, poly_uint64,
68 				   rtx, scalar_int_mode, bool);
69 static rtx extract_integral_bit_field (rtx, opt_scalar_int_mode,
70 				       unsigned HOST_WIDE_INT,
71 				       unsigned HOST_WIDE_INT, int, rtx,
72 				       machine_mode, machine_mode, bool, bool);
73 static rtx extract_fixed_bit_field (machine_mode, rtx, opt_scalar_int_mode,
74 				    unsigned HOST_WIDE_INT,
75 				    unsigned HOST_WIDE_INT, rtx, int, bool);
76 static rtx extract_fixed_bit_field_1 (machine_mode, rtx, scalar_int_mode,
77 				      unsigned HOST_WIDE_INT,
78 				      unsigned HOST_WIDE_INT, rtx, int, bool);
79 static rtx lshift_value (machine_mode, unsigned HOST_WIDE_INT, int);
80 static rtx extract_split_bit_field (rtx, opt_scalar_int_mode,
81 				    unsigned HOST_WIDE_INT,
82 				    unsigned HOST_WIDE_INT, int, bool);
83 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, machine_mode, rtx_code_label *);
84 static rtx expand_smod_pow2 (scalar_int_mode, rtx, HOST_WIDE_INT);
85 static rtx expand_sdiv_pow2 (scalar_int_mode, rtx, HOST_WIDE_INT);
86 
87 /* Return a constant integer mask value of mode MODE with BITSIZE ones
88    followed by BITPOS zeros, or the complement of that if COMPLEMENT.
89    The mask is truncated if necessary to the width of mode MODE.  The
90    mask is zero-extended if BITSIZE+BITPOS is too small for MODE.  */
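/* For example (illustrative only): mask_rtx (SImode, 4, 8, false) yields
   the constant 0x00000ff0, while mask_rtx (SImode, 4, 8, true) yields its
   complement 0xfffff00f.  */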
91 
92 static inline rtx
93 mask_rtx (scalar_int_mode mode, int bitpos, int bitsize, bool complement)
94 {
95   return immed_wide_int_const
96     (wi::shifted_mask (bitpos, bitsize, complement,
97 		       GET_MODE_PRECISION (mode)), mode);
98 }
99 
100 /* Test whether a value is zero or a power of two.  */
101 #define EXACT_POWER_OF_2_OR_ZERO_P(x) \
102   (((x) & ((x) - HOST_WIDE_INT_1U)) == 0)
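/* For illustration: clearing the lowest set bit of a power of two yields
   zero (e.g. 8 & 7 == 0), whereas a non-power such as 6 gives 6 & 5 == 4;
   zero itself trivially passes the test.  */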
103 
104 struct init_expmed_rtl
105 {
106   rtx reg;
107   rtx plus;
108   rtx neg;
109   rtx mult;
110   rtx sdiv;
111   rtx udiv;
112   rtx sdiv_32;
113   rtx smod_32;
114   rtx wide_mult;
115   rtx wide_lshr;
116   rtx wide_trunc;
117   rtx shift;
118   rtx shift_mult;
119   rtx shift_add;
120   rtx shift_sub0;
121   rtx shift_sub1;
122   rtx zext;
123   rtx trunc;
124 
125   rtx pow2[MAX_BITS_PER_WORD];
126   rtx cint[MAX_BITS_PER_WORD];
127 };
128 
129 static void
130 init_expmed_one_conv (struct init_expmed_rtl *all, scalar_int_mode to_mode,
131 		      scalar_int_mode from_mode, bool speed)
132 {
133   int to_size, from_size;
134   rtx which;
135 
136   to_size = GET_MODE_PRECISION (to_mode);
137   from_size = GET_MODE_PRECISION (from_mode);
138 
139   /* Most partial integers have a precision less than the "full"
140      integer they require for storage.  If one doesn't, reduce its
141      bit size by one here, purely for the purposes of the comparison
142      below.  */
143   if (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT
144       && pow2p_hwi (to_size))
145     to_size --;
146   if (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT
147       && pow2p_hwi (from_size))
148     from_size --;
149 
150   /* Assume cost of zero-extend and sign-extend is the same.  */
151   which = (to_size < from_size ? all->trunc : all->zext);
152 
153   PUT_MODE (all->reg, from_mode);
154   set_convert_cost (to_mode, from_mode, speed,
155 		    set_src_cost (which, to_mode, speed));
156 }
157 
158 static void
159 init_expmed_one_mode (struct init_expmed_rtl *all,
160 		      machine_mode mode, int speed)
161 {
162   int m, n, mode_bitsize;
163   machine_mode mode_from;
164 
165   mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
166 
167   PUT_MODE (all->reg, mode);
168   PUT_MODE (all->plus, mode);
169   PUT_MODE (all->neg, mode);
170   PUT_MODE (all->mult, mode);
171   PUT_MODE (all->sdiv, mode);
172   PUT_MODE (all->udiv, mode);
173   PUT_MODE (all->sdiv_32, mode);
174   PUT_MODE (all->smod_32, mode);
175   PUT_MODE (all->wide_trunc, mode);
176   PUT_MODE (all->shift, mode);
177   PUT_MODE (all->shift_mult, mode);
178   PUT_MODE (all->shift_add, mode);
179   PUT_MODE (all->shift_sub0, mode);
180   PUT_MODE (all->shift_sub1, mode);
181   PUT_MODE (all->zext, mode);
182   PUT_MODE (all->trunc, mode);
183 
184   set_add_cost (speed, mode, set_src_cost (all->plus, mode, speed));
185   set_neg_cost (speed, mode, set_src_cost (all->neg, mode, speed));
186   set_mul_cost (speed, mode, set_src_cost (all->mult, mode, speed));
187   set_sdiv_cost (speed, mode, set_src_cost (all->sdiv, mode, speed));
188   set_udiv_cost (speed, mode, set_src_cost (all->udiv, mode, speed));
189 
190   set_sdiv_pow2_cheap (speed, mode, (set_src_cost (all->sdiv_32, mode, speed)
191 				     <= 2 * add_cost (speed, mode)));
192   set_smod_pow2_cheap (speed, mode, (set_src_cost (all->smod_32, mode, speed)
193 				     <= 4 * add_cost (speed, mode)));
194 
195   set_shift_cost (speed, mode, 0, 0);
196   {
197     int cost = add_cost (speed, mode);
198     set_shiftadd_cost (speed, mode, 0, cost);
199     set_shiftsub0_cost (speed, mode, 0, cost);
200     set_shiftsub1_cost (speed, mode, 0, cost);
201   }
202 
203   n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
204   for (m = 1; m < n; m++)
205     {
206       XEXP (all->shift, 1) = all->cint[m];
207       XEXP (all->shift_mult, 1) = all->pow2[m];
208 
209       set_shift_cost (speed, mode, m, set_src_cost (all->shift, mode, speed));
210       set_shiftadd_cost (speed, mode, m, set_src_cost (all->shift_add, mode,
211 						       speed));
212       set_shiftsub0_cost (speed, mode, m, set_src_cost (all->shift_sub0, mode,
213 							speed));
214       set_shiftsub1_cost (speed, mode, m, set_src_cost (all->shift_sub1, mode,
215 							speed));
216     }
217 
218   scalar_int_mode int_mode_to;
219   if (is_a <scalar_int_mode> (mode, &int_mode_to))
220     {
221       for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
222 	   mode_from = (machine_mode)(mode_from + 1))
223 	init_expmed_one_conv (all, int_mode_to,
224 			      as_a <scalar_int_mode> (mode_from), speed);
225 
226       scalar_int_mode wider_mode;
227       if (GET_MODE_CLASS (int_mode_to) == MODE_INT
228 	  && GET_MODE_WIDER_MODE (int_mode_to).exists (&wider_mode))
229 	{
230 	  PUT_MODE (all->zext, wider_mode);
231 	  PUT_MODE (all->wide_mult, wider_mode);
232 	  PUT_MODE (all->wide_lshr, wider_mode);
233 	  XEXP (all->wide_lshr, 1)
234 	    = gen_int_shift_amount (wider_mode, mode_bitsize);
235 
236 	  set_mul_widen_cost (speed, wider_mode,
237 			      set_src_cost (all->wide_mult, wider_mode, speed));
238 	  set_mul_highpart_cost (speed, int_mode_to,
239 				 set_src_cost (all->wide_trunc,
240 					       int_mode_to, speed));
241 	}
242     }
243 }
244 
245 void
246 init_expmed (void)
247 {
248   struct init_expmed_rtl all;
249   machine_mode mode = QImode;
250   int m, speed;
251 
252   memset (&all, 0, sizeof all);
253   for (m = 1; m < MAX_BITS_PER_WORD; m++)
254     {
255       all.pow2[m] = GEN_INT (HOST_WIDE_INT_1 << m);
256       all.cint[m] = GEN_INT (m);
257     }
258 
259   /* Avoid using hard regs in ways which may be unsupported.  */
260   all.reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
261   all.plus = gen_rtx_PLUS (mode, all.reg, all.reg);
262   all.neg = gen_rtx_NEG (mode, all.reg);
263   all.mult = gen_rtx_MULT (mode, all.reg, all.reg);
264   all.sdiv = gen_rtx_DIV (mode, all.reg, all.reg);
265   all.udiv = gen_rtx_UDIV (mode, all.reg, all.reg);
266   all.sdiv_32 = gen_rtx_DIV (mode, all.reg, all.pow2[5]);
267   all.smod_32 = gen_rtx_MOD (mode, all.reg, all.pow2[5]);
268   all.zext = gen_rtx_ZERO_EXTEND (mode, all.reg);
269   all.wide_mult = gen_rtx_MULT (mode, all.zext, all.zext);
270   all.wide_lshr = gen_rtx_LSHIFTRT (mode, all.wide_mult, all.reg);
271   all.wide_trunc = gen_rtx_TRUNCATE (mode, all.wide_lshr);
272   all.shift = gen_rtx_ASHIFT (mode, all.reg, all.reg);
273   all.shift_mult = gen_rtx_MULT (mode, all.reg, all.reg);
274   all.shift_add = gen_rtx_PLUS (mode, all.shift_mult, all.reg);
275   all.shift_sub0 = gen_rtx_MINUS (mode, all.shift_mult, all.reg);
276   all.shift_sub1 = gen_rtx_MINUS (mode, all.reg, all.shift_mult);
277   all.trunc = gen_rtx_TRUNCATE (mode, all.reg);
278 
279   for (speed = 0; speed < 2; speed++)
280     {
281       crtl->maybe_hot_insn_p = speed;
282       set_zero_cost (speed, set_src_cost (const0_rtx, mode, speed));
283 
284       for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
285 	   mode = (machine_mode)(mode + 1))
286 	init_expmed_one_mode (&all, mode, speed);
287 
288       if (MIN_MODE_PARTIAL_INT != VOIDmode)
289 	for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
290 	     mode = (machine_mode)(mode + 1))
291 	  init_expmed_one_mode (&all, mode, speed);
292 
293       if (MIN_MODE_VECTOR_INT != VOIDmode)
294 	for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
295 	     mode = (machine_mode)(mode + 1))
296 	  init_expmed_one_mode (&all, mode, speed);
297     }
298 
299   if (alg_hash_used_p ())
300     {
301       struct alg_hash_entry *p = alg_hash_entry_ptr (0);
302       memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
303     }
304   else
305     set_alg_hash_used_p (true);
306   default_rtl_profile ();
307 
308   ggc_free (all.trunc);
309   ggc_free (all.shift_sub1);
310   ggc_free (all.shift_sub0);
311   ggc_free (all.shift_add);
312   ggc_free (all.shift_mult);
313   ggc_free (all.shift);
314   ggc_free (all.wide_trunc);
315   ggc_free (all.wide_lshr);
316   ggc_free (all.wide_mult);
317   ggc_free (all.zext);
318   ggc_free (all.smod_32);
319   ggc_free (all.sdiv_32);
320   ggc_free (all.udiv);
321   ggc_free (all.sdiv);
322   ggc_free (all.mult);
323   ggc_free (all.neg);
324   ggc_free (all.plus);
325   ggc_free (all.reg);
326 }
327 
328 /* Return an rtx representing minus the value of X.
329    MODE is the intended mode of the result,
330    useful if X is a CONST_INT.  */
331 
332 rtx
333 negate_rtx (machine_mode mode, rtx x)
334 {
335   rtx result = simplify_unary_operation (NEG, mode, x, mode);
336 
337   if (result == 0)
338     result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
339 
340   return result;
341 }
342 
343 /* Whether reverse storage order is supported on the target.  */
344 static int reverse_storage_order_supported = -1;
345 
346 /* Check whether reverse storage order is supported on the target.  */
347 
348 static void
349 check_reverse_storage_order_support (void)
350 {
351   if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
352     {
353       reverse_storage_order_supported = 0;
354       sorry ("reverse scalar storage order");
355     }
356   else
357     reverse_storage_order_supported = 1;
358 }
359 
360 /* Whether reverse FP storage order is supported on the target.  */
361 static int reverse_float_storage_order_supported = -1;
362 
363 /* Check whether reverse FP storage order is supported on the target.  */
364 
365 static void
366 check_reverse_float_storage_order_support (void)
367 {
368   if (FLOAT_WORDS_BIG_ENDIAN != WORDS_BIG_ENDIAN)
369     {
370       reverse_float_storage_order_supported = 0;
371       sorry ("reverse floating-point scalar storage order");
372     }
373   else
374     reverse_float_storage_order_supported = 1;
375 }
376 
377 /* Return an rtx representing the value of X with reverse storage order.
378    MODE is the intended mode of the result,
379    useful if X is a CONST_INT.  */
380 
381 rtx
382 flip_storage_order (machine_mode mode, rtx x)
383 {
384   scalar_int_mode int_mode;
385   rtx result;
386 
387   if (mode == QImode)
388     return x;
389 
390   if (COMPLEX_MODE_P (mode))
391     {
392       rtx real = read_complex_part (x, false);
393       rtx imag = read_complex_part (x, true);
394 
395       real = flip_storage_order (GET_MODE_INNER (mode), real);
396       imag = flip_storage_order (GET_MODE_INNER (mode), imag);
397 
398       return gen_rtx_CONCAT (mode, real, imag);
399     }
400 
401   if (__builtin_expect (reverse_storage_order_supported < 0, 0))
402     check_reverse_storage_order_support ();
403 
404   if (!is_a <scalar_int_mode> (mode, &int_mode))
405     {
406       if (FLOAT_MODE_P (mode)
407 	  && __builtin_expect (reverse_float_storage_order_supported < 0, 0))
408 	check_reverse_float_storage_order_support ();
409 
410       if (!int_mode_for_size (GET_MODE_PRECISION (mode), 0).exists (&int_mode))
411 	{
412 	  sorry ("reverse storage order for %smode", GET_MODE_NAME (mode));
413 	  return x;
414 	}
415       x = gen_lowpart (int_mode, x);
416     }
417 
418   result = simplify_unary_operation (BSWAP, int_mode, x, int_mode);
419   if (result == 0)
420     result = expand_unop (int_mode, bswap_optab, x, NULL_RTX, 1);
421 
422   if (int_mode != mode)
423     result = gen_lowpart (mode, result);
424 
425   return result;
426 }
427 
428 /* If MODE is set, adjust bitfield memory MEM so that it points to the
429    first unit of mode MODE that contains a bitfield of size BITSIZE at
430    bit position BITNUM.  If MODE is not set, return a BLKmode reference
431    to every byte in the bitfield.  Set *NEW_BITNUM to the bit position
432    of the field within the new memory.  */
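/* For illustration: with MODE == SImode (32 bits) and BITNUM == 70, the
   returned MEM is advanced by 8 bytes and *NEW_BITNUM becomes 6.  */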
433 
434 static rtx
435 narrow_bit_field_mem (rtx mem, opt_scalar_int_mode mode,
436 		      unsigned HOST_WIDE_INT bitsize,
437 		      unsigned HOST_WIDE_INT bitnum,
438 		      unsigned HOST_WIDE_INT *new_bitnum)
439 {
440   scalar_int_mode imode;
441   if (mode.exists (&imode))
442     {
443       unsigned int unit = GET_MODE_BITSIZE (imode);
444       *new_bitnum = bitnum % unit;
445       HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
446       return adjust_bitfield_address (mem, imode, offset);
447     }
448   else
449     {
450       *new_bitnum = bitnum % BITS_PER_UNIT;
451       HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
452       HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
453 			    / BITS_PER_UNIT);
454       return adjust_bitfield_address_size (mem, BLKmode, offset, size);
455     }
456 }
457 
458 /* The caller wants to perform insertion or extraction PATTERN on a
459    bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
460    BITREGION_START and BITREGION_END are as for store_bit_field
461    and FIELDMODE is the natural mode of the field.
462 
463    Search for a mode that is compatible with the memory access
464    restrictions and (where applicable) with a register insertion or
465    extraction.  Return the new memory on success, storing the adjusted
466    bit position in *NEW_BITNUM.  Return null otherwise.  */
467 
468 static rtx
469 adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
470 			      rtx op0, HOST_WIDE_INT bitsize,
471 			      HOST_WIDE_INT bitnum,
472 			      poly_uint64 bitregion_start,
473 			      poly_uint64 bitregion_end,
474 			      machine_mode fieldmode,
475 			      unsigned HOST_WIDE_INT *new_bitnum)
476 {
477   bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
478 				bitregion_end, MEM_ALIGN (op0),
479 				MEM_VOLATILE_P (op0));
480   scalar_int_mode best_mode;
481   if (iter.next_mode (&best_mode))
482     {
483       /* We can use a memory in BEST_MODE.  See whether this is true for
484 	 any wider modes.  All other things being equal, we prefer to
485 	 use the widest mode possible because it tends to expose more
486 	 CSE opportunities.  */
487       if (!iter.prefer_smaller_modes ())
488 	{
489 	  /* Limit the search to the mode required by the corresponding
490 	     register insertion or extraction instruction, if any.  */
491 	  scalar_int_mode limit_mode = word_mode;
492 	  extraction_insn insn;
493 	  if (get_best_reg_extraction_insn (&insn, pattern,
494 					    GET_MODE_BITSIZE (best_mode),
495 					    fieldmode))
496 	    limit_mode = insn.field_mode;
497 
498 	  scalar_int_mode wider_mode;
499 	  while (iter.next_mode (&wider_mode)
500 		 && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
501 	    best_mode = wider_mode;
502 	}
503       return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
504 				   new_bitnum);
505     }
506   return NULL_RTX;
507 }
508 
509 /* Return true if a bitfield of size BITSIZE at bit number BITNUM within
510    a structure of mode STRUCT_MODE represents a lowpart subreg.   The subreg
511    offset is then BITNUM / BITS_PER_UNIT.  */
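/* For illustration: on a little-endian target where REGMODE_NATURAL_SIZE
   is 4 bytes, a field starting at bit 32 of a TImode value satisfies this
   test (subreg byte offset 4), while one starting at bit 8 does not.  */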
512 
513 static bool
514 lowpart_bit_field_p (poly_uint64 bitnum, poly_uint64 bitsize,
515 		     machine_mode struct_mode)
516 {
517   poly_uint64 regsize = REGMODE_NATURAL_SIZE (struct_mode);
518   if (BYTES_BIG_ENDIAN)
519     return (multiple_p (bitnum, BITS_PER_UNIT)
520 	    && (known_eq (bitnum + bitsize, GET_MODE_BITSIZE (struct_mode))
521 		|| multiple_p (bitnum + bitsize,
522 			       regsize * BITS_PER_UNIT)));
523   else
524     return multiple_p (bitnum, regsize * BITS_PER_UNIT);
525 }
526 
527 /* Return true if -fstrict-volatile-bitfields applies to an access of OP0
528    containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE.
529    Return false if the access would touch memory outside the range
530    BITREGION_START to BITREGION_END for conformance to the C++ memory
531    model.  */
532 
533 static bool
534 strict_volatile_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
535 			    unsigned HOST_WIDE_INT bitnum,
536 			    scalar_int_mode fieldmode,
537 			    poly_uint64 bitregion_start,
538 			    poly_uint64 bitregion_end)
539 {
540   unsigned HOST_WIDE_INT modesize = GET_MODE_BITSIZE (fieldmode);
541 
542   /* -fstrict-volatile-bitfields must be enabled and we must have a
543      volatile MEM.  */
544   if (!MEM_P (op0)
545       || !MEM_VOLATILE_P (op0)
546       || flag_strict_volatile_bitfields <= 0)
547     return false;
548 
549   /* The bit size must not be larger than the field mode, and
550      the field mode must not be larger than a word.  */
551   if (bitsize > modesize || modesize > BITS_PER_WORD)
552     return false;
553 
554   /* Check for cases of unaligned fields that must be split.  */
555   if (bitnum % modesize + bitsize > modesize)
556     return false;
557 
558   /* The memory must be sufficiently aligned for a MODESIZE access.
559      This condition guarantees that the memory access will not
560      touch anything after the end of the structure.  */
561   if (MEM_ALIGN (op0) < modesize)
562     return false;
563 
564   /* Check for cases where the C++ memory model applies.  */
565   if (maybe_ne (bitregion_end, 0U)
566       && (maybe_lt (bitnum - bitnum % modesize, bitregion_start)
567 	  || maybe_gt (bitnum - bitnum % modesize + modesize - 1,
568 		       bitregion_end)))
569     return false;
570 
571   return true;
572 }
573 
574 /* Return true if OP is a memory and if a bitfield of size BITSIZE at
575    bit number BITNUM can be treated as a simple value of mode MODE.
576    Store the byte offset in *BYTENUM if so.  */
577 
578 static bool
579 simple_mem_bitfield_p (rtx op0, poly_uint64 bitsize, poly_uint64 bitnum,
580 		       machine_mode mode, poly_uint64 *bytenum)
581 {
582   return (MEM_P (op0)
583 	  && multiple_p (bitnum, BITS_PER_UNIT, bytenum)
584 	  && known_eq (bitsize, GET_MODE_BITSIZE (mode))
585 	  && (!targetm.slow_unaligned_access (mode, MEM_ALIGN (op0))
586 	      || (multiple_p (bitnum, GET_MODE_ALIGNMENT (mode))
587 		  && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode))));
588 }
589 
590 /* Try to use instruction INSV to store VALUE into a field of OP0.
591    If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is a
592    BLKmode MEM.  VALUE_MODE is the mode of VALUE.  BITSIZE and BITNUM
593    are as for store_bit_field.  */
594 
595 static bool
596 store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
597 			    opt_scalar_int_mode op0_mode,
598 			    unsigned HOST_WIDE_INT bitsize,
599 			    unsigned HOST_WIDE_INT bitnum,
600 			    rtx value, scalar_int_mode value_mode)
601 {
602   struct expand_operand ops[4];
603   rtx value1;
604   rtx xop0 = op0;
605   rtx_insn *last = get_last_insn ();
606   bool copy_back = false;
607 
608   scalar_int_mode op_mode = insv->field_mode;
609   unsigned int unit = GET_MODE_BITSIZE (op_mode);
610   if (bitsize == 0 || bitsize > unit)
611     return false;
612 
613   if (MEM_P (xop0))
614     /* Get a reference to the first byte of the field.  */
615     xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
616 				 &bitnum);
617   else
618     {
619       /* Convert from counting within OP0 to counting in OP_MODE.  */
620       if (BYTES_BIG_ENDIAN)
621 	bitnum += unit - GET_MODE_BITSIZE (op0_mode.require ());
622 
623       /* If xop0 is a register, we need it in OP_MODE
624 	 to make it acceptable to the format of insv.  */
625       if (GET_CODE (xop0) == SUBREG)
626 	/* We can't just change the mode, because this might clobber op0,
627 	   and we will need the original value of op0 if insv fails.  */
628 	xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
629       if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
630 	xop0 = gen_lowpart_SUBREG (op_mode, xop0);
631     }
632 
633   /* If the destination is a paradoxical subreg such that we need a
634      truncate to the inner mode, perform the insertion on a temporary and
635      truncate the result to the original destination.  Note that we can't
636      just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
637      X) 0)) is (reg:N X).  */
638   if (GET_CODE (xop0) == SUBREG
639       && REG_P (SUBREG_REG (xop0))
640       && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
641 					 op_mode))
642     {
643       rtx tem = gen_reg_rtx (op_mode);
644       emit_move_insn (tem, xop0);
645       xop0 = tem;
646       copy_back = true;
647     }
648 
649   /* There is a similar overflow check at the start of store_bit_field_1,
650      but that only checks the case where the field lies completely
651      outside the register.  The field can also lie partially in the
652      register, in which case we need to adjust BITSIZE for the partial
653      overflow.  Without this fix, pr48335-2.c breaks on big-endian
654      targets whose architecture has a bit-insert instruction, such as
655      arm and aarch64.  */
656   if (bitsize + bitnum > unit && bitnum < unit)
657     {
658       warning (OPT_Wextra, "write of %wu-bit data outside the bound of "
659 	       "destination object, data truncated into %wu-bit",
660 	       bitsize, unit - bitnum);
661       bitsize = unit - bitnum;
662     }
663 
664   /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
665      "backwards" from the size of the unit we are inserting into.
666      Otherwise, we count bits from the most significant on a
667      BYTES/BITS_BIG_ENDIAN machine.  */
668 
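  /* For illustration: when this renumbering applies and UNIT is 32, an
     8-bit field at BITNUM 4 is repositioned to bit 20, i.e.
     UNIT - BITSIZE - BITNUM.  */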
669   if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
670     bitnum = unit - bitsize - bitnum;
671 
672   /* Convert VALUE to op_mode (which insv insn wants) in VALUE1.  */
673   value1 = value;
674   if (value_mode != op_mode)
675     {
676       if (GET_MODE_BITSIZE (value_mode) >= bitsize)
677 	{
678 	  rtx tmp;
679 	  /* Optimization: Don't bother really extending VALUE
680 	     if it has all the bits we will actually use.  However,
681 	     if we must narrow it, be sure we do it correctly.  */
682 
683 	  if (GET_MODE_SIZE (value_mode) < GET_MODE_SIZE (op_mode))
684 	    {
685 	      tmp = simplify_subreg (op_mode, value1, value_mode, 0);
686 	      if (! tmp)
687 		tmp = simplify_gen_subreg (op_mode,
688 					   force_reg (value_mode, value1),
689 					   value_mode, 0);
690 	    }
691 	  else
692 	    {
693 	      tmp = gen_lowpart_if_possible (op_mode, value1);
694 	      if (! tmp)
695 		tmp = gen_lowpart (op_mode, force_reg (value_mode, value1));
696 	    }
697 	  value1 = tmp;
698 	}
699       else if (CONST_INT_P (value))
700 	value1 = gen_int_mode (INTVAL (value), op_mode);
701       else
702 	/* Parse phase is supposed to make VALUE's data type
703 	   match that of the component reference, which is a type
704 	   at least as wide as the field; so VALUE should have
705 	   a mode that corresponds to that type.  */
706 	gcc_assert (CONSTANT_P (value));
707     }
708 
709   create_fixed_operand (&ops[0], xop0);
710   create_integer_operand (&ops[1], bitsize);
711   create_integer_operand (&ops[2], bitnum);
712   create_input_operand (&ops[3], value1, op_mode);
713   if (maybe_expand_insn (insv->icode, 4, ops))
714     {
715       if (copy_back)
716 	convert_move (op0, xop0, true);
717       return true;
718     }
719   delete_insns_since (last);
720   return false;
721 }
722 
723 /* A subroutine of store_bit_field, with the same arguments.  Return true
724    if the operation could be implemented.
725 
726    If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
727    no other way of implementing the operation.  If FALLBACK_P is false,
728    return false instead.  */
729 
730 static bool
731 store_bit_field_1 (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
732 		   poly_uint64 bitregion_start, poly_uint64 bitregion_end,
733 		   machine_mode fieldmode,
734 		   rtx value, bool reverse, bool fallback_p)
735 {
736   rtx op0 = str_rtx;
737 
738   while (GET_CODE (op0) == SUBREG)
739     {
740       bitnum += subreg_memory_offset (op0) * BITS_PER_UNIT;
741       op0 = SUBREG_REG (op0);
742     }
743 
744   /* No action is needed if the target is a register and if the field
745      lies completely outside that register.  This can occur if the source
746      code contains an out-of-bounds access to a small array.  */
747   if (REG_P (op0) && known_ge (bitnum, GET_MODE_BITSIZE (GET_MODE (op0))))
748     return true;
749 
750   /* Use vec_set patterns for inserting parts of vectors whenever
751      available.  */
752   machine_mode outermode = GET_MODE (op0);
753   scalar_mode innermode = GET_MODE_INNER (outermode);
754   poly_uint64 pos;
755   if (VECTOR_MODE_P (outermode)
756       && !MEM_P (op0)
757       && optab_handler (vec_set_optab, outermode) != CODE_FOR_nothing
758       && fieldmode == innermode
759       && known_eq (bitsize, GET_MODE_BITSIZE (innermode))
760       && multiple_p (bitnum, GET_MODE_BITSIZE (innermode), &pos))
761     {
762       struct expand_operand ops[3];
763       enum insn_code icode = optab_handler (vec_set_optab, outermode);
764 
765       create_fixed_operand (&ops[0], op0);
766       create_input_operand (&ops[1], value, innermode);
767       create_integer_operand (&ops[2], pos);
768       if (maybe_expand_insn (icode, 3, ops))
769 	return true;
770     }
771 
772   /* If the target is a register, overwriting the entire object or storing
773      a full-word or multi-word field can be done with just a SUBREG.  */
774   if (!MEM_P (op0)
775       && known_eq (bitsize, GET_MODE_BITSIZE (fieldmode)))
776     {
777       /* Use the subreg machinery either to narrow OP0 to the required
778 	 words or to cope with mode punning between equal-sized modes.
779 	 In the latter case, use subreg on the rhs side, not lhs.  */
780       rtx sub;
781       HOST_WIDE_INT regnum;
782       poly_uint64 regsize = REGMODE_NATURAL_SIZE (GET_MODE (op0));
783       if (known_eq (bitnum, 0U)
784 	  && known_eq (bitsize, GET_MODE_BITSIZE (GET_MODE (op0))))
785 	{
786 	  sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0);
787 	  if (sub)
788 	    {
789 	      if (reverse)
790 		sub = flip_storage_order (GET_MODE (op0), sub);
791 	      emit_move_insn (op0, sub);
792 	      return true;
793 	    }
794 	}
795       else if (constant_multiple_p (bitnum, regsize * BITS_PER_UNIT, &regnum)
796 	       && multiple_p (bitsize, regsize * BITS_PER_UNIT))
797 	{
798 	  sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
799 				     regnum * regsize);
800 	  if (sub)
801 	    {
802 	      if (reverse)
803 		value = flip_storage_order (fieldmode, value);
804 	      emit_move_insn (sub, value);
805 	      return true;
806 	    }
807 	}
808     }
809 
810   /* If the target is memory, storing any naturally aligned field can be
811      done with a simple store.  For targets that support fast unaligned
812      memory, any naturally sized, unit aligned field can be done directly.  */
813   poly_uint64 bytenum;
814   if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode, &bytenum))
815     {
816       op0 = adjust_bitfield_address (op0, fieldmode, bytenum);
817       if (reverse)
818 	value = flip_storage_order (fieldmode, value);
819       emit_move_insn (op0, value);
820       return true;
821     }
822 
823   /* It's possible we'll need to handle other cases here for
824      polynomial bitnum and bitsize.  */
825 
826   /* From here on we need to be looking at a fixed-size insertion.  */
827   unsigned HOST_WIDE_INT ibitsize = bitsize.to_constant ();
828   unsigned HOST_WIDE_INT ibitnum = bitnum.to_constant ();
829 
830   /* Make sure we are playing with integral modes.  Pun with subregs
831      if we aren't.  This must come after the entire register case above,
832      since that case is valid for any mode.  The following cases are only
833      valid for integral modes.  */
834   opt_scalar_int_mode op0_mode = int_mode_for_mode (GET_MODE (op0));
835   scalar_int_mode imode;
836   if (!op0_mode.exists (&imode) || imode != GET_MODE (op0))
837     {
838       if (MEM_P (op0))
839 	op0 = adjust_bitfield_address_size (op0, op0_mode.else_blk (),
840 					    0, MEM_SIZE (op0));
841       else if (!op0_mode.exists ())
842 	{
843 	  if (ibitnum == 0
844 	      && known_eq (ibitsize, GET_MODE_BITSIZE (GET_MODE (op0)))
845 	      && MEM_P (value)
846 	      && !reverse)
847 	    {
848 	      value = adjust_address (value, GET_MODE (op0), 0);
849 	      emit_move_insn (op0, value);
850 	      return true;
851 	    }
852 	  if (!fallback_p)
853 	    return false;
854 	  rtx temp = assign_stack_temp (GET_MODE (op0),
855 					GET_MODE_SIZE (GET_MODE (op0)));
856 	  emit_move_insn (temp, op0);
857 	  store_bit_field_1 (temp, bitsize, bitnum, 0, 0, fieldmode, value,
858 			     reverse, fallback_p);
859 	  emit_move_insn (op0, temp);
860 	  return true;
861 	}
862       else
863 	op0 = gen_lowpart (op0_mode.require (), op0);
864     }
865 
866   return store_integral_bit_field (op0, op0_mode, ibitsize, ibitnum,
867 				   bitregion_start, bitregion_end,
868 				   fieldmode, value, reverse, fallback_p);
869 }
870 
871 /* Subroutine of store_bit_field_1, with the same arguments, except
872    that BITSIZE and BITNUM are constant.  Handle cases specific to
873    integral modes.  If OP0_MODE is defined, it is the mode of OP0,
874    otherwise OP0 is a BLKmode MEM.  */
875 
876 static bool
877 store_integral_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
878 			  unsigned HOST_WIDE_INT bitsize,
879 			  unsigned HOST_WIDE_INT bitnum,
880 			  poly_uint64 bitregion_start,
881 			  poly_uint64 bitregion_end,
882 			  machine_mode fieldmode,
883 			  rtx value, bool reverse, bool fallback_p)
884 {
885   /* Storing an lsb-aligned field in a register
886      can be done with a movstrict instruction.  */
887 
888   if (!MEM_P (op0)
889       && !reverse
890       && lowpart_bit_field_p (bitnum, bitsize, op0_mode.require ())
891       && known_eq (bitsize, GET_MODE_BITSIZE (fieldmode))
892       && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
893     {
894       struct expand_operand ops[2];
895       enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
896       rtx arg0 = op0;
897       unsigned HOST_WIDE_INT subreg_off;
898 
899       if (GET_CODE (arg0) == SUBREG)
900 	{
901 	  /* Else we've got some float mode source being extracted into
902 	     a different float mode destination -- this combination of
903 	     subregs results in Severe Tire Damage.  */
904 	  gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
905 		      || GET_MODE_CLASS (fieldmode) == MODE_INT
906 		      || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
907 	  arg0 = SUBREG_REG (arg0);
908 	}
909 
910       subreg_off = bitnum / BITS_PER_UNIT;
911       if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
912 	{
913 	  arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
914 
915 	  create_fixed_operand (&ops[0], arg0);
916 	  /* Shrink the source operand to FIELDMODE.  */
917 	  create_convert_operand_to (&ops[1], value, fieldmode, false);
918 	  if (maybe_expand_insn (icode, 2, ops))
919 	    return true;
920 	}
921     }
922 
923   /* Handle fields bigger than a word.  */
924 
925   if (bitsize > BITS_PER_WORD)
926     {
927       /* Here we transfer the words of the field
928 	 in the order least significant first.
929 	 This is because the most significant word is the one which may
930 	 be less than full.
931 	 However, only do that if the value is not BLKmode.  */
932 
933       const bool backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
934       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
935       unsigned int i;
936       rtx_insn *last;
937 
938       /* This is the mode we must force value to, so that there will be enough
939 	 subwords to extract.  Note that fieldmode will often (always?) be
940 	 VOIDmode, because that is what store_field uses to indicate that this
941 	 is a bit field, but passing VOIDmode to operand_subword_force
942 	 is not allowed.
943 
944 	 The mode must be fixed-size, since insertions into variable-sized
945 	 objects are meant to be handled before calling this function.  */
946       fixed_size_mode value_mode = as_a <fixed_size_mode> (GET_MODE (value));
947       if (value_mode == VOIDmode)
948 	value_mode = smallest_int_mode_for_size (nwords * BITS_PER_WORD);
949 
950       last = get_last_insn ();
951       for (i = 0; i < nwords; i++)
952 	{
953 	  /* If I is 0, use the low-order word in both field and target;
954 	     if I is 1, use the next to lowest word; and so on.  */
955 	  unsigned int wordnum = (backwards
956 				  ? GET_MODE_SIZE (value_mode) / UNITS_PER_WORD
957 				  - i - 1
958 				  : i);
959 	  unsigned int bit_offset = (backwards ^ reverse
960 				     ? MAX ((int) bitsize - ((int) i + 1)
961 					    * BITS_PER_WORD,
962 					    0)
963 				     : (int) i * BITS_PER_WORD);
964 	  rtx value_word = operand_subword_force (value, wordnum, value_mode);
965 	  unsigned HOST_WIDE_INT new_bitsize =
966 	    MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
967 
968 	  /* If the remaining chunk doesn't have full wordsize, we have
969 	     to make sure that for big-endian machines the higher order
970 	     bits are used.  */
971 	  if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
972 	    {
973 	      int shift = BITS_PER_WORD - new_bitsize;
974 	      rtx shift_rtx = gen_int_shift_amount (word_mode, shift);
975 	      value_word = simplify_expand_binop (word_mode, lshr_optab,
976 						  value_word, shift_rtx,
977 						  NULL_RTX, true,
978 						  OPTAB_LIB_WIDEN);
979 	    }
980 
981 	  if (!store_bit_field_1 (op0, new_bitsize,
982 				  bitnum + bit_offset,
983 				  bitregion_start, bitregion_end,
984 				  word_mode,
985 				  value_word, reverse, fallback_p))
986 	    {
987 	      delete_insns_since (last);
988 	      return false;
989 	    }
990 	}
991       return true;
992     }
993 
994   /* If VALUE has a floating-point or complex mode, access it as an
995      integer of the corresponding size.  This can occur on a machine
996      with 64 bit registers that uses SFmode for float.  It can also
997      occur for unaligned float or complex fields.  */
998   rtx orig_value = value;
999   scalar_int_mode value_mode;
1000   if (GET_MODE (value) == VOIDmode)
1001     /* By this point we've dealt with values that are bigger than a word,
1002        so word_mode is a conservatively correct choice.  */
1003     value_mode = word_mode;
1004   else if (!is_a <scalar_int_mode> (GET_MODE (value), &value_mode))
1005     {
1006       value_mode = int_mode_for_mode (GET_MODE (value)).require ();
1007       value = gen_reg_rtx (value_mode);
1008       emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
1009     }
1010 
1011   /* If OP0 is a multi-word register, narrow it to the affected word.
1012      If the region spans two words, defer to store_split_bit_field.
1013      Don't do this if op0 is a single hard register wider than word
1014      such as a float or vector register.  */
1015   if (!MEM_P (op0)
1016       && GET_MODE_SIZE (op0_mode.require ()) > UNITS_PER_WORD
1017       && (!REG_P (op0)
1018 	  || !HARD_REGISTER_P (op0)
1019 	  || hard_regno_nregs (REGNO (op0), op0_mode.require ()) != 1))
1020     {
1021       if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
1022 	{
1023 	  if (!fallback_p)
1024 	    return false;
1025 
1026 	  store_split_bit_field (op0, op0_mode, bitsize, bitnum,
1027 				 bitregion_start, bitregion_end,
1028 				 value, value_mode, reverse);
1029 	  return true;
1030 	}
1031       op0 = simplify_gen_subreg (word_mode, op0, op0_mode.require (),
1032 				 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1033       gcc_assert (op0);
1034       op0_mode = word_mode;
1035       bitnum %= BITS_PER_WORD;
1036     }
1037 
1038   /* From here on we can assume that the field to be stored in fits
1039      within a word.  If the destination is a register, it too fits
1040      in a word.  */
1041 
1042   extraction_insn insv;
1043   if (!MEM_P (op0)
1044       && !reverse
1045       && get_best_reg_extraction_insn (&insv, EP_insv,
1046 				       GET_MODE_BITSIZE (op0_mode.require ()),
1047 				       fieldmode)
1048       && store_bit_field_using_insv (&insv, op0, op0_mode,
1049 				     bitsize, bitnum, value, value_mode))
1050     return true;
1051 
1052   /* If OP0 is a memory, try copying it to a register and seeing if a
1053      cheap register alternative is available.  */
1054   if (MEM_P (op0) && !reverse)
1055     {
1056       if (get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
1057 					fieldmode)
1058 	  && store_bit_field_using_insv (&insv, op0, op0_mode,
1059 					 bitsize, bitnum, value, value_mode))
1060 	return true;
1061 
1062       rtx_insn *last = get_last_insn ();
1063 
1064       /* Try loading part of OP0 into a register, inserting the bitfield
1065 	 into that, and then copying the result back to OP0.  */
1066       unsigned HOST_WIDE_INT bitpos;
1067       rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
1068 					       bitregion_start, bitregion_end,
1069 					       fieldmode, &bitpos);
1070       if (xop0)
1071 	{
1072 	  rtx tempreg = copy_to_reg (xop0);
1073 	  if (store_bit_field_1 (tempreg, bitsize, bitpos,
1074 				 bitregion_start, bitregion_end,
1075 				 fieldmode, orig_value, reverse, false))
1076 	    {
1077 	      emit_move_insn (xop0, tempreg);
1078 	      return true;
1079 	    }
1080 	  delete_insns_since (last);
1081 	}
1082     }
1083 
1084   if (!fallback_p)
1085     return false;
1086 
1087   store_fixed_bit_field (op0, op0_mode, bitsize, bitnum, bitregion_start,
1088 			 bitregion_end, value, value_mode, reverse);
1089   return true;
1090 }
1091 
1092 /* Generate code to store value from rtx VALUE
1093    into a bit-field within structure STR_RTX
1094    containing BITSIZE bits starting at bit BITNUM.
1095 
1096    BITREGION_START is the bit position of the first bitfield in this region.
1097    BITREGION_END is the bit position of the ending bitfield in this region.
1098    These two fields are 0 if the C++ memory model does not apply,
1099    or if we are not interested in keeping track of bitfield regions.
1100 
1101    FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
1102 
1103    If REVERSE is true, the store is to be done in reverse order.  */
1104 
1105 void
1106 store_bit_field (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
1107 		 poly_uint64 bitregion_start, poly_uint64 bitregion_end,
1108 		 machine_mode fieldmode,
1109 		 rtx value, bool reverse)
1110 {
1111   /* Handle -fstrict-volatile-bitfields in the cases where it applies.  */
1112   unsigned HOST_WIDE_INT ibitsize = 0, ibitnum = 0;
1113   scalar_int_mode int_mode;
1114   if (bitsize.is_constant (&ibitsize)
1115       && bitnum.is_constant (&ibitnum)
1116       && is_a <scalar_int_mode> (fieldmode, &int_mode)
1117       && strict_volatile_bitfield_p (str_rtx, ibitsize, ibitnum, int_mode,
1118 				     bitregion_start, bitregion_end))
1119     {
1120       /* Storing a full word can be done with a simple store.
1121 	 We know here that the field can be accessed with a single
1122 	 instruction.  For targets that support unaligned memory,
1123 	 an unaligned access may be necessary.  */
1124       if (ibitsize == GET_MODE_BITSIZE (int_mode))
1125 	{
1126 	  str_rtx = adjust_bitfield_address (str_rtx, int_mode,
1127 					     ibitnum / BITS_PER_UNIT);
1128 	  if (reverse)
1129 	    value = flip_storage_order (int_mode, value);
1130 	  gcc_assert (ibitnum % BITS_PER_UNIT == 0);
1131 	  emit_move_insn (str_rtx, value);
1132 	}
1133       else
1134 	{
1135 	  rtx temp;
1136 
1137 	  str_rtx = narrow_bit_field_mem (str_rtx, int_mode, ibitsize,
1138 					  ibitnum, &ibitnum);
1139 	  gcc_assert (ibitnum + ibitsize <= GET_MODE_BITSIZE (int_mode));
1140 	  temp = copy_to_reg (str_rtx);
1141 	  if (!store_bit_field_1 (temp, ibitsize, ibitnum, 0, 0,
1142 				  int_mode, value, reverse, true))
1143 	    gcc_unreachable ();
1144 
1145 	  emit_move_insn (str_rtx, temp);
1146 	}
1147 
1148       return;
1149     }
1150 
1151   /* Under the C++0x memory model, we must not touch bits outside the
1152      bit region.  Adjust the address to start at the beginning of the
1153      bit region.  */
1154   if (MEM_P (str_rtx) && maybe_ne (bitregion_start, 0U))
1155     {
1156       scalar_int_mode best_mode;
1157       machine_mode addr_mode = VOIDmode;
1158 
1159       poly_uint64 offset = exact_div (bitregion_start, BITS_PER_UNIT);
1160       bitnum -= bitregion_start;
1161       poly_int64 size = bits_to_bytes_round_up (bitnum + bitsize);
1162       bitregion_end -= bitregion_start;
1163       bitregion_start = 0;
1164       if (bitsize.is_constant (&ibitsize)
1165 	  && bitnum.is_constant (&ibitnum)
1166 	  && get_best_mode (ibitsize, ibitnum,
1167 			    bitregion_start, bitregion_end,
1168 			    MEM_ALIGN (str_rtx), INT_MAX,
1169 			    MEM_VOLATILE_P (str_rtx), &best_mode))
1170 	addr_mode = best_mode;
1171       str_rtx = adjust_bitfield_address_size (str_rtx, addr_mode,
1172 					      offset, size);
1173     }
1174 
1175   if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
1176 			  bitregion_start, bitregion_end,
1177 			  fieldmode, value, reverse, true))
1178     gcc_unreachable ();
1179 }
1180 
1181 /* Use shifts and boolean operations to store VALUE into a bit field of
1182    width BITSIZE in OP0, starting at bit BITNUM.  If OP0_MODE is defined,
1183    it is the mode of OP0, otherwise OP0 is a BLKmode MEM.  VALUE_MODE is
1184    the mode of VALUE.
1185 
1186    If REVERSE is true, the store is to be done in reverse order.  */
1187 
1188 static void
1189 store_fixed_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
1190 		       unsigned HOST_WIDE_INT bitsize,
1191 		       unsigned HOST_WIDE_INT bitnum,
1192 		       poly_uint64 bitregion_start, poly_uint64 bitregion_end,
1193 		       rtx value, scalar_int_mode value_mode, bool reverse)
1194 {
1195   /* There is a case not handled here:
1196      a structure with a known alignment of just a halfword
1197      and a field split across two aligned halfwords within the structure.
1198      Or likewise a structure with a known alignment of just a byte
1199      and a field split across two bytes.
1200      Such cases are not supposed to be able to occur.  */
1201 
1202   scalar_int_mode best_mode;
1203   if (MEM_P (op0))
1204     {
1205       unsigned int max_bitsize = BITS_PER_WORD;
1206       scalar_int_mode imode;
1207       if (op0_mode.exists (&imode) && GET_MODE_BITSIZE (imode) < max_bitsize)
1208 	max_bitsize = GET_MODE_BITSIZE (imode);
1209 
1210       if (!get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
1211 			  MEM_ALIGN (op0), max_bitsize, MEM_VOLATILE_P (op0),
1212 			  &best_mode))
1213 	{
1214 	  /* The only way this should occur is if the field spans word
1215 	     boundaries.  */
1216 	  store_split_bit_field (op0, op0_mode, bitsize, bitnum,
1217 				 bitregion_start, bitregion_end,
1218 				 value, value_mode, reverse);
1219 	  return;
1220 	}
1221 
1222       op0 = narrow_bit_field_mem (op0, best_mode, bitsize, bitnum, &bitnum);
1223     }
1224   else
1225     best_mode = op0_mode.require ();
1226 
1227   store_fixed_bit_field_1 (op0, best_mode, bitsize, bitnum,
1228 			   value, value_mode, reverse);
1229 }
1230 
1231 /* Helper function for store_fixed_bit_field; it always stores
1232    the bit field using MODE, which is the mode of OP0.  The other
1233    arguments are as for store_fixed_bit_field.  */
1234 
1235 static void
1236 store_fixed_bit_field_1 (rtx op0, scalar_int_mode mode,
1237 			 unsigned HOST_WIDE_INT bitsize,
1238 			 unsigned HOST_WIDE_INT bitnum,
1239 			 rtx value, scalar_int_mode value_mode, bool reverse)
1240 {
1241   rtx temp;
1242   int all_zero = 0;
1243   int all_one = 0;
1244 
1245   /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1246      for invalid input, such as f5 from gcc.dg/pr48335-2.c.  */
1247 
1248   if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1249     /* BITNUM is the distance between our msb
1250        and that of the containing datum.
1251        Convert it to the distance from the lsb.  */
1252     bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1253 
1254   /* Now BITNUM is always the distance between our lsb
1255      and that of OP0.  */
1256 
1257   /* Shift VALUE left by BITNUM bits.  If VALUE is not constant,
1258      we must first convert its mode to MODE.  */
1259 
1260   if (CONST_INT_P (value))
1261     {
1262       unsigned HOST_WIDE_INT v = UINTVAL (value);
1263 
1264       if (bitsize < HOST_BITS_PER_WIDE_INT)
1265 	v &= (HOST_WIDE_INT_1U << bitsize) - 1;
1266 
1267       if (v == 0)
1268 	all_zero = 1;
1269       else if ((bitsize < HOST_BITS_PER_WIDE_INT
1270 		&& v == (HOST_WIDE_INT_1U << bitsize) - 1)
1271 	       || (bitsize == HOST_BITS_PER_WIDE_INT
1272 		   && v == HOST_WIDE_INT_M1U))
1273 	all_one = 1;
1274 
1275       value = lshift_value (mode, v, bitnum);
1276     }
1277   else
1278     {
1279       int must_and = (GET_MODE_BITSIZE (value_mode) != bitsize
1280 		      && bitnum + bitsize != GET_MODE_BITSIZE (mode));
1281 
1282       if (value_mode != mode)
1283 	value = convert_to_mode (mode, value, 1);
1284 
1285       if (must_and)
1286 	value = expand_binop (mode, and_optab, value,
1287 			      mask_rtx (mode, 0, bitsize, 0),
1288 			      NULL_RTX, 1, OPTAB_LIB_WIDEN);
1289       if (bitnum > 0)
1290 	value = expand_shift (LSHIFT_EXPR, mode, value,
1291 			      bitnum, NULL_RTX, 1);
1292     }
1293 
1294   if (reverse)
1295     value = flip_storage_order (mode, value);
1296 
1297   /* Now clear the chosen bits in OP0,
1298      except that if VALUE is -1 we need not bother.  */
1299   /* We keep the intermediates in registers to allow CSE to combine
1300      consecutive bitfield assignments.  */
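  /* For illustration (little-endian, non-reversed case): storing the
     constant 5 into a 3-bit field at BITNUM 4 of an SImode object
     computes (OP0 & 0xffffff8f) | 0x50 below.  */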
1301 
1302   temp = force_reg (mode, op0);
1303 
1304   if (! all_one)
1305     {
1306       rtx mask = mask_rtx (mode, bitnum, bitsize, 1);
1307       if (reverse)
1308 	mask = flip_storage_order (mode, mask);
1309       temp = expand_binop (mode, and_optab, temp, mask,
1310 			   NULL_RTX, 1, OPTAB_LIB_WIDEN);
1311       temp = force_reg (mode, temp);
1312     }
1313 
1314   /* Now logical-or VALUE into OP0, unless it is zero.  */
1315 
1316   if (! all_zero)
1317     {
1318       temp = expand_binop (mode, ior_optab, temp, value,
1319 			   NULL_RTX, 1, OPTAB_LIB_WIDEN);
1320       temp = force_reg (mode, temp);
1321     }
1322 
1323   if (op0 != temp)
1324     {
1325       op0 = copy_rtx (op0);
1326       emit_move_insn (op0, temp);
1327     }
1328 }
1329 
1330 /* Store a bit field that is split across multiple accessible memory objects.
1331 
1332    OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1333    BITSIZE is the field width; BITPOS the position of its first bit
1334    (within the word).
1335    VALUE is the value to store, which has mode VALUE_MODE.
1336    If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is
1337    a BLKmode MEM.
1338 
1339    If REVERSE is true, the store is to be done in reverse order.
1340 
1341    This does not yet handle fields wider than BITS_PER_WORD.  */
1342 
1343 static void
1344 store_split_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
1345 		       unsigned HOST_WIDE_INT bitsize,
1346 		       unsigned HOST_WIDE_INT bitpos,
1347 		       poly_uint64 bitregion_start, poly_uint64 bitregion_end,
1348 		       rtx value, scalar_int_mode value_mode, bool reverse)
1349 {
1350   unsigned int unit, total_bits, bitsdone = 0;
1351 
1352   /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle that
1353      much at a time.  */
1354   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1355     unit = BITS_PER_WORD;
1356   else
1357     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1358 
1359   /* If OP0 is a memory with a mode, then UNIT must not be larger than
1360      OP0's mode as well.  Otherwise, store_fixed_bit_field will call us
1361      again, and we will mutually recurse forever.  */
1362   if (MEM_P (op0) && op0_mode.exists ())
1363     unit = MIN (unit, GET_MODE_BITSIZE (op0_mode.require ()));
1364 
1365   /* If VALUE is a constant other than a CONST_INT, get it into a register in
1366      WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
1367      that VALUE might be a floating-point constant.  */
1368   if (CONSTANT_P (value) && !CONST_INT_P (value))
1369     {
1370       rtx word = gen_lowpart_common (word_mode, value);
1371 
1372       if (word && (value != word))
1373 	value = word;
1374       else
1375 	value = gen_lowpart_common (word_mode, force_reg (value_mode, value));
1376       value_mode = word_mode;
1377     }
1378 
1379   total_bits = GET_MODE_BITSIZE (value_mode);
1380 
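  /* For illustration (32-bit words, little-endian, OP0 a REG): a 10-bit
     field at BITPOS 30 is stored as a 2-bit piece at bit 30 of word 0
     followed by an 8-bit piece at bit 0 of word 1.  */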
1381   while (bitsdone < bitsize)
1382     {
1383       unsigned HOST_WIDE_INT thissize;
1384       unsigned HOST_WIDE_INT thispos;
1385       unsigned HOST_WIDE_INT offset;
1386       rtx part;
1387 
1388       offset = (bitpos + bitsdone) / unit;
1389       thispos = (bitpos + bitsdone) % unit;
1390 
1391       /* When the region of bytes we can touch is restricted, decrease
1392 	 UNIT close to the end of the region as needed.  If op0 is a REG
1393 	 or SUBREG of REG, don't do this, as there can't be data races
1394 	 on a register and we can expand shorter code in some cases.  */
1395       if (maybe_ne (bitregion_end, 0U)
1396 	  && unit > BITS_PER_UNIT
1397 	  && maybe_gt (bitpos + bitsdone - thispos + unit, bitregion_end + 1)
1398 	  && !REG_P (op0)
1399 	  && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0))))
1400 	{
1401 	  unit = unit / 2;
1402 	  continue;
1403 	}
1404 
1405       /* THISSIZE must not overrun a word boundary.  Otherwise,
1406 	 store_fixed_bit_field will call us again, and we will mutually
1407 	 recurse forever.  */
1408       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1409       thissize = MIN (thissize, unit - thispos);
1410 
1411       if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1412 	{
1413 	  /* Fetch successively less significant portions.  */
1414 	  if (CONST_INT_P (value))
1415 	    part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1416 			     >> (bitsize - bitsdone - thissize))
1417 			    & ((HOST_WIDE_INT_1 << thissize) - 1));
1418           /* Likewise, but the source is little-endian.  */
1419           else if (reverse)
1420 	    part = extract_fixed_bit_field (word_mode, value, value_mode,
1421 					    thissize,
1422 					    bitsize - bitsdone - thissize,
1423 					    NULL_RTX, 1, false);
1424 	  else
1425 	    /* The args are chosen so that the last part includes the
1426 	       lsb.  Give extract_bit_field the value it needs (with
1427 	       endianness compensation) to fetch the piece we want.  */
1428 	    part = extract_fixed_bit_field (word_mode, value, value_mode,
1429 					    thissize,
1430 					    total_bits - bitsize + bitsdone,
1431 					    NULL_RTX, 1, false);
1432 	}
1433       else
1434 	{
1435 	  /* Fetch successively more significant portions.  */
1436 	  if (CONST_INT_P (value))
1437 	    part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1438 			     >> bitsdone)
1439 			    & ((HOST_WIDE_INT_1 << thissize) - 1));
1440 	  /* Likewise, but the source is big-endian.  */
1441           else if (reverse)
1442 	    part = extract_fixed_bit_field (word_mode, value, value_mode,
1443 					    thissize,
1444 					    total_bits - bitsdone - thissize,
1445 					    NULL_RTX, 1, false);
1446 	  else
1447 	    part = extract_fixed_bit_field (word_mode, value, value_mode,
1448 					    thissize, bitsdone, NULL_RTX,
1449 					    1, false);
1450 	}
1451 
1452       /* If OP0 is a register, then handle OFFSET here.  */
1453       rtx op0_piece = op0;
1454       opt_scalar_int_mode op0_piece_mode = op0_mode;
1455       if (SUBREG_P (op0) || REG_P (op0))
1456 	{
1457 	  scalar_int_mode imode;
1458 	  if (op0_mode.exists (&imode)
1459 	      && GET_MODE_SIZE (imode) < UNITS_PER_WORD)
1460 	    {
1461 	      if (offset)
1462 		op0_piece = const0_rtx;
1463 	    }
1464 	  else
1465 	    {
1466 	      op0_piece = operand_subword_force (op0,
1467 						 offset * unit / BITS_PER_WORD,
1468 						 GET_MODE (op0));
1469 	      op0_piece_mode = word_mode;
1470 	    }
1471 	  offset &= BITS_PER_WORD / unit - 1;
1472 	}
1473 
1474       /* OFFSET is in UNITs, and UNIT is in bits.  If OP0_PIECE is const0_rtx,
1475 	 it is just an out-of-bounds access.  Ignore it.  */
1476       if (op0_piece != const0_rtx)
1477 	store_fixed_bit_field (op0_piece, op0_piece_mode, thissize,
1478 			       offset * unit + thispos, bitregion_start,
1479 			       bitregion_end, part, word_mode, reverse);
1480       bitsdone += thissize;
1481     }
1482 }
1483 
1484 /* A subroutine of extract_bit_field_1 that converts return value X
1485    to either MODE or TMODE.  MODE, TMODE and UNSIGNEDP are arguments
1486    to extract_bit_field.  */
1487 
1488 static rtx
1489 convert_extracted_bit_field (rtx x, machine_mode mode,
1490 			     machine_mode tmode, bool unsignedp)
1491 {
1492   if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1493     return x;
1494 
1495   /* If TMODE is not a scalar integer mode, first convert X to a scalar
1496      integer mode of the same size and then reinterpret the bits in
1497      TMODE via a SUBREG.  */
1498   if (!SCALAR_INT_MODE_P (tmode))
1499     {
1500       scalar_int_mode int_mode = int_mode_for_mode (tmode).require ();
1501       x = convert_to_mode (int_mode, x, unsignedp);
1502       x = force_reg (int_mode, x);
1503       return gen_lowpart (tmode, x);
1504     }
1505 
1506   return convert_to_mode (tmode, x, unsignedp);
1507 }
1508 
1509 /* Try to use an ext(z)v pattern to extract a field from OP0.
1510    Return the extracted value on success, otherwise return null.
1511    EXTV describes the extraction instruction to use.  If OP0_MODE
1512    is defined, it is the mode of OP0, otherwise OP0 is a BLKmode MEM.
1513    The other arguments are as for extract_bit_field.  */
1514 
1515 static rtx
1516 extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
1517 			      opt_scalar_int_mode op0_mode,
1518 			      unsigned HOST_WIDE_INT bitsize,
1519 			      unsigned HOST_WIDE_INT bitnum,
1520 			      int unsignedp, rtx target,
1521 			      machine_mode mode, machine_mode tmode)
1522 {
1523   struct expand_operand ops[4];
1524   rtx spec_target = target;
1525   rtx spec_target_subreg = 0;
1526   scalar_int_mode ext_mode = extv->field_mode;
1527   unsigned unit = GET_MODE_BITSIZE (ext_mode);
1528 
1529   if (bitsize == 0 || unit < bitsize)
1530     return NULL_RTX;
1531 
1532   if (MEM_P (op0))
1533     /* Get a reference to the first byte of the field.  */
1534     op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
1535 				&bitnum);
1536   else
1537     {
1538       /* Convert from counting within OP0 to counting in EXT_MODE.  */
1539       if (BYTES_BIG_ENDIAN)
1540 	bitnum += unit - GET_MODE_BITSIZE (op0_mode.require ());
1541 
1542       /* If op0 is a register, we need it in EXT_MODE to make it
1543 	 acceptable to the format of ext(z)v.  */
1544       if (GET_CODE (op0) == SUBREG && op0_mode.require () != ext_mode)
1545 	return NULL_RTX;
1546       if (REG_P (op0) && op0_mode.require () != ext_mode)
1547 	op0 = gen_lowpart_SUBREG (ext_mode, op0);
1548     }
1549 
1550   /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1551      "backwards" from the size of the unit we are extracting from.
1552      Otherwise, we count bits from the most significant on a
1553      BYTES/BITS_BIG_ENDIAN machine.  */
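  /* E.g., with a 32-bit EXT_MODE and BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN,
     a 5-bit field at bit 3 is renumbered to start at bit 32 - 5 - 3 = 24.  */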
1554 
1555   if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1556     bitnum = unit - bitsize - bitnum;
1557 
1558   if (target == 0)
1559     target = spec_target = gen_reg_rtx (tmode);
1560 
1561   if (GET_MODE (target) != ext_mode)
1562     {
1563       /* Don't use LHS paradoxical subreg if explicit truncation is needed
1564 	 between the mode of the extraction (word_mode) and the target
1565 	 mode.  Instead, create a temporary and use convert_move to set
1566 	 the target.  */
1567       if (REG_P (target)
1568 	  && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode))
1569 	{
1570 	  target = gen_lowpart (ext_mode, target);
1571 	  if (partial_subreg_p (GET_MODE (spec_target), ext_mode))
1572 	    spec_target_subreg = target;
1573 	}
1574       else
1575 	target = gen_reg_rtx (ext_mode);
1576     }
1577 
1578   create_output_operand (&ops[0], target, ext_mode);
1579   create_fixed_operand (&ops[1], op0);
1580   create_integer_operand (&ops[2], bitsize);
1581   create_integer_operand (&ops[3], bitnum);
1582   if (maybe_expand_insn (extv->icode, 4, ops))
1583     {
1584       target = ops[0].value;
1585       if (target == spec_target)
1586 	return target;
1587       if (target == spec_target_subreg)
1588 	return spec_target;
1589       return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1590     }
1591   return NULL_RTX;
1592 }
1593 
1594 /* See whether it would be valid to extract the part of OP0 described
1595    by BITNUM and BITSIZE into a value of mode MODE using a subreg
1596    operation.  Return the subreg if so, otherwise return null.  */
1597 
1598 static rtx
1599 extract_bit_field_as_subreg (machine_mode mode, rtx op0,
1600 			     poly_uint64 bitsize, poly_uint64 bitnum)
1601 {
1602   poly_uint64 bytenum;
1603   if (multiple_p (bitnum, BITS_PER_UNIT, &bytenum)
1604       && known_eq (bitsize, GET_MODE_BITSIZE (mode))
1605       && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
1606       && TRULY_NOOP_TRUNCATION_MODES_P (mode, GET_MODE (op0)))
1607     return simplify_gen_subreg (mode, op0, GET_MODE (op0), bytenum);
1608   return NULL_RTX;
1609 }
1610 
1611 /* A subroutine of extract_bit_field, with the same arguments.
1612    If FALLBACK_P is true, fall back to extract_fixed_bit_field
1613    if we can find no other means of implementing the operation.
1614    If FALLBACK_P is false, return NULL instead.  */
1615 
1616 static rtx
1617 extract_bit_field_1 (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
1618 		     int unsignedp, rtx target, machine_mode mode,
1619 		     machine_mode tmode, bool reverse, bool fallback_p,
1620 		     rtx *alt_rtl)
1621 {
1622   rtx op0 = str_rtx;
1623   machine_mode mode1;
1624 
1625   if (tmode == VOIDmode)
1626     tmode = mode;
1627 
1628   while (GET_CODE (op0) == SUBREG)
1629     {
1630       bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1631       op0 = SUBREG_REG (op0);
1632     }
1633 
1634   /* If we have an out-of-bounds access to a register, just return an
1635      uninitialized register of the required mode.  This can occur if the
1636      source code contains an out-of-bounds access to a small array.  */
1637   if (REG_P (op0) && known_ge (bitnum, GET_MODE_BITSIZE (GET_MODE (op0))))
1638     return gen_reg_rtx (tmode);
1639 
1640   if (REG_P (op0)
1641       && mode == GET_MODE (op0)
1642       && known_eq (bitnum, 0U)
1643       && known_eq (bitsize, GET_MODE_BITSIZE (GET_MODE (op0))))
1644     {
1645       if (reverse)
1646 	op0 = flip_storage_order (mode, op0);
1647       /* We're trying to extract a full register from itself.  */
1648       return op0;
1649     }
1650 
1651   /* First check for vector-from-vector extractions.  */
1652   if (VECTOR_MODE_P (GET_MODE (op0))
1653       && !MEM_P (op0)
1654       && VECTOR_MODE_P (tmode)
1655       && known_eq (bitsize, GET_MODE_BITSIZE (tmode))
1656       && maybe_gt (GET_MODE_SIZE (GET_MODE (op0)), GET_MODE_SIZE (tmode)))
1657     {
1658       machine_mode new_mode = GET_MODE (op0);
1659       if (GET_MODE_INNER (new_mode) != GET_MODE_INNER (tmode))
1660 	{
1661 	  scalar_mode inner_mode = GET_MODE_INNER (tmode);
1662 	  poly_uint64 nunits;
1663 	  if (!multiple_p (GET_MODE_BITSIZE (GET_MODE (op0)),
1664 			   GET_MODE_UNIT_BITSIZE (tmode), &nunits)
1665 	      || !mode_for_vector (inner_mode, nunits).exists (&new_mode)
1666 	      || !VECTOR_MODE_P (new_mode)
1667 	      || maybe_ne (GET_MODE_SIZE (new_mode),
1668 			   GET_MODE_SIZE (GET_MODE (op0)))
1669 	      || GET_MODE_INNER (new_mode) != GET_MODE_INNER (tmode)
1670 	      || !targetm.vector_mode_supported_p (new_mode))
1671 	    new_mode = VOIDmode;
1672 	}
1673       poly_uint64 pos;
1674       if (new_mode != VOIDmode
1675 	  && (convert_optab_handler (vec_extract_optab, new_mode, tmode)
1676 	      != CODE_FOR_nothing)
1677 	  && multiple_p (bitnum, GET_MODE_BITSIZE (tmode), &pos))
1678 	{
1679 	  struct expand_operand ops[3];
1680 	  machine_mode outermode = new_mode;
1681 	  machine_mode innermode = tmode;
1682 	  enum insn_code icode
1683 	    = convert_optab_handler (vec_extract_optab, outermode, innermode);
1684 
1685 	  if (new_mode != GET_MODE (op0))
1686 	    op0 = gen_lowpart (new_mode, op0);
1687 	  create_output_operand (&ops[0], target, innermode);
1688 	  ops[0].target = 1;
1689 	  create_input_operand (&ops[1], op0, outermode);
1690 	  create_integer_operand (&ops[2], pos);
1691 	  if (maybe_expand_insn (icode, 3, ops))
1692 	    {
1693 	      if (alt_rtl && ops[0].target)
1694 		*alt_rtl = target;
1695 	      target = ops[0].value;
1696 	      if (GET_MODE (target) != mode)
1697 		return gen_lowpart (tmode, target);
1698 	      return target;
1699 	    }
1700 	}
1701     }
1702 
1703   /* See if we can get a better vector mode before extracting.  */
1704   if (VECTOR_MODE_P (GET_MODE (op0))
1705       && !MEM_P (op0)
1706       && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1707     {
1708       machine_mode new_mode;
1709 
1710       if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1711 	new_mode = MIN_MODE_VECTOR_FLOAT;
1712       else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1713 	new_mode = MIN_MODE_VECTOR_FRACT;
1714       else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1715 	new_mode = MIN_MODE_VECTOR_UFRACT;
1716       else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1717 	new_mode = MIN_MODE_VECTOR_ACCUM;
1718       else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1719 	new_mode = MIN_MODE_VECTOR_UACCUM;
1720       else
1721 	new_mode = MIN_MODE_VECTOR_INT;
1722 
1723       FOR_EACH_MODE_FROM (new_mode, new_mode)
1724 	if (known_eq (GET_MODE_SIZE (new_mode), GET_MODE_SIZE (GET_MODE (op0)))
1725 	    && known_eq (GET_MODE_UNIT_SIZE (new_mode), GET_MODE_SIZE (tmode))
1726 	    && targetm.vector_mode_supported_p (new_mode))
1727 	  break;
1728       if (new_mode != VOIDmode)
1729 	op0 = gen_lowpart (new_mode, op0);
1730     }
1731 
1732   /* Use vec_extract patterns for extracting parts of vectors whenever
1733      available.  If that fails, see whether the current modes and bitregion
1734      give a natural subreg.  */
1735   machine_mode outermode = GET_MODE (op0);
1736   if (VECTOR_MODE_P (outermode) && !MEM_P (op0))
1737     {
1738       scalar_mode innermode = GET_MODE_INNER (outermode);
1739       enum insn_code icode
1740 	= convert_optab_handler (vec_extract_optab, outermode, innermode);
1741       poly_uint64 pos;
1742       if (icode != CODE_FOR_nothing
1743 	  && known_eq (bitsize, GET_MODE_BITSIZE (innermode))
1744 	  && multiple_p (bitnum, GET_MODE_BITSIZE (innermode), &pos))
1745 	{
1746 	  struct expand_operand ops[3];
1747 
1748 	  create_output_operand (&ops[0], target, innermode);
1749 	  ops[0].target = 1;
1750 	  create_input_operand (&ops[1], op0, outermode);
1751 	  create_integer_operand (&ops[2], pos);
1752 	  if (maybe_expand_insn (icode, 3, ops))
1753 	    {
1754 	      if (alt_rtl && ops[0].target)
1755 		*alt_rtl = target;
1756 	      target = ops[0].value;
1757 	      if (GET_MODE (target) != mode)
1758 		return gen_lowpart (tmode, target);
1759 	      return target;
1760 	    }
1761 	}
1762       /* Using subregs is useful if we're extracting one register vector
1763 	 from a multi-register vector.  extract_bit_field_as_subreg checks
1764 	 for valid bitsize and bitnum, so we don't need to do that here.  */
1765       if (VECTOR_MODE_P (mode))
1766 	{
1767 	  rtx sub = extract_bit_field_as_subreg (mode, op0, bitsize, bitnum);
1768 	  if (sub)
1769 	    return sub;
1770 	}
1771     }
1772 
1773   /* Make sure we are playing with integral modes.  Pun with subregs
1774      if we aren't.  */
1775   opt_scalar_int_mode op0_mode = int_mode_for_mode (GET_MODE (op0));
1776   scalar_int_mode imode;
1777   if (!op0_mode.exists (&imode) || imode != GET_MODE (op0))
1778     {
1779       if (MEM_P (op0))
1780 	op0 = adjust_bitfield_address_size (op0, op0_mode.else_blk (),
1781 					    0, MEM_SIZE (op0));
1782       else if (op0_mode.exists (&imode))
1783 	{
1784 	  op0 = gen_lowpart (imode, op0);
1785 
1786 	  /* If we got a SUBREG, force it into a register since we
1787 	     aren't going to be able to do another SUBREG on it.  */
1788 	  if (GET_CODE (op0) == SUBREG)
1789 	    op0 = force_reg (imode, op0);
1790 	}
1791       else
1792 	{
1793 	  poly_int64 size = GET_MODE_SIZE (GET_MODE (op0));
1794 	  rtx mem = assign_stack_temp (GET_MODE (op0), size);
1795 	  emit_move_insn (mem, op0);
1796 	  op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size);
1797 	}
1798     }
1799 
1800   /* ??? We currently assume TARGET is at least as big as BITSIZE.
1801      If that's wrong, the solution is to test for it and set TARGET to 0
1802      if needed.  */
1803 
1804   /* Get the mode of the field to use for atomic access or subreg
1805      conversion.  */
1806   if (!SCALAR_INT_MODE_P (tmode)
1807       || !mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0).exists (&mode1))
1808     mode1 = mode;
1809   gcc_assert (mode1 != BLKmode);
1810 
1811   /* Extraction of a full MODE1 value can be done with a subreg as long
1812      as the least significant bit of the value is the least significant
1813      bit of either OP0 or a word of OP0.  */
1814   if (!MEM_P (op0) && !reverse)
1815     {
1816       rtx sub = extract_bit_field_as_subreg (mode1, op0, bitsize, bitnum);
1817       if (sub)
1818 	return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1819     }
1820 
1821   /* Extraction of a full MODE1 value can be done with a load as long as
1822      the field is on a byte boundary and is sufficiently aligned.  */
1823   poly_uint64 bytenum;
1824   if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1, &bytenum))
1825     {
1826       op0 = adjust_bitfield_address (op0, mode1, bytenum);
1827       if (reverse)
1828 	op0 = flip_storage_order (mode1, op0);
1829       return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1830     }
1831 
1832   /* If we have a memory source and a non-constant bit offset, restrict
1833      the memory to the referenced bytes.  This is a worst-case fallback
1834      but is useful for things like vector booleans.  */
1835   if (MEM_P (op0) && !bitnum.is_constant ())
1836     {
1837       bytenum = bits_to_bytes_round_down (bitnum);
1838       bitnum = num_trailing_bits (bitnum);
1839       poly_uint64 bytesize = bits_to_bytes_round_up (bitnum + bitsize);
1840       op0 = adjust_bitfield_address_size (op0, BLKmode, bytenum, bytesize);
1841       op0_mode = opt_scalar_int_mode ();
1842     }
1843 
1844   /* It's possible we'll need to handle other cases here for
1845      polynomial bitnum and bitsize.  */
1846 
1847   /* From here on we need to be looking at a fixed-size extraction.  */
1848   return extract_integral_bit_field (op0, op0_mode, bitsize.to_constant (),
1849 				     bitnum.to_constant (), unsignedp,
1850 				     target, mode, tmode, reverse, fallback_p);
1851 }
1852 
1853 /* Subroutine of extract_bit_field_1, with the same arguments, except
1854    that BITSIZE and BITNUM are constant.  Handle cases specific to
1855    integral modes.  If OP0_MODE is defined, it is the mode of OP0,
1856    otherwise OP0 is a BLKmode MEM.  */
1857 
1858 static rtx
1859 extract_integral_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
1860 			    unsigned HOST_WIDE_INT bitsize,
1861 			    unsigned HOST_WIDE_INT bitnum, int unsignedp,
1862 			    rtx target, machine_mode mode, machine_mode tmode,
1863 			    bool reverse, bool fallback_p)
1864 {
1865   /* Handle fields bigger than a word.  */
1866 
1867   if (bitsize > BITS_PER_WORD)
1868     {
1869       /* Here we transfer the words of the field
1870 	 in the order least significant first.
1871 	 This is because the most significant word is the one which may
1872 	 be less than full.  */
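      /* E.g., a 40-bit field on a 32-bit target is transferred as two
	 words; only 8 bits of the more significant word come from the
	 field itself.  */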
1873 
1874       const bool backwards = WORDS_BIG_ENDIAN;
1875       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1876       unsigned int i;
1877       rtx_insn *last;
1878 
1879       if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1880 	target = gen_reg_rtx (mode);
1881 
1882       /* In case we're about to clobber a base register or something
1883 	 (see gcc.c-torture/execute/20040625-1.c).   */
1884       if (reg_mentioned_p (target, op0))
1885 	target = gen_reg_rtx (mode);
1886 
1887       /* Indicate for flow that the entire target reg is being set.  */
1888       emit_clobber (target);
1889 
1890       /* The mode must be fixed-size, since extract_bit_field_1 handles
1891 	 extractions from variable-sized objects before calling this
1892 	 function.  */
1893       unsigned int target_size
1894 	= GET_MODE_SIZE (GET_MODE (target)).to_constant ();
1895       last = get_last_insn ();
1896       for (i = 0; i < nwords; i++)
1897 	{
1898 	  /* If I is 0, use the low-order word in both field and target;
1899 	     if I is 1, use the next to lowest word; and so on.  */
1900 	  /* Word number in TARGET to use.  */
1901 	  unsigned int wordnum
1902 	    = (backwards ? target_size / UNITS_PER_WORD - i - 1 : i);
1903 	  /* Offset from start of field in OP0.  */
1904 	  unsigned int bit_offset = (backwards ^ reverse
1905 				     ? MAX ((int) bitsize - ((int) i + 1)
1906 					    * BITS_PER_WORD,
1907 					    0)
1908 				     : (int) i * BITS_PER_WORD);
1909 	  rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1910 	  rtx result_part
1911 	    = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1912 					     bitsize - i * BITS_PER_WORD),
1913 				   bitnum + bit_offset, 1, target_part,
1914 				   mode, word_mode, reverse, fallback_p, NULL);
1915 
1916 	  gcc_assert (target_part);
1917 	  if (!result_part)
1918 	    {
1919 	      delete_insns_since (last);
1920 	      return NULL;
1921 	    }
1922 
1923 	  if (result_part != target_part)
1924 	    emit_move_insn (target_part, result_part);
1925 	}
1926 
1927       if (unsignedp)
1928 	{
1929 	  /* Unless we've filled TARGET, the upper regs in a multi-reg value
1930 	     need to be zeroed out.  */
1931 	  if (target_size > nwords * UNITS_PER_WORD)
1932 	    {
1933 	      unsigned int i, total_words;
1934 
1935 	      total_words = target_size / UNITS_PER_WORD;
1936 	      for (i = nwords; i < total_words; i++)
1937 		emit_move_insn
1938 		  (operand_subword (target,
1939 				    backwards ? total_words - i - 1 : i,
1940 				    1, VOIDmode),
1941 		   const0_rtx);
1942 	    }
1943 	  return target;
1944 	}
1945 
1946       /* Signed bit field: sign-extend with two arithmetic shifts.  */
1947       target = expand_shift (LSHIFT_EXPR, mode, target,
1948 			     GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1949       return expand_shift (RSHIFT_EXPR, mode, target,
1950 			   GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1951     }
1952 
1953   /* If OP0 is a multi-word register, narrow it to the affected word.
1954      If the region spans two words, defer to extract_split_bit_field.  */
1955   if (!MEM_P (op0) && GET_MODE_SIZE (op0_mode.require ()) > UNITS_PER_WORD)
1956     {
1957       if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
1958 	{
1959 	  if (!fallback_p)
1960 	    return NULL_RTX;
1961 	  target = extract_split_bit_field (op0, op0_mode, bitsize, bitnum,
1962 					    unsignedp, reverse);
1963 	  return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1964 	}
1965       op0 = simplify_gen_subreg (word_mode, op0, op0_mode.require (),
1966 				 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1967       op0_mode = word_mode;
1968       bitnum %= BITS_PER_WORD;
1969     }
1970 
1971   /* From here on we know the desired field is smaller than a word.
1972      If OP0 is a register, it too fits within a word.  */
1973   enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv;
1974   extraction_insn extv;
1975   if (!MEM_P (op0)
1976       && !reverse
1977       /* ??? We could limit the structure size to the part of OP0 that
1978 	 contains the field, with appropriate checks for endianness
1979 	 and TARGET_TRULY_NOOP_TRUNCATION.  */
1980       && get_best_reg_extraction_insn (&extv, pattern,
1981 				       GET_MODE_BITSIZE (op0_mode.require ()),
1982 				       tmode))
1983     {
1984       rtx result = extract_bit_field_using_extv (&extv, op0, op0_mode,
1985 						 bitsize, bitnum,
1986 						 unsignedp, target, mode,
1987 						 tmode);
1988       if (result)
1989 	return result;
1990     }
1991 
1992   /* If OP0 is a memory reference, try copying it to a register and seeing
1993      if a cheap register alternative is available.  */
1994   if (MEM_P (op0) && !reverse)
1995     {
1996       if (get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum,
1997 					tmode))
1998 	{
1999 	  rtx result = extract_bit_field_using_extv (&extv, op0, op0_mode,
2000 						     bitsize, bitnum,
2001 						     unsignedp, target, mode,
2002 						     tmode);
2003 	  if (result)
2004 	    return result;
2005 	}
2006 
2007       rtx_insn *last = get_last_insn ();
2008 
2009       /* Try loading part of OP0 into a register and extracting the
2010 	 bitfield from that.  */
2011       unsigned HOST_WIDE_INT bitpos;
2012       rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum,
2013 					       0, 0, tmode, &bitpos);
2014       if (xop0)
2015 	{
2016 	  xop0 = copy_to_reg (xop0);
2017 	  rtx result = extract_bit_field_1 (xop0, bitsize, bitpos,
2018 					    unsignedp, target,
2019 					    mode, tmode, reverse, false, NULL);
2020 	  if (result)
2021 	    return result;
2022 	  delete_insns_since (last);
2023 	}
2024     }
2025 
2026   if (!fallback_p)
2027     return NULL;
2028 
2029   /* Find a correspondingly-sized integer field, so we can apply
2030      shifts and masks to it.  */
2031   scalar_int_mode int_mode;
2032   if (!int_mode_for_mode (tmode).exists (&int_mode))
2033     /* If this fails, we should probably push op0 out to memory and then
2034        do a load.  */
2035     int_mode = int_mode_for_mode (mode).require ();
2036 
2037   target = extract_fixed_bit_field (int_mode, op0, op0_mode, bitsize,
2038 				    bitnum, target, unsignedp, reverse);
2039 
2040   /* Complex values must be reversed piecewise, so we need to undo the global
2041      reversal, convert to the complex mode and reverse again.  */
2042   if (reverse && COMPLEX_MODE_P (tmode))
2043     {
2044       target = flip_storage_order (int_mode, target);
2045       target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
2046       target = flip_storage_order (tmode, target);
2047     }
2048   else
2049     target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
2050 
2051   return target;
2052 }
2053 
2054 /* Generate code to extract a bit-field from STR_RTX
2055    containing BITSIZE bits, starting at BITNUM,
2056    and put it in TARGET if possible (if TARGET is nonzero).
2057    Regardless of TARGET, we return the rtx for where the value is placed.
2058 
2059    STR_RTX is the structure containing the byte (a REG or MEM).
2060    UNSIGNEDP is nonzero if this is an unsigned bit field.
2061    MODE is the natural mode of the field value once extracted.
2062    TMODE is the mode the caller would like the value to have;
2063    but the value may be returned with type MODE instead.
2064 
2065    If REVERSE is true, the extraction is to be done in reverse order.
2066 
2067    If a TARGET is specified and we can store in it at no extra cost,
2068    we do so, and return TARGET.
2069    Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
2070    if they are equally easy.  */
2071 
2072 rtx
2073 extract_bit_field (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
2074 		   int unsignedp, rtx target, machine_mode mode,
2075 		   machine_mode tmode, bool reverse, rtx *alt_rtl)
2076 {
2077   machine_mode mode1;
2078 
2079   /* Handle -fstrict-volatile-bitfields in the cases where it applies.  */
2080   if (maybe_ne (GET_MODE_BITSIZE (GET_MODE (str_rtx)), 0))
2081     mode1 = GET_MODE (str_rtx);
2082   else if (target && maybe_ne (GET_MODE_BITSIZE (GET_MODE (target)), 0))
2083     mode1 = GET_MODE (target);
2084   else
2085     mode1 = tmode;
2086 
2087   unsigned HOST_WIDE_INT ibitsize, ibitnum;
2088   scalar_int_mode int_mode;
2089   if (bitsize.is_constant (&ibitsize)
2090       && bitnum.is_constant (&ibitnum)
2091       && is_a <scalar_int_mode> (mode1, &int_mode)
2092       && strict_volatile_bitfield_p (str_rtx, ibitsize, ibitnum,
2093 				     int_mode, 0, 0))
2094     {
2095       /* Extraction of a full INT_MODE value can be done with a simple load.
2096 	 We know here that the field can be accessed with one single
2097 	 instruction.  For targets that support unaligned memory,
2098 	 an unaligned access may be necessary.  */
2099       if (ibitsize == GET_MODE_BITSIZE (int_mode))
2100 	{
2101 	  rtx result = adjust_bitfield_address (str_rtx, int_mode,
2102 						ibitnum / BITS_PER_UNIT);
2103 	  if (reverse)
2104 	    result = flip_storage_order (int_mode, result);
2105 	  gcc_assert (ibitnum % BITS_PER_UNIT == 0);
2106 	  return convert_extracted_bit_field (result, mode, tmode, unsignedp);
2107 	}
2108 
2109       str_rtx = narrow_bit_field_mem (str_rtx, int_mode, ibitsize, ibitnum,
2110 				      &ibitnum);
2111       gcc_assert (ibitnum + ibitsize <= GET_MODE_BITSIZE (int_mode));
2112       str_rtx = copy_to_reg (str_rtx);
2113       return extract_bit_field_1 (str_rtx, ibitsize, ibitnum, unsignedp,
2114 				  target, mode, tmode, reverse, true, alt_rtl);
2115     }
2116 
2117   return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
2118 			      target, mode, tmode, reverse, true, alt_rtl);
2119 }
2120 
2121 /* Use shifts and boolean operations to extract a field of BITSIZE bits
2122    from bit BITNUM of OP0.  If OP0_MODE is defined, it is the mode of OP0,
2123    otherwise OP0 is a BLKmode MEM.
2124 
2125    UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
2126    If REVERSE is true, the extraction is to be done in reverse order.
2127 
2128    If TARGET is nonzero, attempts to store the value there
2129    and return TARGET, but this is not guaranteed.
2130    If TARGET is not used, create a pseudo-reg of mode TMODE for the value.  */
2131 
2132 static rtx
2133 extract_fixed_bit_field (machine_mode tmode, rtx op0,
2134 			 opt_scalar_int_mode op0_mode,
2135 			 unsigned HOST_WIDE_INT bitsize,
2136 			 unsigned HOST_WIDE_INT bitnum, rtx target,
2137 			 int unsignedp, bool reverse)
2138 {
2139   scalar_int_mode mode;
2140   if (MEM_P (op0))
2141     {
2142       if (!get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0),
2143 			  BITS_PER_WORD, MEM_VOLATILE_P (op0), &mode))
2144 	/* The only way this should occur is if the field spans word
2145 	   boundaries.  */
2146 	return extract_split_bit_field (op0, op0_mode, bitsize, bitnum,
2147 					unsignedp, reverse);
2148 
2149       op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
2150     }
2151   else
2152     mode = op0_mode.require ();
2153 
2154   return extract_fixed_bit_field_1 (tmode, op0, mode, bitsize, bitnum,
2155 				    target, unsignedp, reverse);
2156 }
2157 
2158 /* Helper function for extract_fixed_bit_field, extracts
2159    the bit field always using MODE, which is the mode of OP0.
2160    The other arguments are as for extract_fixed_bit_field.  */
2161 
2162 static rtx
2163 extract_fixed_bit_field_1 (machine_mode tmode, rtx op0, scalar_int_mode mode,
2164 			   unsigned HOST_WIDE_INT bitsize,
2165 			   unsigned HOST_WIDE_INT bitnum, rtx target,
2166 			   int unsignedp, bool reverse)
2167 {
2168   /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
2169      for invalid input, such as extracting the equivalent of f5 from
2170      gcc.dg/pr48335-2.c.  */
2171 
2172   if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2173     /* BITNUM is the distance between our msb and that of OP0.
2174        Convert it to the distance from the lsb.  */
2175     bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
2176 
2177   /* Now BITNUM is always the distance between the field's lsb and that of OP0.
2178      We have reduced the big-endian case to the little-endian case.  */
2179   if (reverse)
2180     op0 = flip_storage_order (mode, op0);
2181 
2182   if (unsignedp)
2183     {
2184       if (bitnum)
2185 	{
2186 	  /* If the field does not already start at the lsb,
2187 	     shift it so it does.  */
2188 	  /* Maybe propagate the target for the shift.  */
2189 	  rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2190 	  if (tmode != mode)
2191 	    subtarget = 0;
2192 	  op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
2193 	}
2194       /* Convert the value to the desired mode.  TMODE must also be a
2195 	 scalar integer for this conversion to make sense, since we
2196 	 shouldn't reinterpret the bits.  */
2197       scalar_int_mode new_mode = as_a <scalar_int_mode> (tmode);
2198       if (mode != new_mode)
2199 	op0 = convert_to_mode (new_mode, op0, 1);
2200 
2201       /* Unless the msb of the field used to be the msb when we shifted,
2202 	 mask out the upper bits.  */
2203 
2204       if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
2205 	return expand_binop (new_mode, and_optab, op0,
2206 			     mask_rtx (new_mode, 0, bitsize, 0),
2207 			     target, 1, OPTAB_LIB_WIDEN);
2208       return op0;
2209     }
2210 
2211   /* To extract a signed bit-field, first shift its msb to the msb of the word,
2212      then arithmetic-shift its lsb to the lsb of the word.  */
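  /* E.g., once the value has been narrowed to a 16-bit mode below, an
     8-bit signed field whose lsb is at bit 4 is extracted by shifting
     left by 16 - (8 + 4) = 4 and then arithmetic-shifting right by
     16 - 8 = 8.  */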
2213   op0 = force_reg (mode, op0);
2214 
2215   /* Find the narrowest integer mode that contains the field.  */
2216 
2217   opt_scalar_int_mode mode_iter;
2218   FOR_EACH_MODE_IN_CLASS (mode_iter, MODE_INT)
2219     if (GET_MODE_BITSIZE (mode_iter.require ()) >= bitsize + bitnum)
2220       break;
2221 
2222   mode = mode_iter.require ();
2223   op0 = convert_to_mode (mode, op0, 0);
2224 
2225   if (mode != tmode)
2226     target = 0;
2227 
2228   if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
2229     {
2230       int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
2231       /* Maybe propagate the target for the shift.  */
2232       rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2233       op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
2234     }
2235 
2236   return expand_shift (RSHIFT_EXPR, mode, op0,
2237 		       GET_MODE_BITSIZE (mode) - bitsize, target, 0);
2238 }
2239 
2240 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
2241    VALUE << BITPOS.  */
2242 
2243 static rtx
2244 lshift_value (machine_mode mode, unsigned HOST_WIDE_INT value,
2245 	      int bitpos)
2246 {
2247   return immed_wide_int_const (wi::lshift (value, bitpos), mode);
2248 }
2249 
2250 /* Extract a bit field that is split across two words
2251    and return an RTX for the result.
2252 
2253    OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
2254    BITSIZE is the field width; BITPOS, position of its first bit, in the word.
2255    UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.
2256    If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is
2257    a BLKmode MEM.
2258 
2259    If REVERSE is true, the extraction is to be done in reverse order.  */
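/* For example, with 32-bit words a 20-bit field starting at bit 24 spans
   two words; it is extracted below in word-sized-or-smaller pieces that
   are shifted into position and combined with IOR.  */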
2260 
2261 static rtx
2262 extract_split_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
2263 			 unsigned HOST_WIDE_INT bitsize,
2264 			 unsigned HOST_WIDE_INT bitpos, int unsignedp,
2265 			 bool reverse)
2266 {
2267   unsigned int unit;
2268   unsigned int bitsdone = 0;
2269   rtx result = NULL_RTX;
2270   int first = 1;
2271 
2272   /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle that
2273      much at a time.  */
2274   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
2275     unit = BITS_PER_WORD;
2276   else
2277     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
2278 
2279   while (bitsdone < bitsize)
2280     {
2281       unsigned HOST_WIDE_INT thissize;
2282       rtx part;
2283       unsigned HOST_WIDE_INT thispos;
2284       unsigned HOST_WIDE_INT offset;
2285 
2286       offset = (bitpos + bitsdone) / unit;
2287       thispos = (bitpos + bitsdone) % unit;
2288 
2289       /* THISSIZE must not overrun a word boundary.  Otherwise,
2290 	 extract_fixed_bit_field will call us again, and we will mutually
2291 	 recurse forever.  */
2292       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
2293       thissize = MIN (thissize, unit - thispos);
2294 
2295       /* If OP0 is a register, then handle OFFSET here.  */
2296       rtx op0_piece = op0;
2297       opt_scalar_int_mode op0_piece_mode = op0_mode;
2298       if (SUBREG_P (op0) || REG_P (op0))
2299 	{
2300 	  op0_piece = operand_subword_force (op0, offset, op0_mode.require ());
2301 	  op0_piece_mode = word_mode;
2302 	  offset = 0;
2303 	}
2304 
2305       /* Extract the parts in bit-counting order,
2306 	 whose meaning is determined by BYTES_BIG_ENDIAN.
2307 	 OFFSET is in UNITs, and UNIT is in bits.  */
2308       part = extract_fixed_bit_field (word_mode, op0_piece, op0_piece_mode,
2309 				      thissize, offset * unit + thispos,
2310 				      0, 1, reverse);
2311       bitsdone += thissize;
2312 
2313       /* Shift this part into place for the result.  */
2314       if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2315 	{
2316 	  if (bitsize != bitsdone)
2317 	    part = expand_shift (LSHIFT_EXPR, word_mode, part,
2318 				 bitsize - bitsdone, 0, 1);
2319 	}
2320       else
2321 	{
2322 	  if (bitsdone != thissize)
2323 	    part = expand_shift (LSHIFT_EXPR, word_mode, part,
2324 				 bitsdone - thissize, 0, 1);
2325 	}
2326 
2327       if (first)
2328 	result = part;
2329       else
2330 	/* Combine the parts with bitwise or.  This works
2331 	   because we extracted each part as an unsigned bit field.  */
2332 	result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2333 			       OPTAB_LIB_WIDEN);
2334 
2335       first = 0;
2336     }
2337 
2338   /* Unsigned bit field: we are done.  */
2339   if (unsignedp)
2340     return result;
2341   /* Signed bit field: sign-extend with two arithmetic shifts.  */
2342   result = expand_shift (LSHIFT_EXPR, word_mode, result,
2343 			 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2344   return expand_shift (RSHIFT_EXPR, word_mode, result,
2345 		       BITS_PER_WORD - bitsize, NULL_RTX, 0);
2346 }
2347 
2348 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2349    the bit pattern.  SRC_MODE is the mode of SRC; if this is smaller than
2350    MODE, fill the upper bits with zeros.  Fail if the layout of either
2351    mode is unknown (as for CC modes) or if the extraction would involve
2352    unprofitable mode punning.  Return the value on success, otherwise
2353    return null.
2354 
2355    This is different from gen_lowpart* in these respects:
2356 
2357      - the returned value must always be considered an rvalue
2358 
2359      - when MODE is wider than SRC_MODE, the extraction involves
2360        a zero extension
2361 
2362      - when MODE is smaller than SRC_MODE, the extraction involves
2363        a truncation (and is thus subject to TARGET_TRULY_NOOP_TRUNCATION).
2364 
2365    In other words, this routine performs a computation, whereas the
2366    gen_lowpart* routines are conceptually lvalue or rvalue subreg
2367    operations.  */
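/* For example, extract_low_bits (SImode, DImode, x) returns the low 32 bits
   of X as an SImode rvalue (a truncation), provided the target considers
   the modes involved tieable.  */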
2368 
2369 rtx
2370 extract_low_bits (machine_mode mode, machine_mode src_mode, rtx src)
2371 {
2372   scalar_int_mode int_mode, src_int_mode;
2373 
2374   if (mode == src_mode)
2375     return src;
2376 
2377   if (CONSTANT_P (src))
2378     {
2379       /* simplify_gen_subreg can't be used here, because if simplify_subreg
2380 	 fails, it will happily create (subreg (symbol_ref)) or similar
2381 	 invalid SUBREGs.  */
2382       poly_uint64 byte = subreg_lowpart_offset (mode, src_mode);
2383       rtx ret = simplify_subreg (mode, src, src_mode, byte);
2384       if (ret)
2385 	return ret;
2386 
2387       if (GET_MODE (src) == VOIDmode
2388 	  || !validate_subreg (mode, src_mode, src, byte))
2389 	return NULL_RTX;
2390 
2391       src = force_reg (GET_MODE (src), src);
2392       return gen_rtx_SUBREG (mode, src, byte);
2393     }
2394 
2395   if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2396     return NULL_RTX;
2397 
2398   if (known_eq (GET_MODE_BITSIZE (mode), GET_MODE_BITSIZE (src_mode))
2399       && targetm.modes_tieable_p (mode, src_mode))
2400     {
2401       rtx x = gen_lowpart_common (mode, src);
2402       if (x)
2403         return x;
2404     }
2405 
2406   if (!int_mode_for_mode (src_mode).exists (&src_int_mode)
2407       || !int_mode_for_mode (mode).exists (&int_mode))
2408     return NULL_RTX;
2409 
2410   if (!targetm.modes_tieable_p (src_int_mode, src_mode))
2411     return NULL_RTX;
2412   if (!targetm.modes_tieable_p (int_mode, mode))
2413     return NULL_RTX;
2414 
2415   src = gen_lowpart (src_int_mode, src);
2416   src = convert_modes (int_mode, src_int_mode, src, true);
2417   src = gen_lowpart (mode, src);
2418   return src;
2419 }
2420 
2421 /* Add INC into TARGET.  */
2422 
2423 void
2424 expand_inc (rtx target, rtx inc)
2425 {
2426   rtx value = expand_binop (GET_MODE (target), add_optab,
2427 			    target, inc,
2428 			    target, 0, OPTAB_LIB_WIDEN);
2429   if (value != target)
2430     emit_move_insn (target, value);
2431 }
2432 
2433 /* Subtract DEC from TARGET.  */
2434 
2435 void
2436 expand_dec (rtx target, rtx dec)
2437 {
2438   rtx value = expand_binop (GET_MODE (target), sub_optab,
2439 			    target, dec,
2440 			    target, 0, OPTAB_LIB_WIDEN);
2441   if (value != target)
2442     emit_move_insn (target, value);
2443 }
2444 
2445 /* Output a shift instruction for expression code CODE,
2446    with SHIFTED being the rtx for the value to shift,
2447    and AMOUNT the rtx for the amount to shift by.
2448    Store the result in the rtx TARGET, if that is convenient.
2449    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2450    Return the rtx for where the value is.
2451    If that cannot be done, abort the compilation unless MAY_FAIL is true,
2452    in which case 0 is returned.  */
2453 
2454 static rtx
2455 expand_shift_1 (enum tree_code code, machine_mode mode, rtx shifted,
2456 		rtx amount, rtx target, int unsignedp, bool may_fail = false)
2457 {
2458   rtx op1, temp = 0;
2459   int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2460   int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2461   optab lshift_optab = ashl_optab;
2462   optab rshift_arith_optab = ashr_optab;
2463   optab rshift_uns_optab = lshr_optab;
2464   optab lrotate_optab = rotl_optab;
2465   optab rrotate_optab = rotr_optab;
2466   machine_mode op1_mode;
2467   scalar_mode scalar_mode = GET_MODE_INNER (mode);
2468   int attempt;
2469   bool speed = optimize_insn_for_speed_p ();
2470 
2471   op1 = amount;
2472   op1_mode = GET_MODE (op1);
2473 
2474   /* Determine whether the shift/rotate amount is a vector or a scalar.  If the
2475      shift amount is a vector, use the vector/vector shift patterns.  */
2476   if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2477     {
2478       lshift_optab = vashl_optab;
2479       rshift_arith_optab = vashr_optab;
2480       rshift_uns_optab = vlshr_optab;
2481       lrotate_optab = vrotl_optab;
2482       rrotate_optab = vrotr_optab;
2483     }
2484 
2485   /* We used to detect shift counts computed by NEGATE_EXPR
2486      and shift in the other direction, but that does not work
2487      on all machines.  */
2488 
2489   if (SHIFT_COUNT_TRUNCATED)
2490     {
2491       if (CONST_INT_P (op1)
2492 	  && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2493 	      (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (scalar_mode)))
2494 	op1 = gen_int_shift_amount (mode,
2495 				    (unsigned HOST_WIDE_INT) INTVAL (op1)
2496 				    % GET_MODE_BITSIZE (scalar_mode));
2497       else if (GET_CODE (op1) == SUBREG
2498 	       && subreg_lowpart_p (op1)
2499 	       && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1)))
2500 	       && SCALAR_INT_MODE_P (GET_MODE (op1)))
2501 	op1 = SUBREG_REG (op1);
2502     }
2503 
2504   /* Canonicalize rotates by constant amount.  If op1 is bitsize / 2,
2505      prefer left rotation; if op1 is from bitsize / 2 + 1 to
2506      bitsize - 1, use the other direction of rotate with an amount of
2507      1 .. bitsize / 2 - 1 instead.  */
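  /* E.g., in a 32-bit mode a rotate left by 27 is rewritten here as a
     rotate right by 32 - 27 = 5.  */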
2508   if (rotate
2509       && CONST_INT_P (op1)
2510       && IN_RANGE (INTVAL (op1), GET_MODE_BITSIZE (scalar_mode) / 2 + left,
2511 		   GET_MODE_BITSIZE (scalar_mode) - 1))
2512     {
2513       op1 = gen_int_shift_amount (mode, (GET_MODE_BITSIZE (scalar_mode)
2514 					 - INTVAL (op1)));
2515       left = !left;
2516       code = left ? LROTATE_EXPR : RROTATE_EXPR;
2517     }
2518 
2519   /* Rotation of 16-bit values by 8 bits is effectively equivalent to a bswaphi.
2520      Note that this is not the case for bigger values.  For instance a rotation
2521      of 0x01020304 by 16 bits gives 0x03040102 which is different from
2522      0x04030201 (bswapsi).  */
2523   if (rotate
2524       && CONST_INT_P (op1)
2525       && INTVAL (op1) == BITS_PER_UNIT
2526       && GET_MODE_SIZE (scalar_mode) == 2
2527       && optab_handler (bswap_optab, mode) != CODE_FOR_nothing)
2528     return expand_unop (mode, bswap_optab, shifted, NULL_RTX, unsignedp);
2529 
2530   if (op1 == const0_rtx)
2531     return shifted;
2532 
2533   /* Check whether it's cheaper to implement a left shift by a constant
2534      bit count by a sequence of additions.  */
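  /* E.g., a left shift by 3 becomes three successive doublings:
     t = x + x; t = t + t; t = t + t.  */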
2535   if (code == LSHIFT_EXPR
2536       && CONST_INT_P (op1)
2537       && INTVAL (op1) > 0
2538       && INTVAL (op1) < GET_MODE_PRECISION (scalar_mode)
2539       && INTVAL (op1) < MAX_BITS_PER_WORD
2540       && (shift_cost (speed, mode, INTVAL (op1))
2541 	  > INTVAL (op1) * add_cost (speed, mode))
2542       && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2543     {
2544       int i;
2545       for (i = 0; i < INTVAL (op1); i++)
2546 	{
2547 	  temp = force_reg (mode, shifted);
2548 	  shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2549 				  unsignedp, OPTAB_LIB_WIDEN);
2550 	}
2551       return shifted;
2552     }
2553 
2554   for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2555     {
2556       enum optab_methods methods;
2557 
2558       if (attempt == 0)
2559 	methods = OPTAB_DIRECT;
2560       else if (attempt == 1)
2561 	methods = OPTAB_WIDEN;
2562       else
2563 	methods = OPTAB_LIB_WIDEN;
2564 
2565       if (rotate)
2566 	{
2567 	  /* Widening does not work for rotation.  */
2568 	  if (methods == OPTAB_WIDEN)
2569 	    continue;
2570 	  else if (methods == OPTAB_LIB_WIDEN)
2571 	    {
2572 	      /* If we have been unable to open-code this by a rotation,
2573 		 do it as the IOR of two shifts.  I.e., to rotate A
2574 		 by N bits, compute
2575 		 (A << N) | ((unsigned) A >> ((-N) & (C - 1)))
2576 		 where C is the bitsize of A.
2577 
2578 		 It is theoretically possible that the target machine might
2579 		 not be able to perform either shift and hence we would
2580 		 be making two libcalls rather than just the one for the
2581 		 shift (similarly if IOR could not be done).  We will allow
2582 		 this extremely unlikely lossage to avoid complicating the
2583 		 code below.  */
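	      /* Note that (-N) & (C - 1), with C a power of two, computes
		 C - N modulo C, so a variable count of zero yields a
		 harmless shift by zero rather than an undefined shift
		 by C.  */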
2584 
2585 	      rtx subtarget = target == shifted ? 0 : target;
2586 	      rtx new_amount, other_amount;
2587 	      rtx temp1;
2588 
2589 	      new_amount = op1;
2590 	      if (op1 == const0_rtx)
2591 		return shifted;
2592 	      else if (CONST_INT_P (op1))
2593 		other_amount = gen_int_shift_amount
2594 		  (mode, GET_MODE_BITSIZE (scalar_mode) - INTVAL (op1));
2595 	      else
2596 		{
2597 		  other_amount
2598 		    = simplify_gen_unary (NEG, GET_MODE (op1),
2599 					  op1, GET_MODE (op1));
2600 		  HOST_WIDE_INT mask = GET_MODE_PRECISION (scalar_mode) - 1;
2601 		  other_amount
2602 		    = simplify_gen_binary (AND, GET_MODE (op1), other_amount,
2603 					   gen_int_mode (mask, GET_MODE (op1)));
2604 		}
2605 
2606 	      shifted = force_reg (mode, shifted);
2607 
2608 	      temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2609 				     mode, shifted, new_amount, 0, 1);
2610 	      temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2611 				      mode, shifted, other_amount,
2612 				      subtarget, 1);
2613 	      return expand_binop (mode, ior_optab, temp, temp1, target,
2614 				   unsignedp, methods);
2615 	    }
2616 
2617 	  temp = expand_binop (mode,
2618 			       left ? lrotate_optab : rrotate_optab,
2619 			       shifted, op1, target, unsignedp, methods);
2620 	}
2621       else if (unsignedp)
2622 	temp = expand_binop (mode,
2623 			     left ? lshift_optab : rshift_uns_optab,
2624 			     shifted, op1, target, unsignedp, methods);
2625 
2626       /* Do arithmetic shifts.
2627 	 Also, if we are going to widen the operand, we can just as well
2628 	 use an arithmetic right-shift instead of a logical one.  */
2629       if (temp == 0 && ! rotate
2630 	  && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2631 	{
2632 	  enum optab_methods methods1 = methods;
2633 
2634 	  /* If trying to widen a log shift to an arithmetic shift,
2635 	     don't accept an arithmetic shift of the same size.  */
2636 	  if (unsignedp)
2637 	    methods1 = OPTAB_MUST_WIDEN;
2638 
2639 	  /* Arithmetic shift */
2640 
2641 	  temp = expand_binop (mode,
2642 			       left ? lshift_optab : rshift_arith_optab,
2643 			       shifted, op1, target, unsignedp, methods1);
2644 	}
2645 
2646       /* We used to try extzv here for logical right shifts, but that was
2647 	 only useful for one machine, the VAX, and caused poor code
2648 	 generation there for lshrdi3, so the code was deleted and a
2649 	 define_expand for lshrsi3 was added to vax.md.  */
2650     }
2651 
2652   gcc_assert (temp != NULL_RTX || may_fail);
2653   return temp;
2654 }
2655 
2656 /* Output a shift instruction for expression code CODE,
2657    with SHIFTED being the rtx for the value to shift,
2658    and AMOUNT the amount to shift by.
2659    Store the result in the rtx TARGET, if that is convenient.
2660    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2661    Return the rtx for where the value is.  */
2662 
2663 rtx
2664 expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2665 	      poly_int64 amount, rtx target, int unsignedp)
2666 {
2667   return expand_shift_1 (code, mode, shifted,
2668 			 gen_int_shift_amount (mode, amount),
2669 			 target, unsignedp);
2670 }
2671 
2672 /* Likewise, but return 0 if that cannot be done.  */
2673 
2674 static rtx
2675 maybe_expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2676 		    int amount, rtx target, int unsignedp)
2677 {
2678   return expand_shift_1 (code, mode,
2679 			 shifted, GEN_INT (amount), target, unsignedp, true);
2680 }
2681 
2682 /* Output a shift instruction for expression code CODE,
2683    with SHIFTED being the rtx for the value to shift,
2684    and AMOUNT the tree for the amount to shift by.
2685    Store the result in the rtx TARGET, if that is convenient.
2686    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2687    Return the rtx for where the value is.  */
2688 
2689 rtx
2690 expand_variable_shift (enum tree_code code, machine_mode mode, rtx shifted,
2691 		       tree amount, rtx target, int unsignedp)
2692 {
2693   return expand_shift_1 (code, mode,
2694 			 shifted, expand_normal (amount), target, unsignedp);
2695 }
2696 
2697 
2698 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2699 			const struct mult_cost *, machine_mode mode);
2700 static rtx expand_mult_const (machine_mode, rtx, HOST_WIDE_INT, rtx,
2701 			      const struct algorithm *, enum mult_variant);
2702 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2703 static rtx extract_high_half (scalar_int_mode, rtx);
2704 static rtx expmed_mult_highpart (scalar_int_mode, rtx, rtx, rtx, int, int);
2705 static rtx expmed_mult_highpart_optab (scalar_int_mode, rtx, rtx, rtx,
2706 				       int, int);
2707 /* Compute and return the best algorithm for multiplying by T.
2708    The algorithm must cost less than COST_LIMIT.
2709    If retval.cost >= COST_LIMIT, no algorithm was found and all
2710    other fields of the returned struct are undefined.
2711    MODE is the machine mode of the multiplication.  */
2712 
2713 static void
2714 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2715 	    const struct mult_cost *cost_limit, machine_mode mode)
2716 {
2717   int m;
2718   struct algorithm *alg_in, *best_alg;
2719   struct mult_cost best_cost;
2720   struct mult_cost new_limit;
2721   int op_cost, op_latency;
2722   unsigned HOST_WIDE_INT orig_t = t;
2723   unsigned HOST_WIDE_INT q;
2724   int maxm, hash_index;
2725   bool cache_hit = false;
2726   enum alg_code cache_alg = alg_zero;
2727   bool speed = optimize_insn_for_speed_p ();
2728   scalar_int_mode imode;
2729   struct alg_hash_entry *entry_ptr;
2730 
2731   /* Indicate that no algorithm is yet found.  If no algorithm
2732      is found, this value will be returned to indicate failure.  */
2733   alg_out->cost.cost = cost_limit->cost + 1;
2734   alg_out->cost.latency = cost_limit->latency + 1;
2735 
2736   if (cost_limit->cost < 0
2737       || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2738     return;
2739 
2740   /* Be prepared for vector modes.  */
2741   imode = as_a <scalar_int_mode> (GET_MODE_INNER (mode));
2742 
2743   maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2744 
2745   /* Restrict the bits of "t" to the multiplication's mode.  */
2746   t &= GET_MODE_MASK (imode);
2747 
2748   /* t == 1 can be done at zero cost.  */
2749   if (t == 1)
2750     {
2751       alg_out->ops = 1;
2752       alg_out->cost.cost = 0;
2753       alg_out->cost.latency = 0;
2754       alg_out->op[0] = alg_m;
2755       return;
2756     }
2757 
2758   /* t == 0 sometimes has a cost.  If it does and it exceeds our limit,
2759      fail now.  */
2760   if (t == 0)
2761     {
2762       if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2763 	return;
2764       else
2765 	{
2766 	  alg_out->ops = 1;
2767 	  alg_out->cost.cost = zero_cost (speed);
2768 	  alg_out->cost.latency = zero_cost (speed);
2769 	  alg_out->op[0] = alg_zero;
2770 	  return;
2771 	}
2772     }
2773 
2774   /* We'll be needing a couple extra algorithm structures now.  */
2775 
2776   alg_in = XALLOCA (struct algorithm);
2777   best_alg = XALLOCA (struct algorithm);
2778   best_cost = *cost_limit;
2779 
2780   /* Compute the hash index.  */
2781   hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2782 
2783   /* See if we already know what to do for T.  */
2784   entry_ptr = alg_hash_entry_ptr (hash_index);
2785   if (entry_ptr->t == t
2786       && entry_ptr->mode == mode
2787       && entry_ptr->speed == speed
2788       && entry_ptr->alg != alg_unknown)
2789     {
2790       cache_alg = entry_ptr->alg;
2791 
2792       if (cache_alg == alg_impossible)
2793 	{
2794 	  /* The cache tells us that it's impossible to synthesize
2795 	     multiplication by T within entry_ptr->cost.  */
2796 	  if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2797 	    /* COST_LIMIT is at least as restrictive as the one
2798 	       recorded in the hash table, in which case we have no
2799 	       hope of synthesizing a multiplication.  Just
2800 	       return.  */
2801 	    return;
2802 
2803 	  /* If we get here, COST_LIMIT is less restrictive than the
2804 	     one recorded in the hash table, so we may be able to
2805 	     synthesize a multiplication.  Proceed as if we didn't
2806 	     have the cache entry.  */
2807 	}
2808       else
2809 	{
2810 	  if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2811 	    /* The cached algorithm shows that this multiplication
2812 	       requires more cost than COST_LIMIT.  Just return.  This
2813 	       way, we don't clobber this cache entry with
2814 	       alg_impossible but retain useful information.  */
2815 	    return;
2816 
2817 	  cache_hit = true;
2818 
2819 	  switch (cache_alg)
2820 	    {
2821 	    case alg_shift:
2822 	      goto do_alg_shift;
2823 
2824 	    case alg_add_t_m2:
2825 	    case alg_sub_t_m2:
2826 	      goto do_alg_addsub_t_m2;
2827 
2828 	    case alg_add_factor:
2829 	    case alg_sub_factor:
2830 	      goto do_alg_addsub_factor;
2831 
2832 	    case alg_add_t2_m:
2833 	      goto do_alg_add_t2_m;
2834 
2835 	    case alg_sub_t2_m:
2836 	      goto do_alg_sub_t2_m;
2837 
2838 	    default:
2839 	      gcc_unreachable ();
2840 	    }
2841 	}
2842     }
2843 
2844   /* If we have a group of zero bits at the low-order part of T, try
2845      multiplying by the remaining bits and then doing a shift.  */
2846 
2847   if ((t & 1) == 0)
2848     {
2849     do_alg_shift:
2850       m = ctz_or_zero (t); /* m = number of low zero bits */
2851       if (m < maxm)
2852 	{
2853 	  q = t >> m;
2854 	  /* The function expand_shift will choose between a shift and
2855 	     a sequence of additions, so the observed cost is given as
2856 	     MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)).  */
2857 	  op_cost = m * add_cost (speed, mode);
2858 	  if (shift_cost (speed, mode, m) < op_cost)
2859 	    op_cost = shift_cost (speed, mode, m);
2860 	  new_limit.cost = best_cost.cost - op_cost;
2861 	  new_limit.latency = best_cost.latency - op_cost;
2862 	  synth_mult (alg_in, q, &new_limit, mode);
2863 
2864 	  alg_in->cost.cost += op_cost;
2865 	  alg_in->cost.latency += op_cost;
2866 	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2867 	    {
2868 	      best_cost = alg_in->cost;
2869 	      std::swap (alg_in, best_alg);
2870 	      best_alg->log[best_alg->ops] = m;
2871 	      best_alg->op[best_alg->ops] = alg_shift;
2872 	    }
2873 
2874 	  /* See if treating ORIG_T as a signed number yields a better
2875 	     sequence.  Try this sequence only for a negative ORIG_T
2876 	     as it would be useless for a non-negative ORIG_T.  */
2877 	  if ((HOST_WIDE_INT) orig_t < 0)
2878 	    {
2879 	      /* Shift ORIG_T as follows because a right shift of a
2880 		 negative-valued signed type is implementation
2881 		 defined.  */
2882 	      q = ~(~orig_t >> m);
2883 	      /* The function expand_shift will choose between a shift
2884 		 and a sequence of additions, so the observed cost is
2885 		 given as MIN (m * add_cost(speed, mode),
2886 		 shift_cost(speed, mode, m)).  */
2887 	      op_cost = m * add_cost (speed, mode);
2888 	      if (shift_cost (speed, mode, m) < op_cost)
2889 		op_cost = shift_cost (speed, mode, m);
2890 	      new_limit.cost = best_cost.cost - op_cost;
2891 	      new_limit.latency = best_cost.latency - op_cost;
2892 	      synth_mult (alg_in, q, &new_limit, mode);
2893 
2894 	      alg_in->cost.cost += op_cost;
2895 	      alg_in->cost.latency += op_cost;
2896 	      if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2897 		{
2898 		  best_cost = alg_in->cost;
2899 		  std::swap (alg_in, best_alg);
2900 		  best_alg->log[best_alg->ops] = m;
2901 		  best_alg->op[best_alg->ops] = alg_shift;
2902 		}
2903 	    }
2904 	}
2905       if (cache_hit)
2906 	goto done;
2907     }
2908 
2909   /* If we have an odd number, add or subtract one.  */
2910   if ((t & 1) != 0)
2911     {
2912       unsigned HOST_WIDE_INT w;
2913 
2914     do_alg_addsub_t_m2:
2915       for (w = 1; (w & t) != 0; w <<= 1)
2916 	;
2917       /* If T was -1, then W will be zero after the loop.  This is another
2918 	 case where T ends with ...111.  Handling it by multiplying by (T + 1)
2919 	 and subtracting 1 produces slightly better code and selects the
2920 	 algorithm much faster than treating it like the ...0111 case
2921 	 below.  */
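      /* For instance, for T == 7 the first branch below synthesizes the
	 product as x * 8 (a single shift) minus x.  */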
2922       if (w == 0
2923 	  || (w > 2
2924 	      /* Reject the case where t is 3.
2925 		 Thus we prefer addition in that case.  */
2926 	      && t != 3))
2927 	{
2928 	  /* T ends with ...111.  Multiply by (T + 1) and subtract T.  */
2929 
2930 	  op_cost = add_cost (speed, mode);
2931 	  new_limit.cost = best_cost.cost - op_cost;
2932 	  new_limit.latency = best_cost.latency - op_cost;
2933 	  synth_mult (alg_in, t + 1, &new_limit, mode);
2934 
2935 	  alg_in->cost.cost += op_cost;
2936 	  alg_in->cost.latency += op_cost;
2937 	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2938 	    {
2939 	      best_cost = alg_in->cost;
2940 	      std::swap (alg_in, best_alg);
2941 	      best_alg->log[best_alg->ops] = 0;
2942 	      best_alg->op[best_alg->ops] = alg_sub_t_m2;
2943 	    }
2944 	}
2945       else
2946 	{
2947 	  /* T ends with ...01 or ...011.  Multiply by (T - 1) and add T.  */
2948 
2949 	  op_cost = add_cost (speed, mode);
2950 	  new_limit.cost = best_cost.cost - op_cost;
2951 	  new_limit.latency = best_cost.latency - op_cost;
2952 	  synth_mult (alg_in, t - 1, &new_limit, mode);
2953 
2954 	  alg_in->cost.cost += op_cost;
2955 	  alg_in->cost.latency += op_cost;
2956 	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2957 	    {
2958 	      best_cost = alg_in->cost;
2959 	      std::swap (alg_in, best_alg);
2960 	      best_alg->log[best_alg->ops] = 0;
2961 	      best_alg->op[best_alg->ops] = alg_add_t_m2;
2962 	    }
2963 	}
2964 
2965       /* We may be able to calculate a * -7, a * -15, a * -31, etc.
2966 	 quickly with a - a * n for some appropriate constant n.  */
2967       m = exact_log2 (-orig_t + 1);
2968       if (m >= 0 && m < maxm)
2969 	{
2970 	  op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2971 	  /* If the target has a cheap shift-and-subtract insn use
2972 	     that in preference to a shift insn followed by a sub insn.
2973 	     Assume that the shift-and-sub is "atomic" with a latency
2974 	     equal to its cost; otherwise assume that on superscalar
2975 	     hardware the shift may be executed concurrently with the
2976 	     earlier steps in the algorithm.  */
2977 	  if (shiftsub1_cost (speed, mode, m) <= op_cost)
2978 	    {
2979 	      op_cost = shiftsub1_cost (speed, mode, m);
2980 	      op_latency = op_cost;
2981 	    }
2982 	  else
2983 	    op_latency = add_cost (speed, mode);
2984 
2985 	  new_limit.cost = best_cost.cost - op_cost;
2986 	  new_limit.latency = best_cost.latency - op_latency;
2987 	  synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
2988 		      &new_limit, mode);
2989 
2990 	  alg_in->cost.cost += op_cost;
2991 	  alg_in->cost.latency += op_latency;
2992 	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2993 	    {
2994 	      best_cost = alg_in->cost;
2995 	      std::swap (alg_in, best_alg);
2996 	      best_alg->log[best_alg->ops] = m;
2997 	      best_alg->op[best_alg->ops] = alg_sub_t_m2;
2998 	    }
2999 	}
3000 
3001       if (cache_hit)
3002 	goto done;
3003     }
3004 
3005   /* Look for factors of t of the form
3006      t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
3007      If we find such a factor, we can multiply by t using an algorithm that
3008      multiplies by q, shifts the result by m and adds/subtracts the unshifted copy.
3009 
3010      We search for large factors first and loop down, even if large factors
3011      are less probable than small ones; if we find a large factor we will find a
3012      good sequence quickly, and therefore be able to prune (by decreasing
3013      COST_LIMIT) the search.  */
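  /* For example, T = 45 is divisible by 2**4 - 1 = 15, so the loop below
     recurses on 45 / 15 = 3 (synthesized as, e.g., (X << 1) + X) and then
     forms 45*X = (3*X << 4) - 3*X.  */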
3014 
3015  do_alg_addsub_factor:
3016   for (m = floor_log2 (t - 1); m >= 2; m--)
3017     {
3018       unsigned HOST_WIDE_INT d;
3019 
3020       d = (HOST_WIDE_INT_1U << m) + 1;
3021       if (t % d == 0 && t > d && m < maxm
3022 	  && (!cache_hit || cache_alg == alg_add_factor))
3023 	{
3024 	  op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
3025 	  if (shiftadd_cost (speed, mode, m) <= op_cost)
3026 	    op_cost = shiftadd_cost (speed, mode, m);
3027 
3028 	  op_latency = op_cost;
3029 
3030 
3031 	  new_limit.cost = best_cost.cost - op_cost;
3032 	  new_limit.latency = best_cost.latency - op_latency;
3033 	  synth_mult (alg_in, t / d, &new_limit, mode);
3034 
3035 	  alg_in->cost.cost += op_cost;
3036 	  alg_in->cost.latency += op_latency;
3037 	  if (alg_in->cost.latency < op_cost)
3038 	    alg_in->cost.latency = op_cost;
3039 	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3040 	    {
3041 	      best_cost = alg_in->cost;
3042 	      std::swap (alg_in, best_alg);
3043 	      best_alg->log[best_alg->ops] = m;
3044 	      best_alg->op[best_alg->ops] = alg_add_factor;
3045 	    }
3046 	  /* Other factors will have been taken care of in the recursion.  */
3047 	  break;
3048 	}
3049 
3050       d = (HOST_WIDE_INT_1U << m) - 1;
3051       if (t % d == 0 && t > d && m < maxm
3052 	  && (!cache_hit || cache_alg == alg_sub_factor))
3053 	{
3054 	  op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
3055 	  if (shiftsub0_cost (speed, mode, m) <= op_cost)
3056 	    op_cost = shiftsub0_cost (speed, mode, m);
3057 
3058 	  op_latency = op_cost;
3059 
3060 	  new_limit.cost = best_cost.cost - op_cost;
3061 	  new_limit.latency = best_cost.latency - op_latency;
3062 	  synth_mult (alg_in, t / d, &new_limit, mode);
3063 
3064 	  alg_in->cost.cost += op_cost;
3065 	  alg_in->cost.latency += op_latency;
3066 	  if (alg_in->cost.latency < op_cost)
3067 	    alg_in->cost.latency = op_cost;
3068 	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3069 	    {
3070 	      best_cost = alg_in->cost;
3071 	      std::swap (alg_in, best_alg);
3072 	      best_alg->log[best_alg->ops] = m;
3073 	      best_alg->op[best_alg->ops] = alg_sub_factor;
3074 	    }
3075 	  break;
3076 	}
3077     }
3078   if (cache_hit)
3079     goto done;
3080 
3081   /* Try shift-and-add (load effective address) instructions,
3082      i.e. do a*3, a*5, a*9.  */
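  /* For example, A*9 is handled here as (A << 3) + A, which many targets
     (x86 LEA, for instance) can do in a single instruction.  */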
3083   if ((t & 1) != 0)
3084     {
3085     do_alg_add_t2_m:
3086       q = t - 1;
3087       m = ctz_hwi (q);
3088       if (q && m < maxm)
3089 	{
3090 	  op_cost = shiftadd_cost (speed, mode, m);
3091 	  new_limit.cost = best_cost.cost - op_cost;
3092 	  new_limit.latency = best_cost.latency - op_cost;
3093 	  synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
3094 
3095 	  alg_in->cost.cost += op_cost;
3096 	  alg_in->cost.latency += op_cost;
3097 	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3098 	    {
3099 	      best_cost = alg_in->cost;
3100 	      std::swap (alg_in, best_alg);
3101 	      best_alg->log[best_alg->ops] = m;
3102 	      best_alg->op[best_alg->ops] = alg_add_t2_m;
3103 	    }
3104 	}
3105       if (cache_hit)
3106 	goto done;
3107 
3108     do_alg_sub_t2_m:
3109       q = t + 1;
3110       m = ctz_hwi (q);
3111       if (q && m < maxm)
3112 	{
3113 	  op_cost = shiftsub0_cost (speed, mode, m);
3114 	  new_limit.cost = best_cost.cost - op_cost;
3115 	  new_limit.latency = best_cost.latency - op_cost;
3116 	  synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
3117 
3118 	  alg_in->cost.cost += op_cost;
3119 	  alg_in->cost.latency += op_cost;
3120 	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3121 	    {
3122 	      best_cost = alg_in->cost;
3123 	      std::swap (alg_in, best_alg);
3124 	      best_alg->log[best_alg->ops] = m;
3125 	      best_alg->op[best_alg->ops] = alg_sub_t2_m;
3126 	    }
3127 	}
3128       if (cache_hit)
3129 	goto done;
3130     }
3131 
3132  done:
3133   /* If best_cost has not decreased, we have not found any algorithm.  */
3134   if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
3135     {
3136       /* We failed to find an algorithm.  Record alg_impossible for
3137 	 this case (that is, <T, MODE, COST_LIMIT>) so that next time
3138 	 we are asked to find an algorithm for T within the same or
3139 	 lower COST_LIMIT, we can immediately return to the
3140 	 caller.  */
3141       entry_ptr->t = t;
3142       entry_ptr->mode = mode;
3143       entry_ptr->speed = speed;
3144       entry_ptr->alg = alg_impossible;
3145       entry_ptr->cost = *cost_limit;
3146       return;
3147     }
3148 
3149   /* Cache the result.  */
3150   if (!cache_hit)
3151     {
3152       entry_ptr->t = t;
3153       entry_ptr->mode = mode;
3154       entry_ptr->speed = speed;
3155       entry_ptr->alg = best_alg->op[best_alg->ops];
3156       entry_ptr->cost.cost = best_cost.cost;
3157       entry_ptr->cost.latency = best_cost.latency;
3158     }
3159 
3160   /* If the sequence is too long for `struct algorithm'
3161      to record, make this search fail.  */
3162   if (best_alg->ops == MAX_BITS_PER_WORD)
3163     return;
3164 
3165   /* Copy the algorithm from temporary space to the space at alg_out.
3166      We avoid using structure assignment because the majority of
3167      best_alg is normally undefined, and this is a critical function.  */
3168   alg_out->ops = best_alg->ops + 1;
3169   alg_out->cost = best_cost;
3170   memcpy (alg_out->op, best_alg->op,
3171 	  alg_out->ops * sizeof *alg_out->op);
3172   memcpy (alg_out->log, best_alg->log,
3173 	  alg_out->ops * sizeof *alg_out->log);
3174 }
3175 
3176 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
3177    Try three variations:
3178 
3179        - a shift/add sequence based on VAL itself
3180        - a shift/add sequence based on -VAL, followed by a negation
3181        - a shift/add sequence based on VAL - 1, followed by an addition.
3182 
3183    Return true if the cheapest of these costs less than MULT_COST,
3184    describing the algorithm in *ALG and final fixup in *VARIANT.  */
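/* For example, for VAL = -5 the second variant synthesizes 5*X as
   (X << 2) + X and then negates the result, which is normally much cheaper
   than a shift/add sequence for the unsigned bit pattern of -5.  */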
3185 
3186 bool
3187 choose_mult_variant (machine_mode mode, HOST_WIDE_INT val,
3188 		     struct algorithm *alg, enum mult_variant *variant,
3189 		     int mult_cost)
3190 {
3191   struct algorithm alg2;
3192   struct mult_cost limit;
3193   int op_cost;
3194   bool speed = optimize_insn_for_speed_p ();
3195 
3196   /* Fail quickly for impossible bounds.  */
3197   if (mult_cost < 0)
3198     return false;
3199 
3200   /* Ensure that mult_cost provides a reasonable upper bound.
3201      Any constant multiplication can be performed with less
3202      than 2 * bits additions.  */
3203   op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
3204   if (mult_cost > op_cost)
3205     mult_cost = op_cost;
3206 
3207   *variant = basic_variant;
3208   limit.cost = mult_cost;
3209   limit.latency = mult_cost;
3210   synth_mult (alg, val, &limit, mode);
3211 
3212   /* This works only if the inverted value actually fits in an
3213      `unsigned int'.  */
3214   if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
3215     {
3216       op_cost = neg_cost (speed, mode);
3217       if (MULT_COST_LESS (&alg->cost, mult_cost))
3218 	{
3219 	  limit.cost = alg->cost.cost - op_cost;
3220 	  limit.latency = alg->cost.latency - op_cost;
3221 	}
3222       else
3223 	{
3224 	  limit.cost = mult_cost - op_cost;
3225 	  limit.latency = mult_cost - op_cost;
3226 	}
3227 
3228       synth_mult (&alg2, -val, &limit, mode);
3229       alg2.cost.cost += op_cost;
3230       alg2.cost.latency += op_cost;
3231       if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3232 	*alg = alg2, *variant = negate_variant;
3233     }
3234 
3235   /* This proves very useful for division-by-constant.  */
3236   op_cost = add_cost (speed, mode);
3237   if (MULT_COST_LESS (&alg->cost, mult_cost))
3238     {
3239       limit.cost = alg->cost.cost - op_cost;
3240       limit.latency = alg->cost.latency - op_cost;
3241     }
3242   else
3243     {
3244       limit.cost = mult_cost - op_cost;
3245       limit.latency = mult_cost - op_cost;
3246     }
3247 
3248   synth_mult (&alg2, val - 1, &limit, mode);
3249   alg2.cost.cost += op_cost;
3250   alg2.cost.latency += op_cost;
3251   if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3252     *alg = alg2, *variant = add_variant;
3253 
3254   return MULT_COST_LESS (&alg->cost, mult_cost);
3255 }
3256 
3257 /* A subroutine of expand_mult, used for constant multiplications.
3258    Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
3259    convenient.  Use the shift/add sequence described by ALG and apply
3260    the final fixup specified by VARIANT.  */
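/* For example, an ALG of { alg_m, alg_add_t2_m (log 2), alg_shift (log 1) }
   computes ((X << 2) + X) << 1, i.e. 10*X.  */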
3261 
3262 static rtx
3263 expand_mult_const (machine_mode mode, rtx op0, HOST_WIDE_INT val,
3264 		   rtx target, const struct algorithm *alg,
3265 		   enum mult_variant variant)
3266 {
3267   unsigned HOST_WIDE_INT val_so_far;
3268   rtx_insn *insn;
3269   rtx accum, tem;
3270   int opno;
3271   machine_mode nmode;
3272 
3273   /* Avoid referencing memory over and over and invalid sharing
3274      on SUBREGs.  */
3275   op0 = force_reg (mode, op0);
3276 
3277   /* ACCUM starts out either as OP0 or as a zero, depending on
3278      the first operation.  */
3279 
3280   if (alg->op[0] == alg_zero)
3281     {
3282       accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
3283       val_so_far = 0;
3284     }
3285   else if (alg->op[0] == alg_m)
3286     {
3287       accum = copy_to_mode_reg (mode, op0);
3288       val_so_far = 1;
3289     }
3290   else
3291     gcc_unreachable ();
3292 
3293   for (opno = 1; opno < alg->ops; opno++)
3294     {
3295       int log = alg->log[opno];
3296       rtx shift_subtarget = optimize ? 0 : accum;
3297       rtx add_target
3298 	= (opno == alg->ops - 1 && target != 0 && variant != add_variant
3299 	   && !optimize)
3300 	  ? target : 0;
3301       rtx accum_target = optimize ? 0 : accum;
3302       rtx accum_inner;
3303 
3304       switch (alg->op[opno])
3305 	{
3306 	case alg_shift:
3307 	  tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3308 	  /* REG_EQUAL note will be attached to the following insn.  */
3309 	  emit_move_insn (accum, tem);
3310 	  val_so_far <<= log;
3311 	  break;
3312 
3313 	case alg_add_t_m2:
3314 	  tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3315 	  accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3316 				 add_target ? add_target : accum_target);
3317 	  val_so_far += HOST_WIDE_INT_1U << log;
3318 	  break;
3319 
3320 	case alg_sub_t_m2:
3321 	  tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3322 	  accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
3323 				 add_target ? add_target : accum_target);
3324 	  val_so_far -= HOST_WIDE_INT_1U << log;
3325 	  break;
3326 
3327 	case alg_add_t2_m:
3328 	  accum = expand_shift (LSHIFT_EXPR, mode, accum,
3329 				log, shift_subtarget, 0);
3330 	  accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
3331 				 add_target ? add_target : accum_target);
3332 	  val_so_far = (val_so_far << log) + 1;
3333 	  break;
3334 
3335 	case alg_sub_t2_m:
3336 	  accum = expand_shift (LSHIFT_EXPR, mode, accum,
3337 				log, shift_subtarget, 0);
3338 	  accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3339 				 add_target ? add_target : accum_target);
3340 	  val_so_far = (val_so_far << log) - 1;
3341 	  break;
3342 
3343 	case alg_add_factor:
3344 	  tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3345 	  accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3346 				 add_target ? add_target : accum_target);
3347 	  val_so_far += val_so_far << log;
3348 	  break;
3349 
3350 	case alg_sub_factor:
3351 	  tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3352 	  accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3353 				 (add_target
3354 				  ? add_target : (optimize ? 0 : tem)));
3355 	  val_so_far = (val_so_far << log) - val_so_far;
3356 	  break;
3357 
3358 	default:
3359 	  gcc_unreachable ();
3360 	}
3361 
3362       if (SCALAR_INT_MODE_P (mode))
3363 	{
3364 	  /* Write a REG_EQUAL note on the last insn so that we can cse
3365 	     multiplication sequences.  Note that if ACCUM is a SUBREG,
3366 	     we've set the inner register and must properly indicate that.  */
3367 	  tem = op0, nmode = mode;
3368 	  accum_inner = accum;
3369 	  if (GET_CODE (accum) == SUBREG)
3370 	    {
3371 	      accum_inner = SUBREG_REG (accum);
3372 	      nmode = GET_MODE (accum_inner);
3373 	      tem = gen_lowpart (nmode, op0);
3374 	    }
3375 
3376 	  /* Don't add a REG_EQUAL note if tem is a paradoxical SUBREG.
3377 	     In that case, only the low bits of accum would be guaranteed to
3378 	     be equal to the content of the REG_EQUAL note, the upper bits
3379 	     can be anything.  */
3380 	  if (!paradoxical_subreg_p (tem))
3381 	    {
3382 	      insn = get_last_insn ();
3383 	      wide_int wval_so_far
3384 		= wi::uhwi (val_so_far,
3385 			    GET_MODE_PRECISION (as_a <scalar_mode> (nmode)));
3386 	      rtx c = immed_wide_int_const (wval_so_far, nmode);
3387 	      set_dst_reg_note (insn, REG_EQUAL, gen_rtx_MULT (nmode, tem, c),
3388 				accum_inner);
3389 	    }
3390 	}
3391     }
3392 
3393   if (variant == negate_variant)
3394     {
3395       val_so_far = -val_so_far;
3396       accum = expand_unop (mode, neg_optab, accum, target, 0);
3397     }
3398   else if (variant == add_variant)
3399     {
3400       val_so_far = val_so_far + 1;
3401       accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3402     }
3403 
3404   /* Compare only the bits of val and val_so_far that are significant
3405      in the result mode, to avoid sign-/zero-extension confusion.  */
3406   nmode = GET_MODE_INNER (mode);
3407   val &= GET_MODE_MASK (nmode);
3408   val_so_far &= GET_MODE_MASK (nmode);
3409   gcc_assert (val == (HOST_WIDE_INT) val_so_far);
3410 
3411   return accum;
3412 }
3413 
3414 /* Perform a multiplication and return an rtx for the result.
3415    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3416    TARGET is a suggestion for where to store the result (an rtx).
3417 
3418    We check specially for a constant integer as OP1.
3419    If you want this check for OP0 as well, then before calling
3420    you should swap the two operands if OP0 would be constant.  */
3421 
3422 rtx
3423 expand_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3424 	     int unsignedp, bool no_libcall)
3425 {
3426   enum mult_variant variant;
3427   struct algorithm algorithm;
3428   rtx scalar_op1;
3429   int max_cost;
3430   bool speed = optimize_insn_for_speed_p ();
3431   bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
3432 
3433   if (CONSTANT_P (op0))
3434     std::swap (op0, op1);
3435 
3436   /* For vectors, there are several simplifications that can be made if
3437      all elements of the vector constant are identical.  */
3438   scalar_op1 = unwrap_const_vec_duplicate (op1);
3439 
3440   if (INTEGRAL_MODE_P (mode))
3441     {
3442       rtx fake_reg;
3443       HOST_WIDE_INT coeff;
3444       bool is_neg;
3445       int mode_bitsize;
3446 
3447       if (op1 == CONST0_RTX (mode))
3448 	return op1;
3449       if (op1 == CONST1_RTX (mode))
3450 	return op0;
3451       if (op1 == CONSTM1_RTX (mode))
3452 	return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3453 			    op0, target, 0);
3454 
3455       if (do_trapv)
3456 	goto skip_synth;
3457 
3458       /* If mode is integer vector mode, check if the backend supports
3459 	 vector lshift (by scalar or vector) at all.  If not, we can't use
3460 	 synthesized multiply.  */
3461       if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
3462 	  && optab_handler (vashl_optab, mode) == CODE_FOR_nothing
3463 	  && optab_handler (ashl_optab, mode) == CODE_FOR_nothing)
3464 	goto skip_synth;
3465 
3466       /* These are the operations that are potentially turned into
3467 	 a sequence of shifts and additions.  */
3468       mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
3469 
3470       /* synth_mult does an `unsigned int' multiply.  As long as the mode is
3471 	 less than or equal in size to `unsigned int' this doesn't matter.
3472 	 If the mode is larger than `unsigned int', then synth_mult works
3473 	 only if the constant value exactly fits in an `unsigned int' without
3474 	 any truncation.  This means that multiplying by negative values does
3475 	 not work; results are off by 2^32 on a 32-bit machine.  */
3476       if (CONST_INT_P (scalar_op1))
3477 	{
3478 	  coeff = INTVAL (scalar_op1);
3479 	  is_neg = coeff < 0;
3480 	}
3481 #if TARGET_SUPPORTS_WIDE_INT
3482       else if (CONST_WIDE_INT_P (scalar_op1))
3483 #else
3484       else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
3485 #endif
3486 	{
3487 	  int shift = wi::exact_log2 (rtx_mode_t (scalar_op1, mode));
3488 	  /* Perfect power of 2 (other than 1, which is handled above).  */
3489 	  if (shift > 0)
3490 	    return expand_shift (LSHIFT_EXPR, mode, op0,
3491 				 shift, target, unsignedp);
3492 	  else
3493 	    goto skip_synth;
3494 	}
3495       else
3496 	goto skip_synth;
3497 
3498       /* We used to test optimize here, on the grounds that it's better to
3499 	 produce a smaller program when -O is not used.  But this causes
3500 	 such a terrible slowdown sometimes that it seems better to always
3501 	 use synth_mult.  */
3502 
3503       /* Special case powers of two.  */
3504       if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)
3505 	  && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT))
3506 	return expand_shift (LSHIFT_EXPR, mode, op0,
3507 			     floor_log2 (coeff), target, unsignedp);
3508 
3509       fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3510 
3511       /* Attempt to handle multiplication of DImode values by negative
3512 	 coefficients, by performing the multiplication by a positive
3513 	 multiplier and then inverting the result.  */
3514       if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3515 	{
3516 	  /* It's safe to use -coeff even for INT_MIN, as the
3517 	     result is interpreted as an unsigned coefficient.
3518 	     Exclude cost of op0 from max_cost to match the cost
3519 	     calculation of the synth_mult.  */
3520 	  coeff = -(unsigned HOST_WIDE_INT) coeff;
3521 	  max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1),
3522 				    mode, speed)
3523 		      - neg_cost (speed, mode));
3524 	  if (max_cost <= 0)
3525 	    goto skip_synth;
3526 
3527 	  /* Special case powers of two.  */
3528 	  if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3529 	    {
3530 	      rtx temp = expand_shift (LSHIFT_EXPR, mode, op0,
3531 				       floor_log2 (coeff), target, unsignedp);
3532 	      return expand_unop (mode, neg_optab, temp, target, 0);
3533 	    }
3534 
3535 	  if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3536 				   max_cost))
3537 	    {
3538 	      rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX,
3539 					    &algorithm, variant);
3540 	      return expand_unop (mode, neg_optab, temp, target, 0);
3541 	    }
3542 	  goto skip_synth;
3543 	}
3544 
3545       /* Exclude cost of op0 from max_cost to match the cost
3546 	 calculation of the synth_mult.  */
3547       max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), mode, speed);
3548       if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3549 	return expand_mult_const (mode, op0, coeff, target,
3550 				  &algorithm, variant);
3551     }
3552  skip_synth:
3553 
3554   /* Expand x*2.0 as x+x.  */
3555   if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1)
3556       && real_equal (CONST_DOUBLE_REAL_VALUE (scalar_op1), &dconst2))
3557     {
3558       op0 = force_reg (GET_MODE (op0), op0);
3559       return expand_binop (mode, add_optab, op0, op0,
3560 			   target, unsignedp,
3561 			   no_libcall ? OPTAB_WIDEN : OPTAB_LIB_WIDEN);
3562     }
3563 
3564   /* This used to use umul_optab if unsigned, but for non-widening multiply
3565      there is no difference between signed and unsigned.  */
3566   op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
3567 		      op0, op1, target, unsignedp,
3568 		      no_libcall ? OPTAB_WIDEN : OPTAB_LIB_WIDEN);
3569   gcc_assert (op0 || no_libcall);
3570   return op0;
3571 }
3572 
3573 /* Return a cost estimate for multiplying a register by the given
3574    COEFFicient in the given MODE and SPEED.  */
3575 
3576 int
3577 mult_by_coeff_cost (HOST_WIDE_INT coeff, machine_mode mode, bool speed)
3578 {
3579   int max_cost;
3580   struct algorithm algorithm;
3581   enum mult_variant variant;
3582 
3583   rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3584   max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg),
3585 			   mode, speed);
3586   if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3587     return algorithm.cost.cost;
3588   else
3589     return max_cost;
3590 }
3591 
3592 /* Perform a widening multiplication and return an rtx for the result.
3593    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3594    TARGET is a suggestion for where to store the result (an rtx).
3595    THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3596    or smul_widen_optab.
3597 
3598    We check specially for a constant integer as OP1, comparing the
3599    cost of a widening multiply against the cost of a sequence of shifts
3600    and adds.  */
3601 
3602 rtx
3603 expand_widening_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3604 		      int unsignedp, optab this_optab)
3605 {
3606   bool speed = optimize_insn_for_speed_p ();
3607   rtx cop1;
3608 
3609   if (CONST_INT_P (op1)
3610       && GET_MODE (op0) != VOIDmode
3611       && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3612 				this_optab == umul_widen_optab))
3613       && CONST_INT_P (cop1)
3614       && (INTVAL (cop1) >= 0
3615 	  || HWI_COMPUTABLE_MODE_P (mode)))
3616     {
3617       HOST_WIDE_INT coeff = INTVAL (cop1);
3618       int max_cost;
3619       enum mult_variant variant;
3620       struct algorithm algorithm;
3621 
3622       if (coeff == 0)
3623 	return CONST0_RTX (mode);
3624 
3625       /* Special case powers of two.  */
3626       if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3627 	{
3628 	  op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3629 	  return expand_shift (LSHIFT_EXPR, mode, op0,
3630 			       floor_log2 (coeff), target, unsignedp);
3631 	}
3632 
3633       /* Exclude cost of op0 from max_cost to match the cost
3634 	 calculation of the synth_mult.  */
3635       max_cost = mul_widen_cost (speed, mode);
3636       if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3637 			       max_cost))
3638 	{
3639 	  op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3640 	  return expand_mult_const (mode, op0, coeff, target,
3641 				    &algorithm, variant);
3642 	}
3643     }
3644   return expand_binop (mode, this_optab, op0, op1, target,
3645 		       unsignedp, OPTAB_LIB_WIDEN);
3646 }
3647 
3648 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3649    replace division by D, and put the least significant N bits of the result
3650    in *MULTIPLIER_PTR and return the most significant bit.
3651 
3652    The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3653    needed precision is in PRECISION (should be <= N).
3654 
3655    PRECISION should be as small as possible so this function can choose
3656    the multiplier more freely.
3657 
3658    The rounded-up logarithm of D is placed in *LGUP_PTR.  A shift count that
3659    is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3660 
3661    Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3662    where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier.  */
3663 
3664 unsigned HOST_WIDE_INT
3665 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3666 		   unsigned HOST_WIDE_INT *multiplier_ptr,
3667 		   int *post_shift_ptr, int *lgup_ptr)
3668 {
3669   int lgup, post_shift;
3670   int pow, pow2;
3671 
3672   /* lgup = ceil(log2(divisor)); */
3673   lgup = ceil_log2 (d);
3674 
3675   gcc_assert (lgup <= n);
3676 
3677   pow = n + lgup;
3678   pow2 = n + lgup - precision;
3679 
3680   /* mlow = 2^(N + lgup)/d */
3681   wide_int val = wi::set_bit_in_zero (pow, HOST_BITS_PER_DOUBLE_INT);
3682   wide_int mlow = wi::udiv_trunc (val, d);
3683 
3684   /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3685   val |= wi::set_bit_in_zero (pow2, HOST_BITS_PER_DOUBLE_INT);
3686   wide_int mhigh = wi::udiv_trunc (val, d);
3687 
3688   /* If precision == N, then mlow, mhigh exceed 2^N
3689      (but they do not exceed 2^(N+1)).  */
3690 
3691   /* Reduce to lowest terms.  */
3692   for (post_shift = lgup; post_shift > 0; post_shift--)
3693     {
3694       unsigned HOST_WIDE_INT ml_lo = wi::extract_uhwi (mlow, 1,
3695 						       HOST_BITS_PER_WIDE_INT);
3696       unsigned HOST_WIDE_INT mh_lo = wi::extract_uhwi (mhigh, 1,
3697 						       HOST_BITS_PER_WIDE_INT);
3698       if (ml_lo >= mh_lo)
3699 	break;
3700 
3701       mlow = wi::uhwi (ml_lo, HOST_BITS_PER_DOUBLE_INT);
3702       mhigh = wi::uhwi (mh_lo, HOST_BITS_PER_DOUBLE_INT);
3703     }
3704 
3705   *post_shift_ptr = post_shift;
3706   *lgup_ptr = lgup;
3707   if (n < HOST_BITS_PER_WIDE_INT)
3708     {
3709       unsigned HOST_WIDE_INT mask = (HOST_WIDE_INT_1U << n) - 1;
3710       *multiplier_ptr = mhigh.to_uhwi () & mask;
3711       return mhigh.to_uhwi () >= mask;
3712     }
3713   else
3714     {
3715       *multiplier_ptr = mhigh.to_uhwi ();
3716       return wi::extract_uhwi (mhigh, HOST_BITS_PER_WIDE_INT, 1);
3717     }
3718 }
3719 
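/* For illustration only (not part of the compiler): with D = 7 and
   N = PRECISION = 32 the values above work out to a multiplier of
   0x24924925, a post shift of 3 and a set return bit, which corresponds to
   the division sequence emitted by expand_divmod below.  A plain C sketch
   of that sequence (the function name is made up for the example):  */
#if 0
static unsigned int
udiv_by_7_example (unsigned int x)
{
  /* High half of the 32x32->64 bit product with the chosen multiplier.  */
  unsigned int t = ((unsigned long long) x * 0x24924925u) >> 32;
  /* Fold in the extra multiplier bit, then apply the post shift.  */
  return (t + ((x - t) >> 1)) >> 2;
}
#endif
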
3720 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3721    congruent to 1 (mod 2**N).  */
3722 
3723 static unsigned HOST_WIDE_INT
3724 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3725 {
3726   /* Solve x*y == 1 (mod 2^n), where x is odd.  Return y.  */
3727 
3728   /* The algorithm notes that the choice y = x satisfies
3729      x*y == 1 mod 2^3, since x is assumed odd.
3730      Each iteration doubles the number of bits of significance in y.  */
3731 
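  /* For example, with x = 7 and n = 8: y starts as 7 (7*7 == 49 == 1 mod 8);
     the first iteration gives y = 7 * (2 - 49) & 0xff == 183, and indeed
     7 * 183 == 1281 == 5*256 + 1, so 183 is the inverse of 7 mod 2**8.  */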
3732   unsigned HOST_WIDE_INT mask;
3733   unsigned HOST_WIDE_INT y = x;
3734   int nbit = 3;
3735 
3736   mask = (n == HOST_BITS_PER_WIDE_INT
3737 	  ? HOST_WIDE_INT_M1U
3738 	  : (HOST_WIDE_INT_1U << n) - 1);
3739 
3740   while (nbit < n)
3741     {
3742       y = y * (2 - x*y) & mask;		/* Modulo 2^N */
3743       nbit *= 2;
3744     }
3745   return y;
3746 }
3747 
3748 /* Emit code to adjust ADJ_OPERAND after a multiplication done with the
3749    wrong signedness of OP0 and OP1.  ADJ_OPERAND is already the high half
3750    of the product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the signed
3751    product to become unsigned; if UNSIGNEDP is zero, adjust the unsigned
3752    product to become signed.
3753 
3754    The result is put in TARGET if that is convenient.
3755 
3756    MODE is the mode of operation.  */
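/* The adjustment below relies on the identity

     (signed highpart of X*Y)
       = (unsigned highpart of X*Y) - (X < 0 ? Y : 0) - (Y < 0 ? X : 0)

   taken modulo 2**GET_MODE_BITSIZE (MODE); each arithmetic-shift-and-AND
   pair below computes one of the two conditional terms.  */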
3757 
3758 rtx
3759 expand_mult_highpart_adjust (scalar_int_mode mode, rtx adj_operand, rtx op0,
3760 			     rtx op1, rtx target, int unsignedp)
3761 {
3762   rtx tem;
3763   enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3764 
3765   tem = expand_shift (RSHIFT_EXPR, mode, op0,
3766 		      GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3767   tem = expand_and (mode, tem, op1, NULL_RTX);
3768   adj_operand
3769     = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3770 		     adj_operand);
3771 
3772   tem = expand_shift (RSHIFT_EXPR, mode, op1,
3773 		      GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3774   tem = expand_and (mode, tem, op0, NULL_RTX);
3775   target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3776 			  target);
3777 
3778   return target;
3779 }
3780 
3781 /* Subroutine of expmed_mult_highpart.  Return the MODE high part of OP.  */
3782 
3783 static rtx
3784 extract_high_half (scalar_int_mode mode, rtx op)
3785 {
3786   if (mode == word_mode)
3787     return gen_highpart (mode, op);
3788 
3789   scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3790 
3791   op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3792 		     GET_MODE_BITSIZE (mode), 0, 1);
3793   return convert_modes (mode, wider_mode, op, 0);
3794 }
3795 
3796 /* Like expmed_mult_highpart, but only consider using a multiplication
3797    optab.  OP1 is an rtx for the constant operand.  */
3798 
3799 static rtx
3800 expmed_mult_highpart_optab (scalar_int_mode mode, rtx op0, rtx op1,
3801 			    rtx target, int unsignedp, int max_cost)
3802 {
3803   rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3804   optab moptab;
3805   rtx tem;
3806   int size;
3807   bool speed = optimize_insn_for_speed_p ();
3808 
3809   scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3810 
3811   size = GET_MODE_BITSIZE (mode);
3812 
3813   /* Firstly, try using a multiplication insn that only generates the needed
3814      high part of the product, with the signedness given by UNSIGNEDP.  */
3815   if (mul_highpart_cost (speed, mode) < max_cost)
3816     {
3817       moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3818       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3819 			  unsignedp, OPTAB_DIRECT);
3820       if (tem)
3821 	return tem;
3822     }
3823 
3824   /* Secondly, same as above, but use the signedness opposite to UNSIGNEDP.
3825      Need to adjust the result after the multiplication.  */
3826   if (size - 1 < BITS_PER_WORD
3827       && (mul_highpart_cost (speed, mode)
3828 	  + 2 * shift_cost (speed, mode, size-1)
3829 	  + 4 * add_cost (speed, mode) < max_cost))
3830     {
3831       moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3832       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3833 			  unsignedp, OPTAB_DIRECT);
3834       if (tem)
3835 	/* We used the wrong signedness.  Adjust the result.  */
3836 	return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3837 					    tem, unsignedp);
3838     }
3839 
3840   /* Try widening multiplication.  */
3841   moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3842   if (convert_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3843       && mul_widen_cost (speed, wider_mode) < max_cost)
3844     {
3845       tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3846 			  unsignedp, OPTAB_WIDEN);
3847       if (tem)
3848 	return extract_high_half (mode, tem);
3849     }
3850 
3851   /* Try widening the mode and perform a non-widening multiplication.  */
3852   if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3853       && size - 1 < BITS_PER_WORD
3854       && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3855 	  < max_cost))
3856     {
3857       rtx_insn *insns;
3858       rtx wop0, wop1;
3859 
3860       /* We need to widen the operands, for example to ensure the
3861 	 constant multiplier is correctly sign or zero extended.
3862 	 Use a sequence to clean-up any instructions emitted by
3863 	 Use a sequence to clean up any instructions emitted by
3864       start_sequence ();
3865       wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3866       wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3867       tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3868 			  unsignedp, OPTAB_WIDEN);
3869       insns = get_insns ();
3870       end_sequence ();
3871 
3872       if (tem)
3873 	{
3874 	  emit_insn (insns);
3875 	  return extract_high_half (mode, tem);
3876 	}
3877     }
3878 
3879   /* Try widening multiplication of opposite signedness, and adjust.  */
3880   moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3881   if (convert_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3882       && size - 1 < BITS_PER_WORD
3883       && (mul_widen_cost (speed, wider_mode)
3884 	  + 2 * shift_cost (speed, mode, size-1)
3885 	  + 4 * add_cost (speed, mode) < max_cost))
3886     {
3887       tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3888 			  NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3889       if (tem != 0)
3890 	{
3891 	  tem = extract_high_half (mode, tem);
3892 	  /* We used the wrong signedness.  Adjust the result.  */
3893 	  return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3894 					      target, unsignedp);
3895 	}
3896     }
3897 
3898   return 0;
3899 }
3900 
3901 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3902    putting the high half of the result in TARGET if that is convenient,
3903    and return where the result is.  If the operation cannot be performed,
3904    0 is returned.
3905 
3906    MODE is the mode of operation and result.
3907 
3908    UNSIGNEDP nonzero means unsigned multiply.
3909 
3910    MAX_COST is the total allowed cost for the expanded RTL.  */
3911 
3912 static rtx
3913 expmed_mult_highpart (scalar_int_mode mode, rtx op0, rtx op1,
3914 		      rtx target, int unsignedp, int max_cost)
3915 {
3916   unsigned HOST_WIDE_INT cnst1;
3917   int extra_cost;
3918   bool sign_adjust = false;
3919   enum mult_variant variant;
3920   struct algorithm alg;
3921   rtx tem;
3922   bool speed = optimize_insn_for_speed_p ();
3923 
3924   /* We can't support modes wider than HOST_BITS_PER_WIDE_INT.  */
3925   gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3926 
3927   cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3928 
3929   /* We can't optimize modes wider than BITS_PER_WORD.
3930      ??? We might be able to perform double-word arithmetic if
3931      mode == word_mode, however all the cost calculations in
3932      synth_mult etc. assume single-word operations.  */
3933   scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3934   if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3935     return expmed_mult_highpart_optab (mode, op0, op1, target,
3936 				       unsignedp, max_cost);
3937 
3938   extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3939 
3940   /* Check whether we try to multiply by a negative constant.  */
3941   if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3942     {
3943       sign_adjust = true;
3944       extra_cost += add_cost (speed, mode);
3945     }
3946 
3947   /* See whether shift/add multiplication is cheap enough.  */
3948   if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3949 			   max_cost - extra_cost))
3950     {
3951       /* See whether the specialized multiplication optabs are
3952 	 cheaper than the shift/add version.  */
3953       tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3954 					alg.cost.cost + extra_cost);
3955       if (tem)
3956 	return tem;
3957 
3958       tem = convert_to_mode (wider_mode, op0, unsignedp);
3959       tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3960       tem = extract_high_half (mode, tem);
3961 
3962       /* Adjust result for signedness.  */
3963       if (sign_adjust)
3964 	tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3965 
3966       return tem;
3967     }
3968   return expmed_mult_highpart_optab (mode, op0, op1, target,
3969 				     unsignedp, max_cost);
3970 }
3971 
3972 
3973 /* Expand signed modulus of OP0 by a power of two D in mode MODE.  */
3974 
3975 static rtx
3976 expand_smod_pow2 (scalar_int_mode mode, rtx op0, HOST_WIDE_INT d)
3977 {
3978   rtx result, temp, shift;
3979   rtx_code_label *label;
3980   int logd;
3981   int prec = GET_MODE_PRECISION (mode);
3982 
3983   logd = floor_log2 (d);
3984   result = gen_reg_rtx (mode);
3985 
3986   /* Avoid conditional branches when they're expensive.  */
3987   if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3988       && optimize_insn_for_speed_p ())
3989     {
3990       rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3991 				      mode, 0, -1);
3992       if (signmask)
3993 	{
3994 	  HOST_WIDE_INT masklow = (HOST_WIDE_INT_1 << logd) - 1;
3995 	  signmask = force_reg (mode, signmask);
3996 	  shift = gen_int_shift_amount (mode, GET_MODE_BITSIZE (mode) - logd);
3997 
3998 	  /* Use the rtx_cost of a LSHIFTRT instruction to determine
3999 	     which instruction sequence to use.  If logical right shifts
4000 	     are expensive then use 2 XORs, 2 SUBs and an AND, otherwise
4001 	     use a LSHIFTRT, 1 ADD, 1 SUB and an AND.  */
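	  /* For example, with MODE = SImode and D = 4 the cheaper sequence
	     computes BIAS = (OP0 < 0 ? 3 : 0) via the logical shift and then
	     ((OP0 + BIAS) & 3) - BIAS; e.g. OP0 = -5 gives
	     ((-5 + 3) & 3) - 3 = -1, the correct value of -5 rem 4.  */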
4002 
4003 	  temp = gen_rtx_LSHIFTRT (mode, result, shift);
4004 	  if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
4005 	      || (set_src_cost (temp, mode, optimize_insn_for_speed_p ())
4006 		  > COSTS_N_INSNS (2)))
4007 	    {
4008 	      temp = expand_binop (mode, xor_optab, op0, signmask,
4009 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
4010 	      temp = expand_binop (mode, sub_optab, temp, signmask,
4011 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
4012 	      temp = expand_binop (mode, and_optab, temp,
4013 				   gen_int_mode (masklow, mode),
4014 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
4015 	      temp = expand_binop (mode, xor_optab, temp, signmask,
4016 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
4017 	      temp = expand_binop (mode, sub_optab, temp, signmask,
4018 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
4019 	    }
4020 	  else
4021 	    {
4022 	      signmask = expand_binop (mode, lshr_optab, signmask, shift,
4023 				       NULL_RTX, 1, OPTAB_LIB_WIDEN);
4024 	      signmask = force_reg (mode, signmask);
4025 
4026 	      temp = expand_binop (mode, add_optab, op0, signmask,
4027 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
4028 	      temp = expand_binop (mode, and_optab, temp,
4029 				   gen_int_mode (masklow, mode),
4030 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
4031 	      temp = expand_binop (mode, sub_optab, temp, signmask,
4032 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
4033 	    }
4034 	  return temp;
4035 	}
4036     }
4037 
4038   /* Mask contains the mode's signbit and the significant bits of the
4039      modulus.  By including the signbit in the operation, many targets
4040      can avoid an explicit compare operation in the following comparison
4041      against zero.  */
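  /* For example, for SImode and D = 4 the mask is 0x80000003; OP0 = -5
     yields -5 & 0x80000003 == 0x80000003 (negative), and the fixup below
     computes ((0x80000003 - 1) | 0xfffffffc) + 1 == -1, i.e. -5 rem 4.  */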
4042   wide_int mask = wi::mask (logd, false, prec);
4043   mask = wi::set_bit (mask, prec - 1);
4044 
4045   temp = expand_binop (mode, and_optab, op0,
4046 		       immed_wide_int_const (mask, mode),
4047 		       result, 1, OPTAB_LIB_WIDEN);
4048   if (temp != result)
4049     emit_move_insn (result, temp);
4050 
4051   label = gen_label_rtx ();
4052   do_cmp_and_jump (result, const0_rtx, GE, mode, label);
4053 
4054   temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
4055 		       0, OPTAB_LIB_WIDEN);
4056 
4057   mask = wi::mask (logd, true, prec);
4058   temp = expand_binop (mode, ior_optab, temp,
4059 		       immed_wide_int_const (mask, mode),
4060 		       result, 1, OPTAB_LIB_WIDEN);
4061   temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
4062 		       0, OPTAB_LIB_WIDEN);
4063   if (temp != result)
4064     emit_move_insn (result, temp);
4065   emit_label (label);
4066   return result;
4067 }
4068 
4069 /* Expand signed division of OP0 by a power of two D in mode MODE.
4070    This routine is only called for positive values of D.  */
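/* All of the sequences below compute the truncating quotient by adding
   D - 1 to OP0 only when OP0 is negative and then shifting right
   arithmetically by log2 (D); e.g. for D = 4, -7 becomes (-7 + 3) >> 2 = -1
   while 7 is simply 7 >> 2 = 1.  */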
4071 
4072 static rtx
4073 expand_sdiv_pow2 (scalar_int_mode mode, rtx op0, HOST_WIDE_INT d)
4074 {
4075   rtx temp;
4076   rtx_code_label *label;
4077   int logd;
4078 
4079   logd = floor_log2 (d);
4080 
4081   if (d == 2
4082       && BRANCH_COST (optimize_insn_for_speed_p (),
4083 		      false) >= 1)
4084     {
4085       temp = gen_reg_rtx (mode);
4086       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
4087       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
4088 			   0, OPTAB_LIB_WIDEN);
4089       return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
4090     }
4091 
4092   if (HAVE_conditional_move
4093       && BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2)
4094     {
4095       rtx temp2;
4096 
4097       start_sequence ();
4098       temp2 = copy_to_mode_reg (mode, op0);
4099       temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode),
4100 			   NULL_RTX, 0, OPTAB_LIB_WIDEN);
4101       temp = force_reg (mode, temp);
4102 
4103       /* Construct "temp2 = (temp2 < 0) ? temp : temp2".  */
4104       temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
4105 				     mode, temp, temp2, mode, 0);
4106       if (temp2)
4107 	{
4108 	  rtx_insn *seq = get_insns ();
4109 	  end_sequence ();
4110 	  emit_insn (seq);
4111 	  return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
4112 	}
4113       end_sequence ();
4114     }
4115 
4116   if (BRANCH_COST (optimize_insn_for_speed_p (),
4117 		   false) >= 2)
4118     {
4119       int ushift = GET_MODE_BITSIZE (mode) - logd;
4120 
4121       temp = gen_reg_rtx (mode);
4122       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
4123       if (GET_MODE_BITSIZE (mode) >= BITS_PER_WORD
4124 	  || shift_cost (optimize_insn_for_speed_p (), mode, ushift)
4125 	     > COSTS_N_INSNS (1))
4126 	temp = expand_binop (mode, and_optab, temp, gen_int_mode (d - 1, mode),
4127 			     NULL_RTX, 0, OPTAB_LIB_WIDEN);
4128       else
4129 	temp = expand_shift (RSHIFT_EXPR, mode, temp,
4130 			     ushift, NULL_RTX, 1);
4131       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
4132 			   0, OPTAB_LIB_WIDEN);
4133       return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
4134     }
4135 
4136   label = gen_label_rtx ();
4137   temp = copy_to_mode_reg (mode, op0);
4138   do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
4139   expand_inc (temp, gen_int_mode (d - 1, mode));
4140   emit_label (label);
4141   return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
4142 }
4143 
4144 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
4145    if that is convenient, and returning where the result is.
4146    You may request either the quotient or the remainder as the result;
4147    specify REM_FLAG nonzero to get the remainder.
4148 
4149    CODE is the expression code for which kind of division this is;
4150    it controls how rounding is done.  MODE is the machine mode to use.
4151    UNSIGNEDP nonzero means do unsigned division.  */
4152 
4153 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
4154    and then correct it by or'ing in missing high bits
4155    if result of ANDI is nonzero.
4156    For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
4157    This could optimize to a bfexts instruction.
4158    But C doesn't use these operations, so their optimizations are
4159    left for later.  */
4160 /* ??? For modulo, we don't actually need the highpart of the first product,
4161    the low part will do nicely.  And for small divisors, the second multiply
4162    can also be a low-part only multiply or even be completely left out.
4163    E.g. to calculate the remainder of a division by 3 with a 32 bit
4164    multiply, multiply with 0x55555556 and extract the upper two bits;
4165    the result is exact for inputs up to 0x1fffffff.
4166    The input range can be reduced by using cross-sum rules.
4167    For odd divisors >= 3, the following table gives right shift counts
4168    so that if a number is shifted by an integer multiple of the given
4169    amount, the remainder stays the same:
4170    2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
4171    14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
4172    0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
4173    20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
4174    0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
4175 
4176    Cross-sum rules for even numbers can be derived by leaving as many bits
4177    to the right alone as the divisor has zeros to the right.
4178    E.g. if x is an unsigned 32 bit number:
4179    (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
4180    */
4181 
4182 rtx
4183 expand_divmod (int rem_flag, enum tree_code code, machine_mode mode,
4184 	       rtx op0, rtx op1, rtx target, int unsignedp)
4185 {
4186   machine_mode compute_mode;
4187   rtx tquotient;
4188   rtx quotient = 0, remainder = 0;
4189   rtx_insn *last;
4190   rtx_insn *insn;
4191   optab optab1, optab2;
4192   int op1_is_constant, op1_is_pow2 = 0;
4193   int max_cost, extra_cost;
4194   static HOST_WIDE_INT last_div_const = 0;
4195   bool speed = optimize_insn_for_speed_p ();
4196 
4197   op1_is_constant = CONST_INT_P (op1);
4198   if (op1_is_constant)
4199     {
4200       wide_int ext_op1 = rtx_mode_t (op1, mode);
4201       op1_is_pow2 = (wi::popcount (ext_op1) == 1
4202 		     || (! unsignedp
4203 			 && wi::popcount (wi::neg (ext_op1)) == 1));
4204     }
4205 
4206   /*
4207      This is the structure of expand_divmod:
4208 
4209      First comes code to fix up the operands so we can perform the operations
4210      correctly and efficiently.
4211 
4212      Second comes a switch statement with code specific for each rounding mode.
4213      For some special operands this code emits all RTL for the desired
4214      operation; in other cases it generates only a quotient and stores it in
4215      QUOTIENT.  The case for trunc division/remainder might leave quotient = 0,
4216      to indicate that it has not done anything.
4217 
4218      Last comes code that finishes the operation.  If QUOTIENT is set and
4219      REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1.  If
4220      QUOTIENT is not set, it is computed using trunc rounding.
4221 
4222      We try to generate special code for division and remainder when OP1 is a
4223      constant.  If |OP1| = 2**n we can use shifts and some other fast
4224      operations.  For other values of OP1, we compute a carefully selected
4225      fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
4226      by m.
4227 
4228      In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
4229      half of the product.  Different strategies for generating the product are
4230      implemented in expmed_mult_highpart.
4231 
4232      If what we actually want is the remainder, we generate that by another
4233      by-constant multiplication and a subtraction.  */
4234 
4235   /* We shouldn't be called with OP1 == const1_rtx, but some of the
4236      code below will malfunction if we are, so check here and handle
4237      the special case if so.  */
4238   if (op1 == const1_rtx)
4239     return rem_flag ? const0_rtx : op0;
4240 
4241   /* When dividing by -1, we could get an overflow.
4242      negv_optab can handle overflows.  */
4243   if (! unsignedp && op1 == constm1_rtx)
4244     {
4245       if (rem_flag)
4246 	return const0_rtx;
4247       return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT
4248 			  ? negv_optab : neg_optab, op0, target, 0);
4249     }
4250 
4251   if (target
4252       /* Don't use the function value register as a target
4253 	 since we have to read it as well as write it,
4254 	 and function-inlining gets confused by this.  */
4255       && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
4256 	  /* Don't clobber an operand while doing a multi-step calculation.  */
4257 	  || ((rem_flag || op1_is_constant)
4258 	      && (reg_mentioned_p (target, op0)
4259 		  || (MEM_P (op0) && MEM_P (target))))
4260 	  || reg_mentioned_p (target, op1)
4261 	  || (MEM_P (op1) && MEM_P (target))))
4262     target = 0;
4263 
4264   /* Get the mode in which to perform this computation.  Normally it will
4265      be MODE, but sometimes we can't do the desired operation in MODE.
4266      If so, pick a wider mode in which we can do the operation.  Convert
4267      to that mode at the start to avoid repeated conversions.
4268 
4269      First see what operations we need.  These depend on the expression
4270      we are evaluating.  (We assume that divxx3 insns exist under the
4271      same conditions that modxx3 insns and that these insns don't normally
4272      fail.  If these assumptions are not correct, we may generate less
4273      efficient code in some cases.)
4274 
4275      Then see if we find a mode in which we can open-code that operation
4276      (either a division, modulus, or shift).  Finally, check for the smallest
4277      mode for which we can do the operation with a library call.  */
4278 
4279   /* We might want to refine this now that we have division-by-constant
4280      optimization.  Since expmed_mult_highpart tries so many variants, it is
4281      not straightforward to generalize this.  Maybe we should make an array
4282      of possible modes in init_expmed?  Save this for GCC 2.7.  */
4283 
4284   optab1 = (op1_is_pow2
4285 	    ? (unsignedp ? lshr_optab : ashr_optab)
4286 	    : (unsignedp ? udiv_optab : sdiv_optab));
4287   optab2 = (op1_is_pow2 ? optab1
4288 	    : (unsignedp ? udivmod_optab : sdivmod_optab));
4289 
4290   FOR_EACH_MODE_FROM (compute_mode, mode)
4291     if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
4292 	|| optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
4293       break;
4294 
4295   if (compute_mode == VOIDmode)
4296     FOR_EACH_MODE_FROM (compute_mode, mode)
4297       if (optab_libfunc (optab1, compute_mode)
4298 	  || optab_libfunc (optab2, compute_mode))
4299 	break;
4300 
4301   /* If we still couldn't find a mode, use MODE, but expand_binop will
4302      probably die.  */
4303   if (compute_mode == VOIDmode)
4304     compute_mode = mode;
4305 
4306   if (target && GET_MODE (target) == compute_mode)
4307     tquotient = target;
4308   else
4309     tquotient = gen_reg_rtx (compute_mode);
4310 
4311 #if 0
4312   /* It should be possible to restrict the precision to GET_MODE_BITSIZE
4313      (mode), and thereby get better code when OP1 is a constant.  Do that
4314      later.  It will require going over all usages of SIZE below.  */
4315   size = GET_MODE_BITSIZE (mode);
4316 #endif
4317 
4318   /* Only deduct something for a REM if the last divide done was
4319      for a different constant.   Then set the constant of the last
4320      divide.  */
4321   max_cost = (unsignedp
4322 	      ? udiv_cost (speed, compute_mode)
4323 	      : sdiv_cost (speed, compute_mode));
4324   if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4325 		     && INTVAL (op1) == last_div_const))
4326     max_cost -= (mul_cost (speed, compute_mode)
4327 		 + add_cost (speed, compute_mode));
4328 
4329   last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
4330 
4331   /* Now convert to the best mode to use.  */
4332   if (compute_mode != mode)
4333     {
4334       op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4335       op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4336 
4337       /* convert_modes may have placed op1 into a register, so we
4338 	 must recompute the following.  */
4339       op1_is_constant = CONST_INT_P (op1);
4340       if (op1_is_constant)
4341 	{
4342 	  wide_int ext_op1 = rtx_mode_t (op1, compute_mode);
4343 	  op1_is_pow2 = (wi::popcount (ext_op1) == 1
4344 			 || (! unsignedp
4345 			     && wi::popcount (wi::neg (ext_op1)) == 1));
4346 	}
4347       else
4348 	op1_is_pow2 = 0;
4349     }
4350 
4351   /* If one of the operands is a volatile MEM, copy it into a register.  */
4352 
4353   if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4354     op0 = force_reg (compute_mode, op0);
4355   if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4356     op1 = force_reg (compute_mode, op1);
4357 
4358   /* If we need the remainder or if OP1 is constant, we need to
4359      put OP0 in a register in case it has any queued subexpressions.  */
4360   if (rem_flag || op1_is_constant)
4361     op0 = force_reg (compute_mode, op0);
4362 
4363   last = get_last_insn ();
4364 
4365   /* Promote floor rounding to trunc rounding for unsigned operations.  */
4366   if (unsignedp)
4367     {
4368       if (code == FLOOR_DIV_EXPR)
4369 	code = TRUNC_DIV_EXPR;
4370       if (code == FLOOR_MOD_EXPR)
4371 	code = TRUNC_MOD_EXPR;
4372       if (code == EXACT_DIV_EXPR && op1_is_pow2)
4373 	code = TRUNC_DIV_EXPR;
4374     }
4375 
4376   if (op1 != const0_rtx)
4377     switch (code)
4378       {
4379       case TRUNC_MOD_EXPR:
4380       case TRUNC_DIV_EXPR:
4381 	if (op1_is_constant)
4382 	  {
4383 	    scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
4384 	    int size = GET_MODE_BITSIZE (int_mode);
4385 	    if (unsignedp)
4386 	      {
4387 		unsigned HOST_WIDE_INT mh, ml;
4388 		int pre_shift, post_shift;
4389 		int dummy;
4390 		wide_int wd = rtx_mode_t (op1, int_mode);
4391 		unsigned HOST_WIDE_INT d = wd.to_uhwi ();
4392 
4393 		if (wi::popcount (wd) == 1)
4394 		  {
4395 		    pre_shift = floor_log2 (d);
4396 		    if (rem_flag)
4397 		      {
4398 			unsigned HOST_WIDE_INT mask
4399 			  = (HOST_WIDE_INT_1U << pre_shift) - 1;
4400 			remainder
4401 			  = expand_binop (int_mode, and_optab, op0,
4402 					  gen_int_mode (mask, int_mode),
4403 					  remainder, 1,
4404 					  OPTAB_LIB_WIDEN);
4405 			if (remainder)
4406 			  return gen_lowpart (mode, remainder);
4407 		      }
4408 		    quotient = expand_shift (RSHIFT_EXPR, int_mode, op0,
4409 					     pre_shift, tquotient, 1);
4410 		  }
4411 		else if (size <= HOST_BITS_PER_WIDE_INT)
4412 		  {
4413 		    if (d >= (HOST_WIDE_INT_1U << (size - 1)))
4414 		      {
4415 			/* Most significant bit of divisor is set; emit an scc
4416 			   insn.  */
4417 			quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4418 							  int_mode, 1, 1);
4419 		      }
4420 		    else
4421 		      {
4422 			/* Find a suitable multiplier and right shift count
4423 			   instead of dividing directly by D.  */
4424 
4425 			mh = choose_multiplier (d, size, size,
4426 						&ml, &post_shift, &dummy);
4427 
4428 			/* If the suggested multiplier is more than SIZE bits,
4429 			   we can do better for even divisors, using an
4430 			   initial right shift.  */
4431 			if (mh != 0 && (d & 1) == 0)
4432 			  {
4433 			    pre_shift = ctz_or_zero (d);
4434 			    mh = choose_multiplier (d >> pre_shift, size,
4435 						    size - pre_shift,
4436 						    &ml, &post_shift, &dummy);
4437 			    gcc_assert (!mh);
4438 			  }
4439 			else
4440 			  pre_shift = 0;
4441 
4442 			if (mh != 0)
4443 			  {
4444 			    rtx t1, t2, t3, t4;
4445 
4446 			    if (post_shift - 1 >= BITS_PER_WORD)
4447 			      goto fail1;
4448 
4449 			    extra_cost
4450 			      = (shift_cost (speed, int_mode, post_shift - 1)
4451 				 + shift_cost (speed, int_mode, 1)
4452 				 + 2 * add_cost (speed, int_mode));
4453 			    t1 = expmed_mult_highpart
4454 			      (int_mode, op0, gen_int_mode (ml, int_mode),
4455 			       NULL_RTX, 1, max_cost - extra_cost);
4456 			    if (t1 == 0)
4457 			      goto fail1;
4458 			    t2 = force_operand (gen_rtx_MINUS (int_mode,
4459 							       op0, t1),
4460 						NULL_RTX);
4461 			    t3 = expand_shift (RSHIFT_EXPR, int_mode,
4462 					       t2, 1, NULL_RTX, 1);
4463 			    t4 = force_operand (gen_rtx_PLUS (int_mode,
4464 							      t1, t3),
4465 						NULL_RTX);
4466 			    quotient = expand_shift
4467 			      (RSHIFT_EXPR, int_mode, t4,
4468 			       post_shift - 1, tquotient, 1);
4469 			  }
4470 			else
4471 			  {
4472 			    rtx t1, t2;
4473 
4474 			    if (pre_shift >= BITS_PER_WORD
4475 				|| post_shift >= BITS_PER_WORD)
4476 			      goto fail1;
4477 
4478 			    t1 = expand_shift
4479 			      (RSHIFT_EXPR, int_mode, op0,
4480 			       pre_shift, NULL_RTX, 1);
4481 			    extra_cost
4482 			      = (shift_cost (speed, int_mode, pre_shift)
4483 				 + shift_cost (speed, int_mode, post_shift));
4484 			    t2 = expmed_mult_highpart
4485 			      (int_mode, t1,
4486 			       gen_int_mode (ml, int_mode),
4487 			       NULL_RTX, 1, max_cost - extra_cost);
4488 			    if (t2 == 0)
4489 			      goto fail1;
4490 			    quotient = expand_shift
4491 			      (RSHIFT_EXPR, int_mode, t2,
4492 			       post_shift, tquotient, 1);
4493 			  }
4494 		      }
4495 		  }
4496 		else		/* Too wide mode to use tricky code */
4497 		  break;
4498 
4499 		insn = get_last_insn ();
4500 		if (insn != last)
4501 		  set_dst_reg_note (insn, REG_EQUAL,
4502 				    gen_rtx_UDIV (int_mode, op0, op1),
4503 				    quotient);
4504 	      }
4505 	    else		/* TRUNC_DIV, signed */
4506 	      {
4507 		unsigned HOST_WIDE_INT ml;
4508 		int lgup, post_shift;
4509 		rtx mlr;
4510 		HOST_WIDE_INT d = INTVAL (op1);
4511 		unsigned HOST_WIDE_INT abs_d;
4512 
4513 		/* Not prepared to handle division/remainder by
4514 		   0xffffffffffffffff8000000000000000 etc.  */
4515 		if (d == HOST_WIDE_INT_MIN && size > HOST_BITS_PER_WIDE_INT)
4516 		  break;
4517 
4518 		/* Since d might be INT_MIN, we have to cast to
4519 		   unsigned HOST_WIDE_INT before negating to avoid
4520 		   undefined signed overflow.  */
4521 		abs_d = (d >= 0
4522 			 ? (unsigned HOST_WIDE_INT) d
4523 			 : - (unsigned HOST_WIDE_INT) d);
4524 
4525 		/* n rem d = n rem -d */
4526 		if (rem_flag && d < 0)
4527 		  {
4528 		    d = abs_d;
4529 		    op1 = gen_int_mode (abs_d, int_mode);
4530 		  }
4531 
4532 		if (d == 1)
4533 		  quotient = op0;
4534 		else if (d == -1)
4535 		  quotient = expand_unop (int_mode, neg_optab, op0,
4536 					  tquotient, 0);
4537 		else if (size <= HOST_BITS_PER_WIDE_INT
4538 			 && abs_d == HOST_WIDE_INT_1U << (size - 1))
4539 		  {
4540 		    /* This case is not handled correctly below.  */
4541 		    quotient = emit_store_flag (tquotient, EQ, op0, op1,
4542 						int_mode, 1, 1);
4543 		    if (quotient == 0)
4544 		      goto fail1;
4545 		  }
4546 		else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4547 			 && (size <= HOST_BITS_PER_WIDE_INT || d >= 0)
4548 			 && (rem_flag
4549 			     ? smod_pow2_cheap (speed, int_mode)
4550 			     : sdiv_pow2_cheap (speed, int_mode))
4551 			 /* We assume that the cheap metric is true if the
4552 			    optab has an expander for this mode.  */
4553 			 && ((optab_handler ((rem_flag ? smod_optab
4554 					      : sdiv_optab),
4555 					     int_mode)
4556 			      != CODE_FOR_nothing)
4557 			     || (optab_handler (sdivmod_optab, int_mode)
4558 				 != CODE_FOR_nothing)))
4559 		  ;
4560 		else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4561 		  {
4562 		    if (rem_flag)
4563 		      {
4564 			remainder = expand_smod_pow2 (int_mode, op0, d);
4565 			if (remainder)
4566 			  return gen_lowpart (mode, remainder);
4567 		      }
4568 
4569 		    if (sdiv_pow2_cheap (speed, int_mode)
4570 			&& ((optab_handler (sdiv_optab, int_mode)
4571 			     != CODE_FOR_nothing)
4572 			    || (optab_handler (sdivmod_optab, int_mode)
4573 				!= CODE_FOR_nothing)))
4574 		      quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4575 						int_mode, op0,
4576 						gen_int_mode (abs_d,
4577 							      int_mode),
4578 						NULL_RTX, 0);
4579 		    else
4580 		      quotient = expand_sdiv_pow2 (int_mode, op0, abs_d);
4581 
4582 		    /* We have computed OP0 / abs(OP1).  If OP1 is negative,
4583 		       negate the quotient.  */
4584 		    if (d < 0)
4585 		      {
4586 			insn = get_last_insn ();
4587 			if (insn != last
4588 			    && abs_d < (HOST_WIDE_INT_1U
4589 					<< (HOST_BITS_PER_WIDE_INT - 1)))
4590 			  set_dst_reg_note (insn, REG_EQUAL,
4591 					    gen_rtx_DIV (int_mode, op0,
4592 							 gen_int_mode
4593 							   (abs_d,
4594 							    int_mode)),
4595 					    quotient);
4596 
4597 			quotient = expand_unop (int_mode, neg_optab,
4598 						quotient, quotient, 0);
4599 		      }
4600 		  }
4601 		else if (size <= HOST_BITS_PER_WIDE_INT)
4602 		  {
4603 		    choose_multiplier (abs_d, size, size - 1,
4604 				       &ml, &post_shift, &lgup);
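		    /* E.g. 32-bit signed division by 3 yields ml == 0x55555556
		       and post_shift == 0, i.e. the classic sequence
		       q = highpart (op0 * ml) - (op0 >> 31).  */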
4605 		    if (ml < HOST_WIDE_INT_1U << (size - 1))
4606 		      {
4607 			rtx t1, t2, t3;
4608 
4609 			if (post_shift >= BITS_PER_WORD
4610 			    || size - 1 >= BITS_PER_WORD)
4611 			  goto fail1;
4612 
4613 			extra_cost = (shift_cost (speed, int_mode, post_shift)
4614 				      + shift_cost (speed, int_mode, size - 1)
4615 				      + add_cost (speed, int_mode));
4616 			t1 = expmed_mult_highpart
4617 			  (int_mode, op0, gen_int_mode (ml, int_mode),
4618 			   NULL_RTX, 0, max_cost - extra_cost);
4619 			if (t1 == 0)
4620 			  goto fail1;
4621 			t2 = expand_shift
4622 			  (RSHIFT_EXPR, int_mode, t1,
4623 			   post_shift, NULL_RTX, 0);
4624 			t3 = expand_shift
4625 			  (RSHIFT_EXPR, int_mode, op0,
4626 			   size - 1, NULL_RTX, 0);
4627 			if (d < 0)
4628 			  quotient
4629 			    = force_operand (gen_rtx_MINUS (int_mode, t3, t2),
4630 					     tquotient);
4631 			else
4632 			  quotient
4633 			    = force_operand (gen_rtx_MINUS (int_mode, t2, t3),
4634 					     tquotient);
4635 		      }
4636 		    else
4637 		      {
4638 			rtx t1, t2, t3, t4;
4639 
4640 			if (post_shift >= BITS_PER_WORD
4641 			    || size - 1 >= BITS_PER_WORD)
4642 			  goto fail1;
4643 
4644 			ml |= HOST_WIDE_INT_M1U << (size - 1);
4645 			mlr = gen_int_mode (ml, int_mode);
4646 			extra_cost = (shift_cost (speed, int_mode, post_shift)
4647 				      + shift_cost (speed, int_mode, size - 1)
4648 				      + 2 * add_cost (speed, int_mode));
4649 			t1 = expmed_mult_highpart (int_mode, op0, mlr,
4650 						   NULL_RTX, 0,
4651 						   max_cost - extra_cost);
4652 			if (t1 == 0)
4653 			  goto fail1;
4654 			t2 = force_operand (gen_rtx_PLUS (int_mode, t1, op0),
4655 					    NULL_RTX);
4656 			t3 = expand_shift
4657 			  (RSHIFT_EXPR, int_mode, t2,
4658 			   post_shift, NULL_RTX, 0);
4659 			t4 = expand_shift
4660 			  (RSHIFT_EXPR, int_mode, op0,
4661 			   size - 1, NULL_RTX, 0);
4662 			if (d < 0)
4663 			  quotient
4664 			    = force_operand (gen_rtx_MINUS (int_mode, t4, t3),
4665 					     tquotient);
4666 			else
4667 			  quotient
4668 			    = force_operand (gen_rtx_MINUS (int_mode, t3, t4),
4669 					     tquotient);
4670 		      }
4671 		  }
4672 		else		/* Too wide mode to use tricky code */
4673 		  break;
4674 
4675 		insn = get_last_insn ();
4676 		if (insn != last)
4677 		  set_dst_reg_note (insn, REG_EQUAL,
4678 				    gen_rtx_DIV (int_mode, op0, op1),
4679 				    quotient);
4680 	      }
4681 	    break;
4682 	  }
4683       fail1:
4684 	delete_insns_since (last);
4685 	break;
4686 
4687       case FLOOR_DIV_EXPR:
4688       case FLOOR_MOD_EXPR:
4689       /* We will come here only for signed operations.  */
4690 	if (op1_is_constant && HWI_COMPUTABLE_MODE_P (compute_mode))
4691 	  {
4692 	    scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
4693 	    int size = GET_MODE_BITSIZE (int_mode);
4694 	    unsigned HOST_WIDE_INT mh, ml;
4695 	    int pre_shift, lgup, post_shift;
4696 	    HOST_WIDE_INT d = INTVAL (op1);
4697 
4698 	    if (d > 0)
4699 	      {
4700 		/* We could just as easily deal with negative constants here,
4701 		   but it does not seem worth the trouble for GCC 2.6.  */
4702 		if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4703 		  {
4704 		    pre_shift = floor_log2 (d);
4705 		    if (rem_flag)
4706 		      {
4707 			unsigned HOST_WIDE_INT mask
4708 			  = (HOST_WIDE_INT_1U << pre_shift) - 1;
4709 			remainder = expand_binop
4710 			  (int_mode, and_optab, op0,
4711 			   gen_int_mode (mask, int_mode),
4712 			   remainder, 0, OPTAB_LIB_WIDEN);
4713 			if (remainder)
4714 			  return gen_lowpart (mode, remainder);
4715 		      }
4716 		    quotient = expand_shift
4717 		      (RSHIFT_EXPR, int_mode, op0,
4718 		       pre_shift, tquotient, 0);
4719 		  }
4720 		else
4721 		  {
4722 		    rtx t1, t2, t3, t4;
4723 
4724 		    mh = choose_multiplier (d, size, size - 1,
4725 					    &ml, &post_shift, &lgup);
4726 		    gcc_assert (!mh);
4727 
4728 		    if (post_shift < BITS_PER_WORD
4729 			&& size - 1 < BITS_PER_WORD)
4730 		      {
4731 			t1 = expand_shift
4732 			  (RSHIFT_EXPR, int_mode, op0,
4733 			   size - 1, NULL_RTX, 0);
4734 			t2 = expand_binop (int_mode, xor_optab, op0, t1,
4735 					   NULL_RTX, 0, OPTAB_WIDEN);
4736 			extra_cost = (shift_cost (speed, int_mode, post_shift)
4737 				      + shift_cost (speed, int_mode, size - 1)
4738 				      + 2 * add_cost (speed, int_mode));
4739 			t3 = expmed_mult_highpart
4740 			  (int_mode, t2, gen_int_mode (ml, int_mode),
4741 			   NULL_RTX, 1, max_cost - extra_cost);
4742 			if (t3 != 0)
4743 			  {
4744 			    t4 = expand_shift
4745 			      (RSHIFT_EXPR, int_mode, t3,
4746 			       post_shift, NULL_RTX, 1);
4747 			    quotient = expand_binop (int_mode, xor_optab,
4748 						     t4, t1, tquotient, 0,
4749 						     OPTAB_WIDEN);
4750 			  }
4751 		      }
4752 		  }
4753 	      }
4754 	    else
4755 	      {
4756 		rtx nsign, t1, t2, t3, t4;
4757 		t1 = force_operand (gen_rtx_PLUS (int_mode,
4758 						  op0, constm1_rtx), NULL_RTX);
4759 		t2 = expand_binop (int_mode, ior_optab, op0, t1, NULL_RTX,
4760 				   0, OPTAB_WIDEN);
4761 		nsign = expand_shift (RSHIFT_EXPR, int_mode, t2,
4762 				      size - 1, NULL_RTX, 0);
4763 		t3 = force_operand (gen_rtx_MINUS (int_mode, t1, nsign),
4764 				    NULL_RTX);
4765 		t4 = expand_divmod (0, TRUNC_DIV_EXPR, int_mode, t3, op1,
4766 				    NULL_RTX, 0);
4767 		if (t4)
4768 		  {
4769 		    rtx t5;
4770 		    t5 = expand_unop (int_mode, one_cmpl_optab, nsign,
4771 				      NULL_RTX, 0);
4772 		    quotient = force_operand (gen_rtx_PLUS (int_mode, t4, t5),
4773 					      tquotient);
4774 		  }
4775 	      }
4776 	  }
4777 
4778 	if (quotient != 0)
4779 	  break;
4780 	delete_insns_since (last);
4781 
4782 	/* Try using an instruction that produces both the quotient and
4783 	   remainder, using truncation.  We can easily compensate the quotient
4784 	   or remainder to get floor rounding, once we have the remainder.
4785 	   Notice that we also compute the final remainder value here,
4786 	   and return the result right away.  */
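	/* The compensation is needed only when the remainder is nonzero and
	   the operands have opposite signs: then the truncated quotient is
	   decremented and OP1 is added back into the remainder, e.g.
	   -7 / 2 goes from -3 rem -1 to the floor result -4 rem 1.  */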
4787 	if (target == 0 || GET_MODE (target) != compute_mode)
4788 	  target = gen_reg_rtx (compute_mode);
4789 
4790 	if (rem_flag)
4791 	  {
4792 	    remainder
4793 	      = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4794 	    quotient = gen_reg_rtx (compute_mode);
4795 	  }
4796 	else
4797 	  {
4798 	    quotient
4799 	      = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4800 	    remainder = gen_reg_rtx (compute_mode);
4801 	  }
4802 
4803 	if (expand_twoval_binop (sdivmod_optab, op0, op1,
4804 				 quotient, remainder, 0))
4805 	  {
4806 	    /* This could be computed with a branch-less sequence.
4807 	       Save that for later.  */
4808 	    rtx tem;
4809 	    rtx_code_label *label = gen_label_rtx ();
4810 	    do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4811 	    tem = expand_binop (compute_mode, xor_optab, op0, op1,
4812 				NULL_RTX, 0, OPTAB_WIDEN);
4813 	    do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4814 	    expand_dec (quotient, const1_rtx);
4815 	    expand_inc (remainder, op1);
4816 	    emit_label (label);
4817 	    return gen_lowpart (mode, rem_flag ? remainder : quotient);
4818 	  }
4819 
4820 	/* No luck with division elimination or divmod.  Have to do it
4821 	   by conditionally adjusting op0 *and* the result.  */
4822 	{
4823 	  rtx_code_label *label1, *label2, *label3, *label4, *label5;
4824 	  rtx adjusted_op0;
4825 	  rtx tem;
4826 
4827 	  quotient = gen_reg_rtx (compute_mode);
4828 	  adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4829 	  label1 = gen_label_rtx ();
4830 	  label2 = gen_label_rtx ();
4831 	  label3 = gen_label_rtx ();
4832 	  label4 = gen_label_rtx ();
4833 	  label5 = gen_label_rtx ();
4834 	  do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4835 	  do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4836 	  tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4837 			      quotient, 0, OPTAB_LIB_WIDEN);
4838 	  if (tem != quotient)
4839 	    emit_move_insn (quotient, tem);
4840 	  emit_jump_insn (targetm.gen_jump (label5));
4841 	  emit_barrier ();
4842 	  emit_label (label1);
4843 	  expand_inc (adjusted_op0, const1_rtx);
4844 	  emit_jump_insn (targetm.gen_jump (label4));
4845 	  emit_barrier ();
4846 	  emit_label (label2);
4847 	  do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4848 	  tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4849 			      quotient, 0, OPTAB_LIB_WIDEN);
4850 	  if (tem != quotient)
4851 	    emit_move_insn (quotient, tem);
4852 	  emit_jump_insn (targetm.gen_jump (label5));
4853 	  emit_barrier ();
4854 	  emit_label (label3);
4855 	  expand_dec (adjusted_op0, const1_rtx);
4856 	  emit_label (label4);
4857 	  tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4858 			      quotient, 0, OPTAB_LIB_WIDEN);
4859 	  if (tem != quotient)
4860 	    emit_move_insn (quotient, tem);
4861 	  expand_dec (quotient, const1_rtx);
4862 	  emit_label (label5);
4863 	}
4864 	break;
4865 
4866       case CEIL_DIV_EXPR:
4867       case CEIL_MOD_EXPR:
4868 	if (unsignedp)
4869 	  {
4870 	    if (op1_is_constant
4871 		&& EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4872 		&& (HWI_COMPUTABLE_MODE_P (compute_mode)
4873 		    || INTVAL (op1) >= 0))
4874 	      {
4875 		scalar_int_mode int_mode
4876 		  = as_a <scalar_int_mode> (compute_mode);
4877 		rtx t1, t2, t3;
4878 		unsigned HOST_WIDE_INT d = INTVAL (op1);
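		/* Ceiling division by a power of two: take the truncated
		   quotient and add 1 whenever any low bits were nonzero,
		   e.g. 13 / 4 becomes (13 >> 2) + 1 == 4.  */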
4879 		t1 = expand_shift (RSHIFT_EXPR, int_mode, op0,
4880 				   floor_log2 (d), tquotient, 1);
4881 		t2 = expand_binop (int_mode, and_optab, op0,
4882 				   gen_int_mode (d - 1, int_mode),
4883 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
4884 		t3 = gen_reg_rtx (int_mode);
4885 		t3 = emit_store_flag (t3, NE, t2, const0_rtx, int_mode, 1, 1);
4886 		if (t3 == 0)
4887 		  {
4888 		    rtx_code_label *lab;
4889 		    lab = gen_label_rtx ();
4890 		    do_cmp_and_jump (t2, const0_rtx, EQ, int_mode, lab);
4891 		    expand_inc (t1, const1_rtx);
4892 		    emit_label (lab);
4893 		    quotient = t1;
4894 		  }
4895 		else
4896 		  quotient = force_operand (gen_rtx_PLUS (int_mode, t1, t3),
4897 					    tquotient);
4898 		break;
4899 	      }
4900 
4901 	    /* Try using an instruction that produces both the quotient and
4902 	       remainder, using truncation.  We can easily compensate the
4903 	       quotient or remainder to get ceiling rounding, once we have the
4904 	       remainder.  Notice that we also compute the final remainder
4905 	       value here, and return the result right away.  */
4906 	    if (target == 0 || GET_MODE (target) != compute_mode)
4907 	      target = gen_reg_rtx (compute_mode);
4908 
4909 	    if (rem_flag)
4910 	      {
4911 		remainder = (REG_P (target)
4912 			     ? target : gen_reg_rtx (compute_mode));
4913 		quotient = gen_reg_rtx (compute_mode);
4914 	      }
4915 	    else
4916 	      {
4917 		quotient = (REG_P (target)
4918 			    ? target : gen_reg_rtx (compute_mode));
4919 		remainder = gen_reg_rtx (compute_mode);
4920 	      }
4921 
4922 	    if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4923 				     remainder, 1))
4924 	      {
4925 		/* This could be computed with a branch-less sequence.
4926 		   Save that for later.  */
4927 		rtx_code_label *label = gen_label_rtx ();
4928 		do_cmp_and_jump (remainder, const0_rtx, EQ,
4929 				 compute_mode, label);
4930 		expand_inc (quotient, const1_rtx);
4931 		expand_dec (remainder, op1);
4932 		emit_label (label);
4933 		return gen_lowpart (mode, rem_flag ? remainder : quotient);
4934 	      }
4935 
4936 	    /* No luck with division elimination or divmod.  Have to do it
4937 	       by conditionally adjusting op0 *and* the result.  */
4938 	    {
4939 	      rtx_code_label *label1, *label2;
4940 	      rtx adjusted_op0, tem;
4941 
4942 	      quotient = gen_reg_rtx (compute_mode);
4943 	      adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4944 	      label1 = gen_label_rtx ();
4945 	      label2 = gen_label_rtx ();
4946 	      do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4947 			       compute_mode, label1);
4948 	      emit_move_insn (quotient, const0_rtx);
4949 	      emit_jump_insn (targetm.gen_jump (label2));
4950 	      emit_barrier ();
4951 	      emit_label (label1);
4952 	      expand_dec (adjusted_op0, const1_rtx);
4953 	      tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4954 				  quotient, 1, OPTAB_LIB_WIDEN);
4955 	      if (tem != quotient)
4956 		emit_move_insn (quotient, tem);
4957 	      expand_inc (quotient, const1_rtx);
4958 	      emit_label (label2);
4959 	    }
4960 	  }
4961 	else /* signed */
4962 	  {
4963 	    if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4964 		&& INTVAL (op1) >= 0)
4965 	      {
4966 		/* This is extremely similar to the code for the unsigned case
4967 		   above.  For 2.7 we should merge these variants, but for
4968 		   2.6.1 I don't want to touch the code for unsigned since that
4969 		   get used in C.  The signed case will only be used by other
4970 		   gets used in C.  The signed case will only be used by other
4971 
4972 		rtx t1, t2, t3;
4973 		unsigned HOST_WIDE_INT d = INTVAL (op1);
4974 		t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4975 				   floor_log2 (d), tquotient, 0);
4976 		t2 = expand_binop (compute_mode, and_optab, op0,
4977 				   gen_int_mode (d - 1, compute_mode),
4978 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
4979 		t3 = gen_reg_rtx (compute_mode);
4980 		t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4981 				      compute_mode, 1, 1);
4982 		if (t3 == 0)
4983 		  {
4984 		    rtx_code_label *lab;
4985 		    lab = gen_label_rtx ();
4986 		    do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4987 		    expand_inc (t1, const1_rtx);
4988 		    emit_label (lab);
4989 		    quotient = t1;
4990 		  }
4991 		else
4992 		  quotient = force_operand (gen_rtx_PLUS (compute_mode,
4993 							  t1, t3),
4994 					    tquotient);
4995 		break;
4996 	      }
4997 
4998 	    /* Try using an instruction that produces both the quotient and
4999 	       remainder, using truncation.  We can easily compensate the
5000 	       quotient or remainder to get ceiling rounding, once we have the
5001 	       remainder.  Notice that we also compute the final remainder
5002 	       value here, and return the result right away.  */
5003 	    if (target == 0 || GET_MODE (target) != compute_mode)
5004 	      target = gen_reg_rtx (compute_mode);
5005 	    if (rem_flag)
5006 	      {
5007 		remainder = (REG_P (target)
5008 			    ? target : gen_reg_rtx (compute_mode));
5009 		quotient = gen_reg_rtx (compute_mode);
5010 	      }
5011 	    else
5012 	      {
5013 		quotient = (REG_P (target)
5014 			    ? target : gen_reg_rtx (compute_mode));
5015 		remainder = gen_reg_rtx (compute_mode);
5016 	      }
5017 
5018 	    if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
5019 				     remainder, 0))
5020 	      {
5021 		/* This could be computed with a branch-less sequence.
5022 		   Save that for later.  */
5023 		rtx tem;
5024 		rtx_code_label *label = gen_label_rtx ();
5025 		do_cmp_and_jump (remainder, const0_rtx, EQ,
5026 				 compute_mode, label);
5027 		tem = expand_binop (compute_mode, xor_optab, op0, op1,
5028 				    NULL_RTX, 0, OPTAB_WIDEN);
5029 		do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
5030 		expand_inc (quotient, const1_rtx);
5031 		expand_dec (remainder, op1);
5032 		emit_label (label);
5033 		return gen_lowpart (mode, rem_flag ? remainder : quotient);
5034 	      }
5035 
5036 	    /* No luck with division elimination or divmod.  Have to do it
5037 	       by conditionally adjusting op0 *and* the result.  */
5038 	    {
5039 	      rtx_code_label *label1, *label2, *label3, *label4, *label5;
5040 	      rtx adjusted_op0;
5041 	      rtx tem;
5042 
5043 	      quotient = gen_reg_rtx (compute_mode);
5044 	      adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
5045 	      label1 = gen_label_rtx ();
5046 	      label2 = gen_label_rtx ();
5047 	      label3 = gen_label_rtx ();
5048 	      label4 = gen_label_rtx ();
5049 	      label5 = gen_label_rtx ();
5050 	      do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
5051 	      do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
5052 			       compute_mode, label1);
5053 	      tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
5054 				  quotient, 0, OPTAB_LIB_WIDEN);
5055 	      if (tem != quotient)
5056 		emit_move_insn (quotient, tem);
5057 	      emit_jump_insn (targetm.gen_jump (label5));
5058 	      emit_barrier ();
5059 	      emit_label (label1);
5060 	      expand_dec (adjusted_op0, const1_rtx);
5061 	      emit_jump_insn (targetm.gen_jump (label4));
5062 	      emit_barrier ();
5063 	      emit_label (label2);
5064 	      do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
5065 			       compute_mode, label3);
5066 	      tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
5067 				  quotient, 0, OPTAB_LIB_WIDEN);
5068 	      if (tem != quotient)
5069 		emit_move_insn (quotient, tem);
5070 	      emit_jump_insn (targetm.gen_jump (label5));
5071 	      emit_barrier ();
5072 	      emit_label (label3);
5073 	      expand_inc (adjusted_op0, const1_rtx);
5074 	      emit_label (label4);
5075 	      tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
5076 				  quotient, 0, OPTAB_LIB_WIDEN);
5077 	      if (tem != quotient)
5078 		emit_move_insn (quotient, tem);
5079 	      expand_inc (quotient, const1_rtx);
5080 	      emit_label (label5);
5081 	    }
5082 	  }
5083 	break;
5084 
5085       case EXACT_DIV_EXPR:
5086 	if (op1_is_constant && HWI_COMPUTABLE_MODE_P (compute_mode))
5087 	  {
5088 	    scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
5089 	    int size = GET_MODE_BITSIZE (int_mode);
5090 	    HOST_WIDE_INT d = INTVAL (op1);
5091 	    unsigned HOST_WIDE_INT ml;
5092 	    int pre_shift;
5093 	    rtx t1;
5094 
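	    /* An exact division can be done by shifting out the power-of-two
	       factor of D and multiplying by the inverse of its odd part
	       modulo 2^SIZE; e.g. an exact 32-bit division by 5 is a multiply
	       by 0xCCCCCCCD, since 5 * 0xCCCCCCCD == 1 (mod 2^32).  */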
5095 	    pre_shift = ctz_or_zero (d);
5096 	    ml = invert_mod2n (d >> pre_shift, size);
5097 	    t1 = expand_shift (RSHIFT_EXPR, int_mode, op0,
5098 			       pre_shift, NULL_RTX, unsignedp);
5099 	    quotient = expand_mult (int_mode, t1, gen_int_mode (ml, int_mode),
5100 				    NULL_RTX, 1);
5101 
5102 	    insn = get_last_insn ();
5103 	    set_dst_reg_note (insn, REG_EQUAL,
5104 			      gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
5105 					      int_mode, op0, op1),
5106 			      quotient);
5107 	  }
5108 	break;
5109 
5110       case ROUND_DIV_EXPR:
5111       case ROUND_MOD_EXPR:
5112 	if (unsignedp)
5113 	  {
5114 	    scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
5115 	    rtx tem;
5116 	    rtx_code_label *label;
5117 	    label = gen_label_rtx ();
5118 	    quotient = gen_reg_rtx (int_mode);
5119 	    remainder = gen_reg_rtx (int_mode);
5120 	    if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
5121 	      {
5122 		rtx tem;
5123 		quotient = expand_binop (int_mode, udiv_optab, op0, op1,
5124 					 quotient, 1, OPTAB_LIB_WIDEN);
5125 		tem = expand_mult (int_mode, quotient, op1, NULL_RTX, 1);
5126 		remainder = expand_binop (int_mode, sub_optab, op0, tem,
5127 					  remainder, 1, OPTAB_LIB_WIDEN);
5128 	      }
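	    /* Round to nearest: bump the quotient (and subtract OP1 from the
	       remainder) when the remainder exceeds (OP1 - 1) / 2, e.g.
	       7 / 4 has remainder 3 > 1 and so rounds up to 2.  */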
5129 	    tem = plus_constant (int_mode, op1, -1);
5130 	    tem = expand_shift (RSHIFT_EXPR, int_mode, tem, 1, NULL_RTX, 1);
5131 	    do_cmp_and_jump (remainder, tem, LEU, int_mode, label);
5132 	    expand_inc (quotient, const1_rtx);
5133 	    expand_dec (remainder, op1);
5134 	    emit_label (label);
5135 	  }
5136 	else
5137 	  {
5138 	    scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
5139 	    int size = GET_MODE_BITSIZE (int_mode);
5140 	    rtx abs_rem, abs_op1, tem, mask;
5141 	    rtx_code_label *label;
5142 	    label = gen_label_rtx ();
5143 	    quotient = gen_reg_rtx (int_mode);
5144 	    remainder = gen_reg_rtx (int_mode);
5145 	    if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
5146 	      {
5147 		rtx tem;
5148 		quotient = expand_binop (int_mode, sdiv_optab, op0, op1,
5149 					 quotient, 0, OPTAB_LIB_WIDEN);
5150 		tem = expand_mult (int_mode, quotient, op1, NULL_RTX, 0);
5151 		remainder = expand_binop (int_mode, sub_optab, op0, tem,
5152 					  remainder, 0, OPTAB_LIB_WIDEN);
5153 	      }
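	    /* Signed round to nearest: adjust when 2 * |remainder| >= |OP1|.
	       MASK below is the sign of op0 ^ op1, so (mask ^ 1) - mask is
	       the +1 or -1 added to the quotient, and (mask ^ op1) - mask is
	       the OP1 or -OP1 subtracted from the remainder, giving a
	       branch-free correction.  */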
5154 	    abs_rem = expand_abs (int_mode, remainder, NULL_RTX, 1, 0);
5155 	    abs_op1 = expand_abs (int_mode, op1, NULL_RTX, 1, 0);
5156 	    tem = expand_shift (LSHIFT_EXPR, int_mode, abs_rem,
5157 				1, NULL_RTX, 1);
5158 	    do_cmp_and_jump (tem, abs_op1, LTU, int_mode, label);
5159 	    tem = expand_binop (int_mode, xor_optab, op0, op1,
5160 				NULL_RTX, 0, OPTAB_WIDEN);
5161 	    mask = expand_shift (RSHIFT_EXPR, int_mode, tem,
5162 				 size - 1, NULL_RTX, 0);
5163 	    tem = expand_binop (int_mode, xor_optab, mask, const1_rtx,
5164 				NULL_RTX, 0, OPTAB_WIDEN);
5165 	    tem = expand_binop (int_mode, sub_optab, tem, mask,
5166 				NULL_RTX, 0, OPTAB_WIDEN);
5167 	    expand_inc (quotient, tem);
5168 	    tem = expand_binop (int_mode, xor_optab, mask, op1,
5169 				NULL_RTX, 0, OPTAB_WIDEN);
5170 	    tem = expand_binop (int_mode, sub_optab, tem, mask,
5171 				NULL_RTX, 0, OPTAB_WIDEN);
5172 	    expand_dec (remainder, tem);
5173 	    emit_label (label);
5174 	  }
5175 	return gen_lowpart (mode, rem_flag ? remainder : quotient);
5176 
5177       default:
5178 	gcc_unreachable ();
5179       }
5180 
5181   if (quotient == 0)
5182     {
5183       if (target && GET_MODE (target) != compute_mode)
5184 	target = 0;
5185 
5186       if (rem_flag)
5187 	{
5188 	  /* Try to produce the remainder without producing the quotient.
5189 	     If we seem to have a divmod pattern that does not require widening,
5190 	     don't try widening here.  We should really have a WIDEN argument
5191 	     to expand_twoval_binop, since what we'd really like to do here is
5192 	     1) try a mod insn in compute_mode
5193 	     2) try a divmod insn in compute_mode
5194 	     3) try a div insn in compute_mode and multiply-subtract to get
5195 	        remainder
5196 	     4) try the same things with widening allowed.  */
5197 	  remainder
5198 	    = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5199 				 op0, op1, target,
5200 				 unsignedp,
5201 				 ((optab_handler (optab2, compute_mode)
5202 				   != CODE_FOR_nothing)
5203 				  ? OPTAB_DIRECT : OPTAB_WIDEN));
5204 	  if (remainder == 0)
5205 	    {
5206 	      /* No luck there.  Can we do remainder and divide at once
5207 		 without a library call?  */
5208 	      remainder = gen_reg_rtx (compute_mode);
5209 	      if (! expand_twoval_binop ((unsignedp
5210 					  ? udivmod_optab
5211 					  : sdivmod_optab),
5212 					 op0, op1,
5213 					 NULL_RTX, remainder, unsignedp))
5214 		remainder = 0;
5215 	    }
5216 
5217 	  if (remainder)
5218 	    return gen_lowpart (mode, remainder);
5219 	}
5220 
5221       /* Produce the quotient.  Try a quotient insn, but not a library call.
5222 	 If we have a divmod in this mode, use it in preference to widening
5223 	 the div (for this test we assume it will not fail). Note that optab2
5224 	 is set to the one of the two optabs that the call below will use.  */
5225       quotient
5226 	= sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
5227 			     op0, op1, rem_flag ? NULL_RTX : target,
5228 			     unsignedp,
5229 			     ((optab_handler (optab2, compute_mode)
5230 			       != CODE_FOR_nothing)
5231 			      ? OPTAB_DIRECT : OPTAB_WIDEN));
5232 
5233       if (quotient == 0)
5234 	{
5235 	  /* No luck there.  Try a quotient-and-remainder insn,
5236 	     keeping the quotient alone.  */
5237 	  quotient = gen_reg_rtx (compute_mode);
5238 	  if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
5239 				     op0, op1,
5240 				     quotient, NULL_RTX, unsignedp))
5241 	    {
5242 	      quotient = 0;
5243 	      if (! rem_flag)
5244 		/* Still no luck.  If we are not computing the remainder,
5245 		   use a library call for the quotient.  */
5246 		quotient = sign_expand_binop (compute_mode,
5247 					      udiv_optab, sdiv_optab,
5248 					      op0, op1, target,
5249 					      unsignedp, OPTAB_LIB_WIDEN);
5250 	    }
5251 	}
5252     }
5253 
5254   if (rem_flag)
5255     {
5256       if (target && GET_MODE (target) != compute_mode)
5257 	target = 0;
5258 
5259       if (quotient == 0)
5260 	{
5261 	  /* No divide instruction either.  Use library for remainder.  */
5262 	  remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5263 					 op0, op1, target,
5264 					 unsignedp, OPTAB_LIB_WIDEN);
5265 	  /* No remainder function.  Try a quotient-and-remainder
5266 	     function, keeping the remainder.  */
5267 	  if (!remainder)
5268 	    {
5269 	      remainder = gen_reg_rtx (compute_mode);
5270 	      if (!expand_twoval_binop_libfunc
5271 		  (unsignedp ? udivmod_optab : sdivmod_optab,
5272 		   op0, op1,
5273 		   NULL_RTX, remainder,
5274 		   unsignedp ? UMOD : MOD))
5275 		remainder = NULL_RTX;
5276 	    }
5277 	}
5278       else
5279 	{
5280 	  /* We divided.  Now finish doing X - Y * (X / Y).  */
5281 	  remainder = expand_mult (compute_mode, quotient, op1,
5282 				   NULL_RTX, unsignedp);
5283 	  remainder = expand_binop (compute_mode, sub_optab, op0,
5284 				    remainder, target, unsignedp,
5285 				    OPTAB_LIB_WIDEN);
5286 	}
5287     }
5288 
5289   return gen_lowpart (mode, rem_flag ? remainder : quotient);
5290 }
5291 
5292 /* Return a tree node with data type TYPE, describing the value of X.
5293    Usually this is a VAR_DECL, if there is no obvious better choice.
5294    X may be an expression; however, we only support those expressions
5295    generated by loop.c.  */
5296 
5297 tree
5298 make_tree (tree type, rtx x)
5299 {
5300   tree t;
5301 
5302   switch (GET_CODE (x))
5303     {
5304     case CONST_INT:
5305     case CONST_WIDE_INT:
5306       t = wide_int_to_tree (type, rtx_mode_t (x, TYPE_MODE (type)));
5307       return t;
5308 
5309     case CONST_DOUBLE:
5310       STATIC_ASSERT (HOST_BITS_PER_WIDE_INT * 2 <= MAX_BITSIZE_MODE_ANY_INT);
5311       if (TARGET_SUPPORTS_WIDE_INT == 0 && GET_MODE (x) == VOIDmode)
5312 	t = wide_int_to_tree (type,
5313 			      wide_int::from_array (&CONST_DOUBLE_LOW (x), 2,
5314 						    HOST_BITS_PER_WIDE_INT * 2));
5315       else
5316 	t = build_real (type, *CONST_DOUBLE_REAL_VALUE (x));
5317 
5318       return t;
5319 
5320     case CONST_VECTOR:
5321       {
5322 	unsigned int npatterns = CONST_VECTOR_NPATTERNS (x);
5323 	unsigned int nelts_per_pattern = CONST_VECTOR_NELTS_PER_PATTERN (x);
5324 	tree itype = TREE_TYPE (type);
5325 
5326 	/* Build a tree with vector elements.  */
5327 	tree_vector_builder elts (type, npatterns, nelts_per_pattern);
5328 	unsigned int count = elts.encoded_nelts ();
5329 	for (unsigned int i = 0; i < count; ++i)
5330 	  {
5331 	    rtx elt = CONST_VECTOR_ELT (x, i);
5332 	    elts.quick_push (make_tree (itype, elt));
5333 	  }
5334 
5335 	return elts.build ();
5336       }
5337 
5338     case PLUS:
5339       return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5340 			  make_tree (type, XEXP (x, 1)));
5341 
5342     case MINUS:
5343       return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5344 			  make_tree (type, XEXP (x, 1)));
5345 
5346     case NEG:
5347       return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5348 
5349     case MULT:
5350       return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5351 			  make_tree (type, XEXP (x, 1)));
5352 
5353     case ASHIFT:
5354       return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5355 			  make_tree (type, XEXP (x, 1)));
5356 
5357     case LSHIFTRT:
5358       t = unsigned_type_for (type);
5359       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5360 			    		 make_tree (t, XEXP (x, 0)),
5361 				    	 make_tree (type, XEXP (x, 1))));
5362 
5363     case ASHIFTRT:
5364       t = signed_type_for (type);
5365       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5366 					 make_tree (t, XEXP (x, 0)),
5367 				    	 make_tree (type, XEXP (x, 1))));
5368 
5369     case DIV:
5370       if (TREE_CODE (type) != REAL_TYPE)
5371 	t = signed_type_for (type);
5372       else
5373 	t = type;
5374 
5375       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5376 				    	 make_tree (t, XEXP (x, 0)),
5377 				    	 make_tree (t, XEXP (x, 1))));
5378     case UDIV:
5379       t = unsigned_type_for (type);
5380       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5381 				    	 make_tree (t, XEXP (x, 0)),
5382 				    	 make_tree (t, XEXP (x, 1))));
5383 
5384     case SIGN_EXTEND:
5385     case ZERO_EXTEND:
5386       t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5387 					  GET_CODE (x) == ZERO_EXTEND);
5388       return fold_convert (type, make_tree (t, XEXP (x, 0)));
5389 
5390     case CONST:
5391       return make_tree (type, XEXP (x, 0));
5392 
5393     case SYMBOL_REF:
5394       t = SYMBOL_REF_DECL (x);
5395       if (t)
5396 	return fold_convert (type, build_fold_addr_expr (t));
5397       /* fall through.  */
5398 
5399     default:
5400       if (CONST_POLY_INT_P (x))
5401 	return wide_int_to_tree (type, const_poly_int_value (x));
5402 
5403       t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5404 
5405       /* If TYPE is a POINTER_TYPE, we might need to convert X from
5406 	 address mode to pointer mode.  */
5407       if (POINTER_TYPE_P (type))
5408 	x = convert_memory_address_addr_space
5409 	  (SCALAR_INT_TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5410 
5411       /* Note that we do *not* use SET_DECL_RTL here, because we do not
5412 	 want set_decl_rtl to go adjusting REG_ATTRS for this temporary.  */
5413       t->decl_with_rtl.rtl = x;
5414 
5415       return t;
5416     }
5417 }
5418 
5419 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5420    and returning TARGET.
5421 
5422    If TARGET is 0, a pseudo-register or constant is returned.  */
5423 
5424 rtx
5425 expand_and (machine_mode mode, rtx op0, rtx op1, rtx target)
5426 {
5427   rtx tem = 0;
5428 
5429   if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5430     tem = simplify_binary_operation (AND, mode, op0, op1);
5431   if (tem == 0)
5432     tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5433 
5434   if (target == 0)
5435     target = tem;
5436   else if (tem != target)
5437     emit_move_insn (target, tem);
5438   return target;
5439 }
5440 
5441 /* Helper function for emit_store_flag.  */
5442 rtx
5443 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5444 	     machine_mode mode, machine_mode compare_mode,
5445 	     int unsignedp, rtx x, rtx y, int normalizep,
5446 	     machine_mode target_mode)
5447 {
5448   struct expand_operand ops[4];
5449   rtx op0, comparison, subtarget;
5450   rtx_insn *last;
5451   scalar_int_mode result_mode = targetm.cstore_mode (icode);
5452   scalar_int_mode int_target_mode;
5453 
5454   last = get_last_insn ();
5455   x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5456   y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5457   if (!x || !y)
5458     {
5459       delete_insns_since (last);
5460       return NULL_RTX;
5461     }
5462 
5463   if (target_mode == VOIDmode)
5464     int_target_mode = result_mode;
5465   else
5466     int_target_mode = as_a <scalar_int_mode> (target_mode);
5467   if (!target)
5468     target = gen_reg_rtx (int_target_mode);
5469 
5470   comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5471 
5472   create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5473   create_fixed_operand (&ops[1], comparison);
5474   create_fixed_operand (&ops[2], x);
5475   create_fixed_operand (&ops[3], y);
5476   if (!maybe_expand_insn (icode, 4, ops))
5477     {
5478       delete_insns_since (last);
5479       return NULL_RTX;
5480     }
5481   subtarget = ops[0].value;
5482 
5483   /* If we are converting to a wider mode, first convert to
5484      INT_TARGET_MODE, then normalize.  This produces better combining
5485      opportunities on machines that have a SIGN_EXTRACT when we are
5486      testing a single bit.  This mostly benefits the 68k.
5487 
5488      If STORE_FLAG_VALUE does not have the sign bit set when
5489      interpreted in MODE, we can do this conversion as unsigned, which
5490      is usually more efficient.  */
5491   if (GET_MODE_SIZE (int_target_mode) > GET_MODE_SIZE (result_mode))
5492     {
5493       convert_move (target, subtarget,
5494 		    val_signbit_known_clear_p (result_mode,
5495 					       STORE_FLAG_VALUE));
5496       op0 = target;
5497       result_mode = int_target_mode;
5498     }
5499   else
5500     op0 = subtarget;
5501 
5502   /* If we want to keep subexpressions around, don't reuse our last
5503      target.  */
5504   if (optimize)
5505     subtarget = 0;
5506 
5507   /* Now normalize to the proper value in MODE.  Sometimes we don't
5508      have to do anything.  */
5509   if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5510     ;
5511   /* STORE_FLAG_VALUE might be the most negative number, so write
5512      the comparison this way to avoid a compile-time warning.  */
5513   else if (- normalizep == STORE_FLAG_VALUE)
5514     op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5515 
5516   /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5517      it hard to use a value of just the sign bit due to ANSI integer
5518      constant typing rules.  */
5519   else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5520     op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5521 			GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5522 			normalizep == 1);
5523   else
5524     {
5525       gcc_assert (STORE_FLAG_VALUE & 1);
5526 
5527       op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5528       if (normalizep == -1)
5529 	op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5530     }
5531 
5532   /* If we were converting to a smaller mode, do the conversion now.  */
5533   if (int_target_mode != result_mode)
5534     {
5535       convert_move (target, op0, 0);
5536       return target;
5537     }
5538   else
5539     return op0;
5540 }
5541 
5542 
5543 /* A subroutine of emit_store_flag only including "tricks" that do not
5544    need a recursive call.  These are kept separate to avoid infinite
5545    loops.  */
5546 
5547 static rtx
5548 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5549 		   machine_mode mode, int unsignedp, int normalizep,
5550 		   machine_mode target_mode)
5551 {
5552   rtx subtarget;
5553   enum insn_code icode;
5554   machine_mode compare_mode;
5555   enum mode_class mclass;
5556   enum rtx_code scode;
5557 
5558   if (unsignedp)
5559     code = unsigned_condition (code);
5560   scode = swap_condition (code);
5561 
5562   /* If one operand is constant, make it the second one.  Only do this
5563      if the other operand is not constant as well.  */
5564 
5565   if (swap_commutative_operands_p (op0, op1))
5566     {
5567       std::swap (op0, op1);
5568       code = swap_condition (code);
5569     }
5570 
5571   if (mode == VOIDmode)
5572     mode = GET_MODE (op0);
5573 
5574   /* For some comparisons with 1 and -1, we can convert this to
5575      comparisons with zero.  This will often produce more opportunities for
5576      store-flag insns.  */
5577 
5578   switch (code)
5579     {
5580     case LT:
5581       if (op1 == const1_rtx)
5582 	op1 = const0_rtx, code = LE;
5583       break;
5584     case LE:
5585       if (op1 == constm1_rtx)
5586 	op1 = const0_rtx, code = LT;
5587       break;
5588     case GE:
5589       if (op1 == const1_rtx)
5590 	op1 = const0_rtx, code = GT;
5591       break;
5592     case GT:
5593       if (op1 == constm1_rtx)
5594 	op1 = const0_rtx, code = GE;
5595       break;
5596     case GEU:
5597       if (op1 == const1_rtx)
5598 	op1 = const0_rtx, code = NE;
5599       break;
5600     case LTU:
5601       if (op1 == const1_rtx)
5602 	op1 = const0_rtx, code = EQ;
5603       break;
5604     default:
5605       break;
5606     }
5607 
5608   /* If we are comparing a double-word integer with zero or -1, we can
5609      convert the comparison into one involving a single word.  */
5610   scalar_int_mode int_mode;
5611   if (is_int_mode (mode, &int_mode)
5612       && GET_MODE_BITSIZE (int_mode) == BITS_PER_WORD * 2
5613       && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5614     {
5615       rtx tem;
5616       if ((code == EQ || code == NE)
5617 	  && (op1 == const0_rtx || op1 == constm1_rtx))
5618 	{
5619 	  rtx op00, op01;
5620 
5621 	  /* Do a logical OR or AND of the two words and compare the
5622 	     result.  */
5623 	  op00 = simplify_gen_subreg (word_mode, op0, int_mode, 0);
5624 	  op01 = simplify_gen_subreg (word_mode, op0, int_mode, UNITS_PER_WORD);
5625 	  tem = expand_binop (word_mode,
5626 			      op1 == const0_rtx ? ior_optab : and_optab,
5627 			      op00, op01, NULL_RTX, unsignedp,
5628 			      OPTAB_DIRECT);
5629 
5630 	  if (tem != 0)
5631 	    tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5632 				   unsignedp, normalizep);
5633 	}
5634       else if ((code == LT || code == GE) && op1 == const0_rtx)
5635 	{
5636 	  rtx op0h;
5637 
5638 	  /* If testing the sign bit, can just test on high word.  */
5639 	  op0h = simplify_gen_subreg (word_mode, op0, int_mode,
5640 				      subreg_highpart_offset (word_mode,
5641 							      int_mode));
5642 	  tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5643 				 unsignedp, normalizep);
5644 	}
5645       else
5646 	tem = NULL_RTX;
5647 
5648       if (tem)
5649 	{
5650 	  if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5651 	    return tem;
5652 	  if (!target)
5653 	    target = gen_reg_rtx (target_mode);
5654 
5655 	  convert_move (target, tem,
5656 			!val_signbit_known_set_p (word_mode,
5657 						  (normalizep ? normalizep
5658 						   : STORE_FLAG_VALUE)));
5659 	  return target;
5660 	}
5661     }
5662 
5663   /* If this is A < 0 or A >= 0, we can do this by taking the ones
5664      complement of A (for GE) and shifting the sign bit to the low bit.  */
5665   if (op1 == const0_rtx && (code == LT || code == GE)
5666       && is_int_mode (mode, &int_mode)
5667       && (normalizep || STORE_FLAG_VALUE == 1
5668 	  || val_signbit_p (int_mode, STORE_FLAG_VALUE)))
5669     {
5670       scalar_int_mode int_target_mode;
5671       subtarget = target;
5672 
5673       if (!target)
5674 	int_target_mode = int_mode;
5675       else
5676 	{
5677 	  /* If the result is to be wider than OP0, it is best to convert it
5678 	     first.  If it is to be narrower, it is *incorrect* to convert it
5679 	     first.  */
5680 	  int_target_mode = as_a <scalar_int_mode> (target_mode);
5681 	  if (GET_MODE_SIZE (int_target_mode) > GET_MODE_SIZE (int_mode))
5682 	    {
5683 	      op0 = convert_modes (int_target_mode, int_mode, op0, 0);
5684 	      int_mode = int_target_mode;
5685 	    }
5686 	}
5687 
5688       if (int_target_mode != int_mode)
5689 	subtarget = 0;
5690 
5691       if (code == GE)
5692 	op0 = expand_unop (int_mode, one_cmpl_optab, op0,
5693 			   ((STORE_FLAG_VALUE == 1 || normalizep)
5694 			    ? 0 : subtarget), 0);
5695 
5696       if (STORE_FLAG_VALUE == 1 || normalizep)
5697 	/* If we are supposed to produce a 0/1 value, we want to do
5698 	   a logical shift from the sign bit to the low-order bit; for
5699 	   a -1/0 value, we do an arithmetic shift.  */
5700 	op0 = expand_shift (RSHIFT_EXPR, int_mode, op0,
5701 			    GET_MODE_BITSIZE (int_mode) - 1,
5702 			    subtarget, normalizep != -1);
5703 
5704       if (int_mode != int_target_mode)
5705 	op0 = convert_modes (int_target_mode, int_mode, op0, 0);
5706 
5707       return op0;
5708     }
5709 
5710   mclass = GET_MODE_CLASS (mode);
5711   FOR_EACH_MODE_FROM (compare_mode, mode)
5712     {
5713      machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5714      icode = optab_handler (cstore_optab, optab_mode);
5715      if (icode != CODE_FOR_nothing)
5716 	{
5717 	  do_pending_stack_adjust ();
5718 	  rtx tem = emit_cstore (target, icode, code, mode, compare_mode,
5719 				 unsignedp, op0, op1, normalizep, target_mode);
5720 	  if (tem)
5721 	    return tem;
5722 
5723 	  if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5724 	    {
5725 	      tem = emit_cstore (target, icode, scode, mode, compare_mode,
5726 				 unsignedp, op1, op0, normalizep, target_mode);
5727 	      if (tem)
5728 	        return tem;
5729 	    }
5730 	  break;
5731 	}
5732     }
5733 
5734   return 0;
5735 }
5736 
5737 /* Subroutine of emit_store_flag that handles cases in which the operands
5738    are scalar integers.  SUBTARGET is the target to use for temporary
5739    operations and TRUEVAL is the value to store when the condition is
5740    true.  All other arguments are as for emit_store_flag.  */
5741 
5742 rtx
5743 emit_store_flag_int (rtx target, rtx subtarget, enum rtx_code code, rtx op0,
5744 		     rtx op1, scalar_int_mode mode, int unsignedp,
5745 		     int normalizep, rtx trueval)
5746 {
5747   machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5748   rtx_insn *last = get_last_insn ();
5749 
5750   /* If this is an equality comparison of integers, we can try to exclusive-or
5751      (or subtract) the two operands and use a recursive call to try the
5752      comparison with zero.  Don't do any of these cases if branches are
5753      very cheap.  */
5754 
5755   if ((code == EQ || code == NE) && op1 != const0_rtx)
5756     {
5757       rtx tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5758 			      OPTAB_WIDEN);
5759 
5760       if (tem == 0)
5761 	tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5762 			    OPTAB_WIDEN);
5763       if (tem != 0)
5764 	tem = emit_store_flag (target, code, tem, const0_rtx,
5765 			       mode, unsignedp, normalizep);
5766       if (tem != 0)
5767 	return tem;
5768 
5769       delete_insns_since (last);
5770     }
5771 
5772   /* For integer comparisons, try the reverse comparison.  However, for
5773      small X and if we'd have anyway to extend, implementing "X != 0"
5774      small X, and if we'd have to extend anyway, implementing "X != 0"
5775   rtx_code rcode = reverse_condition (code);
5776   if (can_compare_p (rcode, mode, ccp_store_flag)
5777       && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5778 	    && code == NE
5779 	    && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5780 	    && op1 == const0_rtx))
5781     {
5782       int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5783 		      || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5784 
5785       /* Again, for the reverse comparison, use either an addition or a XOR.  */
5786       if (want_add
5787 	  && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
5788 		       optimize_insn_for_speed_p ()) == 0)
5789 	{
5790 	  rtx tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5791 				       STORE_FLAG_VALUE, target_mode);
5792 	  if (tem != 0)
5793 	    tem = expand_binop (target_mode, add_optab, tem,
5794 				gen_int_mode (normalizep, target_mode),
5795 				target, 0, OPTAB_WIDEN);
5796 	  if (tem != 0)
5797 	    return tem;
5798 	}
5799       else if (!want_add
5800 	       && rtx_cost (trueval, mode, XOR, 1,
5801 			    optimize_insn_for_speed_p ()) == 0)
5802 	{
5803 	  rtx tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5804 				       normalizep, target_mode);
5805 	  if (tem != 0)
5806 	    tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5807 				INTVAL (trueval) >= 0, OPTAB_WIDEN);
5808 	  if (tem != 0)
5809 	    return tem;
5810 	}
5811 
5812       delete_insns_since (last);
5813     }
5814 
5815   /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5816      the constant zero.  Reject all other comparisons at this point.  Only
5817      do LE and GT if branches are expensive since they are expensive on
5818      2-operand machines.  */
5819 
5820   if (op1 != const0_rtx
5821       || (code != EQ && code != NE
5822 	  && (BRANCH_COST (optimize_insn_for_speed_p (),
5823 			   false) <= 1 || (code != LE && code != GT))))
5824     return 0;
5825 
5826   /* Try to put the result of the comparison in the sign bit.  Assume we can't
5827      do the necessary operation below.  */
5828 
5829   rtx tem = 0;
5830 
5831   /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
5832      the sign bit set.  */
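  /* E.g. A == 0 gives 0 | -1 == -1 (sign bit set), while A == 5 gives
     5 | 4 == 5 (sign bit clear).  */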
5833 
5834   if (code == LE)
5835     {
5836       /* This is destructive, so SUBTARGET can't be OP0.  */
5837       if (rtx_equal_p (subtarget, op0))
5838 	subtarget = 0;
5839 
5840       tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5841 			  OPTAB_WIDEN);
5842       if (tem)
5843 	tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5844 			    OPTAB_WIDEN);
5845     }
5846 
5847   /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5848      number of bits in the mode of OP0, minus one.  */
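  /* E.g. in 32 bits, A == 5 gives (5 >> 31) - 5 == -5 (sign bit set),
     while A == -5 gives -1 - (-5) == 4 (sign bit clear).  */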
5849 
5850   if (code == GT)
5851     {
5852       if (rtx_equal_p (subtarget, op0))
5853 	subtarget = 0;
5854 
5855       tem = maybe_expand_shift (RSHIFT_EXPR, mode, op0,
5856 				GET_MODE_BITSIZE (mode) - 1,
5857 				subtarget, 0);
5858       if (tem)
5859 	tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5860 			    OPTAB_WIDEN);
5861     }
5862 
5863   if (code == EQ || code == NE)
5864     {
5865       /* For EQ or NE, one way to do the comparison is to apply an operation
5866 	 that converts the operand into a positive number if it is nonzero
5867 	 or zero if it was originally zero.  Then, for EQ, we subtract 1 and
5868 	 for NE we negate.  This puts the result in the sign bit.  Then we
5869 	 normalize with a shift, if needed.
5870 
5871 	 Two operations that can do the above actions are ABS and FFS, so try
5872 	 them.  If that doesn't work, and MODE is smaller than a full word,
5873 	 we can use zero-extension to the wider mode (an unsigned conversion)
5874 	 as the operation.  */
5875 
5876       /* Note that ABS doesn't yield a positive number for INT_MIN, but
5877 	 that is compensated by the subsequent overflow when subtracting
5878 	 one / negating.  */
5879 
5880       if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5881 	tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5882       else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5883 	tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5884       else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5885 	{
5886 	  tem = convert_modes (word_mode, mode, op0, 1);
5887 	  mode = word_mode;
5888 	}
5889 
5890       if (tem != 0)
5891 	{
5892 	  if (code == EQ)
5893 	    tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5894 				0, OPTAB_WIDEN);
5895 	  else
5896 	    tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5897 	}
5898 
5899       /* If we couldn't do it that way, for NE we can "or" the two's complement
5900 	 of the value with itself.  For EQ, we take the one's complement of
5901 	 that "or", which is an extra insn, so we only handle EQ if branches
5902 	 are expensive.  */
5903 
5904       if (tem == 0
5905 	  && (code == NE
5906 	      || BRANCH_COST (optimize_insn_for_speed_p (),
5907 			      false) > 1))
5908 	{
5909 	  if (rtx_equal_p (subtarget, op0))
5910 	    subtarget = 0;
5911 
5912 	  tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5913 	  tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5914 			      OPTAB_WIDEN);
5915 
5916 	  if (tem && code == EQ)
5917 	    tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5918 	}
5919     }
5920 
5921   if (tem && normalizep)
5922     tem = maybe_expand_shift (RSHIFT_EXPR, mode, tem,
5923 			      GET_MODE_BITSIZE (mode) - 1,
5924 			      subtarget, normalizep == 1);
5925 
5926   if (tem)
5927     {
5928       if (!target)
5929 	;
5930       else if (GET_MODE (tem) != target_mode)
5931 	{
5932 	  convert_move (target, tem, 0);
5933 	  tem = target;
5934 	}
5935       else if (!subtarget)
5936 	{
5937 	  emit_move_insn (target, tem);
5938 	  tem = target;
5939 	}
5940     }
5941   else
5942     delete_insns_since (last);
5943 
5944   return tem;
5945 }
5946 
5947 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5948    and storing in TARGET.  Normally return TARGET.
5949    Return 0 if that cannot be done.
5950 
5951    MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
5952    it is VOIDmode, they cannot both be CONST_INT.
5953 
5954    UNSIGNEDP is for the case where we have to widen the operands
5955    to perform the operation.  It says to use zero-extension.
5956 
5957    NORMALIZEP is 1 if we should convert the result to be either zero
5958    or one.  NORMALIZEP is -1 if we should convert the result to be
5959    either zero or -1.  If NORMALIZEP is zero, the result will be left
5960    "raw" out of the scc insn.  */
5961 
5962 rtx
5963 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5964 		 machine_mode mode, int unsignedp, int normalizep)
5965 {
5966   machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5967   enum rtx_code rcode;
5968   rtx subtarget;
5969   rtx tem, trueval;
5970   rtx_insn *last;
5971 
5972   /* If we compare constants, we shouldn't use a store-flag operation,
5973      but a constant load.  We can get there via the vanilla route that
5974      usually generates a compare-branch sequence, but will in this case
5975      fold the comparison to a constant, and thus elide the branch.  */
5976   if (CONSTANT_P (op0) && CONSTANT_P (op1))
5977     return NULL_RTX;
5978 
5979   tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5980 			   target_mode);
5981   if (tem)
5982     return tem;
5983 
5984   /* If we reached here, we can't do this with a scc insn, however there
5985      are some comparisons that can be done in other ways.  Don't do any
5986      of these cases if branches are very cheap.  */
5987   if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5988     return 0;
5989 
5990   /* See what we need to return.  We can only return a 1, -1, or the
5991      sign bit.  */
5992 
5993   if (normalizep == 0)
5994     {
5995       if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5996 	normalizep = STORE_FLAG_VALUE;
5997 
5998       else if (val_signbit_p (mode, STORE_FLAG_VALUE))
5999 	;
6000       else
6001 	return 0;
6002     }
6003 
6004   last = get_last_insn ();
6005 
6006   /* If optimizing, use different pseudo registers for each insn, instead
6007      of reusing the same pseudo.  This leads to better CSE, but slows
6008      down the compiler, since there are more pseudos.  */
6009   subtarget = (!optimize
6010 	       && (target_mode == mode)) ? target : NULL_RTX;
6011   trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
6012 
6013   /* For floating-point comparisons, try the reverse comparison or try
6014      changing the "orderedness" of the comparison.  */
6015   if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6016     {
6017       enum rtx_code first_code;
6018       bool and_them;
6019 
6020       rcode = reverse_condition_maybe_unordered (code);
6021       if (can_compare_p (rcode, mode, ccp_store_flag)
6022 	  && (code == ORDERED || code == UNORDERED
6023 	      || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
6024 	      || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
6025 	{
6026 	  int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
6027 			  || (STORE_FLAG_VALUE == -1 && normalizep == 1));
6028 
6029 	  /* For the reverse comparison, use either an addition or a XOR.  */
6030 	  if (want_add
6031 	      && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
6032 			   optimize_insn_for_speed_p ()) == 0)
6033 	    {
6034 	      tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
6035 				       STORE_FLAG_VALUE, target_mode);
6036 	      if (tem)
6037 		return expand_binop (target_mode, add_optab, tem,
6038 				     gen_int_mode (normalizep, target_mode),
6039 				     target, 0, OPTAB_WIDEN);
6040 	    }
6041 	  else if (!want_add
6042 		   && rtx_cost (trueval, mode, XOR, 1,
6043 				optimize_insn_for_speed_p ()) == 0)
6044 	    {
6045 	      tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
6046 				       normalizep, target_mode);
6047 	      if (tem)
6048 		return expand_binop (target_mode, xor_optab, tem, trueval,
6049 				     target, INTVAL (trueval) >= 0,
6050 				     OPTAB_WIDEN);
6051 	    }
6052 	}
6053 
6054       delete_insns_since (last);
6055 
6056       /* ORDERED and UNORDERED cannot be split; only the trick above applies.  */
6057       if (code == ORDERED || code == UNORDERED)
6058 	return 0;
6059 
6060       and_them = split_comparison (code, mode, &first_code, &code);
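      /* For example (based on the usual split_comparison mapping, which the
	 assert below relies on): LT splits into ORDERED && UNLT, while NE
	 splits into UNORDERED || LTGT; AND_THEM says which of those two
	 combinations applies.  */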
6061 
6062       /* If there are no NaNs, the first comparison should always fall through.
6063 	 Effectively change the comparison to the other one.  */
6064       if (!HONOR_NANS (mode))
6065 	{
6066 	  gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
6067 	  return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
6068 				    target_mode);
6069 	}
6070 
6071       if (!HAVE_conditional_move)
6072 	return 0;
6073 
6074       /* Do not turn a trapping comparison into a non-trapping one.  */
6075       if ((code != EQ && code != NE && code != UNEQ && code != LTGT)
6076 	  && flag_trapping_math)
6077 	return 0;
6078 
6079       /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
6080 	 conditional move.  */
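      /* Roughly, the sequence built below is

	   tem    = scc (op0 FIRST_CODE op1)
	   target = (op0 CODE op1) ? tem : 0         if AND_THEM
	   target = (op0 CODE op1) ? TRUEVAL : tem   otherwise

	 i.e. the conditional move combines the two halves of the split
	 comparison.  */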
6081       tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
6082 			       normalizep, target_mode);
6083       if (tem == 0)
6084 	return 0;
6085 
6086       if (and_them)
6087 	tem = emit_conditional_move (target, code, op0, op1, mode,
6088 				     tem, const0_rtx, GET_MODE (tem), 0);
6089       else
6090 	tem = emit_conditional_move (target, code, op0, op1, mode,
6091 				     trueval, tem, GET_MODE (tem), 0);
6092 
6093       if (tem == 0)
6094 	delete_insns_since (last);
6095       return tem;
6096     }
6097 
6098   /* The remaining tricks only apply to integer comparisons.  */
6099 
6100   scalar_int_mode int_mode;
6101   if (is_int_mode (mode, &int_mode))
6102     return emit_store_flag_int (target, subtarget, code, op0, op1, int_mode,
6103 				unsignedp, normalizep, trueval);
6104 
6105   return 0;
6106 }
6107 
6108 /* Like emit_store_flag, but always succeeds.  */
6109 
6110 rtx
6111 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
6112 		       machine_mode mode, int unsignedp, int normalizep)
6113 {
6114   rtx tem;
6115   rtx_code_label *label;
6116   rtx trueval, falseval;
6117 
6118   /* First see if emit_store_flag can do the job.  */
6119   tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
6120   if (tem != 0)
6121     return tem;
6122 
6123   /* If one operand is constant, make it the second one.  Only do this
6124      if the other operand is not constant as well.  */
6125   if (swap_commutative_operands_p (op0, op1))
6126     {
6127       std::swap (op0, op1);
6128       code = swap_condition (code);
6129     }
6130 
6131   if (mode == VOIDmode)
6132     mode = GET_MODE (op0);
6133 
6134   if (!target)
6135     target = gen_reg_rtx (word_mode);
6136 
6137   /* If emit_store_flag failed, we must use set/compare/jump/set code.  For
6138      foo != 0 with foo in both OP0 and TARGET, just set it to TRUEVAL if nonzero.  */
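  /* In that special case the emitted sequence is essentially

       if (target == 0) goto L;
       target = TRUEVAL;
     L:

     so a nonzero TARGET is replaced by TRUEVAL and zero is left alone.  */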
6139   trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
6140   if (code == NE
6141       && GET_MODE_CLASS (mode) == MODE_INT
6142       && REG_P (target)
6143       && op0 == target
6144       && op1 == const0_rtx)
6145     {
6146       label = gen_label_rtx ();
6147       do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp, mode,
6148 			       NULL_RTX, NULL, label,
6149 			       profile_probability::uninitialized ());
6150       emit_move_insn (target, trueval);
6151       emit_label (label);
6152       return target;
6153     }
6154 
6155   if (!REG_P (target)
6156       || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
6157     target = gen_reg_rtx (GET_MODE (target));
6158 
6159   /* Jump in the right direction if the target cannot implement CODE
6160      but can jump on its reverse condition.  */
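  /* The generic sequence emitted at the end of this function is

       target = TRUEVAL;
       if (op0 CODE op1) goto L;
       target = FALSEVAL;
     L:

     so when only the reverse condition can be branched on, it suffices to
     swap TRUEVAL and FALSEVAL and branch on the reverse code instead.  */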
6161   falseval = const0_rtx;
6162   if (! can_compare_p (code, mode, ccp_jump)
6163       && (! FLOAT_MODE_P (mode)
6164           || code == ORDERED || code == UNORDERED
6165           || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
6166           || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
6167     {
6168       enum rtx_code rcode;
6169       if (FLOAT_MODE_P (mode))
6170         rcode = reverse_condition_maybe_unordered (code);
6171       else
6172         rcode = reverse_condition (code);
6173 
6174       /* Canonicalize to UNORDERED for the libcall.  */
6175       if (can_compare_p (rcode, mode, ccp_jump)
6176           || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
6177 	{
6178 	  falseval = trueval;
6179 	  trueval = const0_rtx;
6180 	  code = rcode;
6181 	}
6182     }
6183 
6184   emit_move_insn (target, trueval);
6185   label = gen_label_rtx ();
6186   do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX, NULL,
6187 			   label, profile_probability::uninitialized ());
6188 
6189   emit_move_insn (target, falseval);
6190   emit_label (label);
6191 
6192   return target;
6193 }
6194 
6195 /* Perform a possibly multi-word comparison and conditional jump to LABEL
6196    if ARG1 OP ARG2 is true, where ARG1 and ARG2 are of mode MODE.  This is
6197    now a thin wrapper around do_compare_rtx_and_jump.  */
6198 
6199 static void
6200 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, machine_mode mode,
6201 		 rtx_code_label *label)
6202 {
6203   int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
6204   do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode, NULL_RTX,
6205 			   NULL, label, profile_probability::uninitialized ());
6206 }
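/* A hedged example of a call to the wrapper above (QUOTIENT, INT_MODE and
   LABEL are hypothetical, not taken from a particular caller):

     do_cmp_and_jump (quotient, const0_rtx, GE, int_mode, label);

   branches to LABEL when QUOTIENT >= 0; GE is not one of the unsigned codes
   checked above, so a signed comparison is used.  */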
6207