1 /* Medium-level subroutines: convert bit-field store and extract
2    and shifts, multiplies and divides to rtl instructions.
3    Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
4    1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
5    Free Software Foundation, Inc.
6 
7 This file is part of GCC.
8 
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
13 
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
17 for more details.
18 
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3.  If not see
21 <http://www.gnu.org/licenses/>.  */
22 
23 
24 #include "config.h"
25 #include "system.h"
26 #include "coretypes.h"
27 #include "tm.h"
28 #include "toplev.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "tm_p.h"
32 #include "flags.h"
33 #include "insn-config.h"
34 #include "expr.h"
35 #include "optabs.h"
36 #include "real.h"
37 #include "recog.h"
38 #include "langhooks.h"
39 #include "df.h"
40 #include "target.h"
41 
42 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
43 				   unsigned HOST_WIDE_INT,
44 				   unsigned HOST_WIDE_INT, rtx);
45 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
46 				   unsigned HOST_WIDE_INT, rtx);
47 static rtx extract_fixed_bit_field (enum machine_mode, rtx,
48 				    unsigned HOST_WIDE_INT,
49 				    unsigned HOST_WIDE_INT,
50 				    unsigned HOST_WIDE_INT, rtx, int);
51 static rtx mask_rtx (enum machine_mode, int, int, int);
52 static rtx lshift_value (enum machine_mode, rtx, int, int);
53 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
54 				    unsigned HOST_WIDE_INT, int);
55 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
56 static rtx expand_smod_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
57 static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
58 
59 /* Test whether a value is zero or a power of two.  */
60 #define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
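/* Editor's illustration (not part of the original source): x & (x - 1)
   clears the lowest set bit, so the result is zero exactly when at most
   one bit of X is set.  For example:
       x = 8  (binary 1000):  1000 & 0111 = 0000  -> true (power of two)
       x = 12 (binary 1100):  1100 & 1011 = 1000  -> false
       x = 0:                 0 & (all ones)      = 0     -> true
   hence the "_OR_ZERO" in the macro name.  */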
61 
62 /* Nonzero means divides or modulus operations are relatively cheap for
63    powers of two, so don't use branches; emit the operation instead.
64    Usually, this will mean that the MD file will emit non-branch
65    sequences.  */
66 
67 static bool sdiv_pow2_cheap[2][NUM_MACHINE_MODES];
68 static bool smod_pow2_cheap[2][NUM_MACHINE_MODES];
69 
70 #ifndef SLOW_UNALIGNED_ACCESS
71 #define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
72 #endif
73 
74 /* For compilers that support multiple targets with different word sizes,
75    MAX_BITS_PER_WORD contains the biggest value of BITS_PER_WORD.  An example
76    is the H8/300(H) compiler.  */
77 
78 #ifndef MAX_BITS_PER_WORD
79 #define MAX_BITS_PER_WORD BITS_PER_WORD
80 #endif
81 
82 /* Reduce conditional compilation elsewhere.  */
83 #ifndef HAVE_insv
84 #define HAVE_insv	0
85 #define CODE_FOR_insv	CODE_FOR_nothing
86 #define gen_insv(a,b,c,d) NULL_RTX
87 #endif
88 #ifndef HAVE_extv
89 #define HAVE_extv	0
90 #define CODE_FOR_extv	CODE_FOR_nothing
91 #define gen_extv(a,b,c,d) NULL_RTX
92 #endif
93 #ifndef HAVE_extzv
94 #define HAVE_extzv	0
95 #define CODE_FOR_extzv	CODE_FOR_nothing
96 #define gen_extzv(a,b,c,d) NULL_RTX
97 #endif
98 
99 /* Cost of various pieces of RTL.  Note that some of these are indexed by
100    shift count and some by mode.  */
101 static int zero_cost[2];
102 static int add_cost[2][NUM_MACHINE_MODES];
103 static int neg_cost[2][NUM_MACHINE_MODES];
104 static int shift_cost[2][NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
105 static int shiftadd_cost[2][NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
106 static int shiftsub0_cost[2][NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
107 static int shiftsub1_cost[2][NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
108 static int mul_cost[2][NUM_MACHINE_MODES];
109 static int sdiv_cost[2][NUM_MACHINE_MODES];
110 static int udiv_cost[2][NUM_MACHINE_MODES];
111 static int mul_widen_cost[2][NUM_MACHINE_MODES];
112 static int mul_highpart_cost[2][NUM_MACHINE_MODES];
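/* Editor's note (not in the original source): the leading [2] dimension of
   the tables above (and of sdiv_pow2_cheap/smod_pow2_cheap) is indexed by
   the SPEED flag that init_expmed loops over below: element 0 holds the
   optimize-for-size costs and element 1 the optimize-for-speed costs.  */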
113 
114 void
115 init_expmed (void)
116 {
117   struct
118   {
119     struct rtx_def reg;		rtunion reg_fld[2];
120     struct rtx_def plus;	rtunion plus_fld1;
121     struct rtx_def neg;
122     struct rtx_def mult;	rtunion mult_fld1;
123     struct rtx_def sdiv;	rtunion sdiv_fld1;
124     struct rtx_def udiv;	rtunion udiv_fld1;
125     struct rtx_def zext;
126     struct rtx_def sdiv_32;	rtunion sdiv_32_fld1;
127     struct rtx_def smod_32;	rtunion smod_32_fld1;
128     struct rtx_def wide_mult;	rtunion wide_mult_fld1;
129     struct rtx_def wide_lshr;	rtunion wide_lshr_fld1;
130     struct rtx_def wide_trunc;
131     struct rtx_def shift;	rtunion shift_fld1;
132     struct rtx_def shift_mult;	rtunion shift_mult_fld1;
133     struct rtx_def shift_add;	rtunion shift_add_fld1;
134     struct rtx_def shift_sub0;	rtunion shift_sub0_fld1;
135     struct rtx_def shift_sub1;	rtunion shift_sub1_fld1;
136   } all;
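  /* Editor's note (not in the original source): each rtx_def above is
     immediately followed by the extra rtunion slots its rtx code needs, so
     the operand accesses below stay within valid storage even though these
     are plain stack objects rather than heap-allocated RTL.  init_expmed
     then recycles the same skeleton expressions for every mode via PUT_MODE
     instead of building fresh RTL for each rtx_cost query.  */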
137 
138   rtx pow2[MAX_BITS_PER_WORD];
139   rtx cint[MAX_BITS_PER_WORD];
140   int m, n;
141   enum machine_mode mode, wider_mode;
142   int speed;
143 
144 
145   for (m = 1; m < MAX_BITS_PER_WORD; m++)
146     {
147       pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
148       cint[m] = GEN_INT (m);
149     }
150   memset (&all, 0, sizeof all);
151 
152   PUT_CODE (&all.reg, REG);
153   /* Avoid using hard regs in ways which may be unsupported.  */
154   SET_REGNO (&all.reg, LAST_VIRTUAL_REGISTER + 1);
155 
156   PUT_CODE (&all.plus, PLUS);
157   XEXP (&all.plus, 0) = &all.reg;
158   XEXP (&all.plus, 1) = &all.reg;
159 
160   PUT_CODE (&all.neg, NEG);
161   XEXP (&all.neg, 0) = &all.reg;
162 
163   PUT_CODE (&all.mult, MULT);
164   XEXP (&all.mult, 0) = &all.reg;
165   XEXP (&all.mult, 1) = &all.reg;
166 
167   PUT_CODE (&all.sdiv, DIV);
168   XEXP (&all.sdiv, 0) = &all.reg;
169   XEXP (&all.sdiv, 1) = &all.reg;
170 
171   PUT_CODE (&all.udiv, UDIV);
172   XEXP (&all.udiv, 0) = &all.reg;
173   XEXP (&all.udiv, 1) = &all.reg;
174 
175   PUT_CODE (&all.sdiv_32, DIV);
176   XEXP (&all.sdiv_32, 0) = &all.reg;
177   XEXP (&all.sdiv_32, 1) = 32 < MAX_BITS_PER_WORD ? cint[32] : GEN_INT (32);
178 
179   PUT_CODE (&all.smod_32, MOD);
180   XEXP (&all.smod_32, 0) = &all.reg;
181   XEXP (&all.smod_32, 1) = XEXP (&all.sdiv_32, 1);
182 
183   PUT_CODE (&all.zext, ZERO_EXTEND);
184   XEXP (&all.zext, 0) = &all.reg;
185 
186   PUT_CODE (&all.wide_mult, MULT);
187   XEXP (&all.wide_mult, 0) = &all.zext;
188   XEXP (&all.wide_mult, 1) = &all.zext;
189 
190   PUT_CODE (&all.wide_lshr, LSHIFTRT);
191   XEXP (&all.wide_lshr, 0) = &all.wide_mult;
192 
193   PUT_CODE (&all.wide_trunc, TRUNCATE);
194   XEXP (&all.wide_trunc, 0) = &all.wide_lshr;
195 
196   PUT_CODE (&all.shift, ASHIFT);
197   XEXP (&all.shift, 0) = &all.reg;
198 
199   PUT_CODE (&all.shift_mult, MULT);
200   XEXP (&all.shift_mult, 0) = &all.reg;
201 
202   PUT_CODE (&all.shift_add, PLUS);
203   XEXP (&all.shift_add, 0) = &all.shift_mult;
204   XEXP (&all.shift_add, 1) = &all.reg;
205 
206   PUT_CODE (&all.shift_sub0, MINUS);
207   XEXP (&all.shift_sub0, 0) = &all.shift_mult;
208   XEXP (&all.shift_sub0, 1) = &all.reg;
209 
210   PUT_CODE (&all.shift_sub1, MINUS);
211   XEXP (&all.shift_sub1, 0) = &all.reg;
212   XEXP (&all.shift_sub1, 1) = &all.shift_mult;
213 
214   for (speed = 0; speed < 2; speed++)
215     {
216       crtl->maybe_hot_insn_p = speed;
217       zero_cost[speed] = rtx_cost (const0_rtx, SET, speed);
218 
219       for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
220 	   mode != VOIDmode;
221 	   mode = GET_MODE_WIDER_MODE (mode))
222 	{
223 	  PUT_MODE (&all.reg, mode);
224 	  PUT_MODE (&all.plus, mode);
225 	  PUT_MODE (&all.neg, mode);
226 	  PUT_MODE (&all.mult, mode);
227 	  PUT_MODE (&all.sdiv, mode);
228 	  PUT_MODE (&all.udiv, mode);
229 	  PUT_MODE (&all.sdiv_32, mode);
230 	  PUT_MODE (&all.smod_32, mode);
231 	  PUT_MODE (&all.wide_trunc, mode);
232 	  PUT_MODE (&all.shift, mode);
233 	  PUT_MODE (&all.shift_mult, mode);
234 	  PUT_MODE (&all.shift_add, mode);
235 	  PUT_MODE (&all.shift_sub0, mode);
236 	  PUT_MODE (&all.shift_sub1, mode);
237 
238 	  add_cost[speed][mode] = rtx_cost (&all.plus, SET, speed);
239 	  neg_cost[speed][mode] = rtx_cost (&all.neg, SET, speed);
240 	  mul_cost[speed][mode] = rtx_cost (&all.mult, SET, speed);
241 	  sdiv_cost[speed][mode] = rtx_cost (&all.sdiv, SET, speed);
242 	  udiv_cost[speed][mode] = rtx_cost (&all.udiv, SET, speed);
243 
244 	  sdiv_pow2_cheap[speed][mode] = (rtx_cost (&all.sdiv_32, SET, speed)
245 				          <= 2 * add_cost[speed][mode]);
246 	  smod_pow2_cheap[speed][mode] = (rtx_cost (&all.smod_32, SET, speed)
247 				          <= 4 * add_cost[speed][mode]);
248 
249 	  wider_mode = GET_MODE_WIDER_MODE (mode);
250 	  if (wider_mode != VOIDmode)
251 	    {
252 	      PUT_MODE (&all.zext, wider_mode);
253 	      PUT_MODE (&all.wide_mult, wider_mode);
254 	      PUT_MODE (&all.wide_lshr, wider_mode);
255 	      XEXP (&all.wide_lshr, 1) = GEN_INT (GET_MODE_BITSIZE (mode));
256 
257 	      mul_widen_cost[speed][wider_mode]
258 	        = rtx_cost (&all.wide_mult, SET, speed);
259 	      mul_highpart_cost[speed][mode]
260 	        = rtx_cost (&all.wide_trunc, SET, speed);
261 	    }
262 
263 	  shift_cost[speed][mode][0] = 0;
264 	  shiftadd_cost[speed][mode][0] = shiftsub0_cost[speed][mode][0]
265 	    = shiftsub1_cost[speed][mode][0] = add_cost[speed][mode];
266 
267 	  n = MIN (MAX_BITS_PER_WORD, GET_MODE_BITSIZE (mode));
268 	  for (m = 1; m < n; m++)
269 	    {
270 	      XEXP (&all.shift, 1) = cint[m];
271 	      XEXP (&all.shift_mult, 1) = pow2[m];
272 
273 	      shift_cost[speed][mode][m] = rtx_cost (&all.shift, SET, speed);
274 	      shiftadd_cost[speed][mode][m] = rtx_cost (&all.shift_add, SET, speed);
275 	      shiftsub0_cost[speed][mode][m] = rtx_cost (&all.shift_sub0, SET, speed);
276 	      shiftsub1_cost[speed][mode][m] = rtx_cost (&all.shift_sub1, SET, speed);
277 	    }
278 	}
279     }
280   default_rtl_profile ();
281 }
282 
283 /* Return an rtx representing minus the value of X.
284    MODE is the intended mode of the result,
285    useful if X is a CONST_INT.  */
286 
287 rtx
288 negate_rtx (enum machine_mode mode, rtx x)
289 {
290   rtx result = simplify_unary_operation (NEG, mode, x, mode);
291 
292   if (result == 0)
293     result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
294 
295   return result;
296 }
297 
298 /* Report on the availability of insv/extv/extzv and the desired mode
299    of each of their operands.  Returns MAX_MACHINE_MODE if HAVE_foo
300    is false; else the mode of the specified operand.  If OPNO is -1,
301    all the caller cares about is whether the insn is available.  */
302 enum machine_mode
303 mode_for_extraction (enum extraction_pattern pattern, int opno)
304 {
305   const struct insn_data *data;
306 
307   switch (pattern)
308     {
309     case EP_insv:
310       if (HAVE_insv)
311 	{
312 	  data = &insn_data[CODE_FOR_insv];
313 	  break;
314 	}
315       return MAX_MACHINE_MODE;
316 
317     case EP_extv:
318       if (HAVE_extv)
319 	{
320 	  data = &insn_data[CODE_FOR_extv];
321 	  break;
322 	}
323       return MAX_MACHINE_MODE;
324 
325     case EP_extzv:
326       if (HAVE_extzv)
327 	{
328 	  data = &insn_data[CODE_FOR_extzv];
329 	  break;
330 	}
331       return MAX_MACHINE_MODE;
332 
333     default:
334       gcc_unreachable ();
335     }
336 
337   if (opno == -1)
338     return VOIDmode;
339 
340   /* Everyone who uses this function used to follow it with
341      if (result == VOIDmode) result = word_mode; */
342   if (data->operand[opno].mode == VOIDmode)
343     return word_mode;
344   return data->operand[opno].mode;
345 }
346 
347 /* Return true if X, of mode MODE, matches the predicate for operand
348    OPNO of instruction ICODE.  Allow volatile memories, regardless of
349    the ambient volatile_ok setting.  */
350 
351 static bool
352 check_predicate_volatile_ok (enum insn_code icode, int opno,
353 			     rtx x, enum machine_mode mode)
354 {
355   bool save_volatile_ok, result;
356 
357   save_volatile_ok = volatile_ok;
  /* Temporarily allow volatile MEMs, as the function comment promises.  */
  volatile_ok = true;
358   result = insn_data[(int) icode].operand[opno].predicate (x, mode);
359   volatile_ok = save_volatile_ok;
360   return result;
361 }
362 
363 /* A subroutine of store_bit_field, with the same arguments.  Return true
364    if the operation could be implemented.
365 
366    If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
367    no other way of implementing the operation.  If FALLBACK_P is false,
368    return false instead.  */
369 
370 static bool
371 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
372 		   unsigned HOST_WIDE_INT bitnum, enum machine_mode fieldmode,
373 		   rtx value, bool fallback_p)
374 {
375   unsigned int unit
376     = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
377   unsigned HOST_WIDE_INT offset, bitpos;
378   rtx op0 = str_rtx;
379   int byte_offset;
380   rtx orig_value;
381 
382   enum machine_mode op_mode = mode_for_extraction (EP_insv, 3);
383 
384   while (GET_CODE (op0) == SUBREG)
385     {
386       /* The following line once was done only if WORDS_BIG_ENDIAN,
387 	 but I think that is a mistake.  WORDS_BIG_ENDIAN is
388 	 meaningful at a much higher level; when structures are copied
389 	 between memory and regs, the higher-numbered regs
390 	 always get higher addresses.  */
391       int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
392       int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
393 
394       byte_offset = 0;
395 
396       /* Paradoxical subregs need special handling on big endian machines.  */
397       if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
398 	{
399 	  int difference = inner_mode_size - outer_mode_size;
400 
401 	  if (WORDS_BIG_ENDIAN)
402 	    byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
403 	  if (BYTES_BIG_ENDIAN)
404 	    byte_offset += difference % UNITS_PER_WORD;
405 	}
406       else
407 	byte_offset = SUBREG_BYTE (op0);
408 
409       bitnum += byte_offset * BITS_PER_UNIT;
410       op0 = SUBREG_REG (op0);
411     }
412 
413   /* No action is needed if the target is a register and if the field
414      lies completely outside that register.  This can occur if the source
415      code contains an out-of-bounds access to a small array.  */
416   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
417     return true;
418 
419   /* Use vec_set patterns for inserting parts of vectors whenever
420      available.  */
421   if (VECTOR_MODE_P (GET_MODE (op0))
422       && !MEM_P (op0)
423       && (optab_handler (vec_set_optab, GET_MODE (op0))->insn_code
424 	  != CODE_FOR_nothing)
425       && fieldmode == GET_MODE_INNER (GET_MODE (op0))
426       && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
427       && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
428     {
429       enum machine_mode outermode = GET_MODE (op0);
430       enum machine_mode innermode = GET_MODE_INNER (outermode);
431       int icode = (int) optab_handler (vec_set_optab, outermode)->insn_code;
432       int pos = bitnum / GET_MODE_BITSIZE (innermode);
433       rtx rtxpos = GEN_INT (pos);
434       rtx src = value;
435       rtx dest = op0;
436       rtx pat, seq;
437       enum machine_mode mode0 = insn_data[icode].operand[0].mode;
438       enum machine_mode mode1 = insn_data[icode].operand[1].mode;
439       enum machine_mode mode2 = insn_data[icode].operand[2].mode;
440 
441       start_sequence ();
442 
443       if (! (*insn_data[icode].operand[1].predicate) (src, mode1))
444 	src = copy_to_mode_reg (mode1, src);
445 
446       if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
447 	rtxpos = copy_to_mode_reg (mode1, rtxpos);
448 
449       /* We could handle this, but we should always be called with a pseudo
450 	 for our targets and all insns should take them as outputs.  */
451       gcc_assert ((*insn_data[icode].operand[0].predicate) (dest, mode0)
452 		  && (*insn_data[icode].operand[1].predicate) (src, mode1)
453 		  && (*insn_data[icode].operand[2].predicate) (rtxpos, mode2));
454       pat = GEN_FCN (icode) (dest, src, rtxpos);
455       seq = get_insns ();
456       end_sequence ();
457       if (pat)
458 	{
459 	  emit_insn (seq);
460 	  emit_insn (pat);
461 	  return true;
462 	}
463     }
464 
465   /* If the target is a register, then overwriting the entire object or
466      storing a full-word or multi-word field can be done with just a SUBREG.
467 
468      If the target is memory, storing any naturally aligned field can be
469      done with a simple store.  For targets that support fast unaligned
470      memory, any naturally sized, unit aligned field can be done directly.  */
471 
472   offset = bitnum / unit;
473   bitpos = bitnum % unit;
474   byte_offset = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
475                 + (offset * UNITS_PER_WORD);
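  /* Editor's illustration (not in the original source; assumes
     BITS_PER_WORD == 32 and BITS_PER_UNIT == 8): for a register destination
     UNIT is 32, so bitnum == 37 gives offset == 1 (words), bitpos == 5 and
     byte_offset == (37 % 32) / 8 + 1 * 4 == 4; for a memory destination
     UNIT is 8, so the same bitnum gives offset == 4 (bytes), bitpos == 5.  */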
476 
477   if (bitpos == 0
478       && bitsize == GET_MODE_BITSIZE (fieldmode)
479       && (!MEM_P (op0)
480 	  ? ((GET_MODE_SIZE (fieldmode) >= UNITS_PER_WORD
481 	     || GET_MODE_SIZE (GET_MODE (op0)) == GET_MODE_SIZE (fieldmode))
482 	     && byte_offset % GET_MODE_SIZE (fieldmode) == 0)
483 	  : (! SLOW_UNALIGNED_ACCESS (fieldmode, MEM_ALIGN (op0))
484 	     || (offset * BITS_PER_UNIT % bitsize == 0
485 		 && MEM_ALIGN (op0) % GET_MODE_BITSIZE (fieldmode) == 0))))
486     {
487       if (MEM_P (op0))
488 	op0 = adjust_address (op0, fieldmode, offset);
489       else if (GET_MODE (op0) != fieldmode)
490 	op0 = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
491 				   byte_offset);
492       emit_move_insn (op0, value);
493       return true;
494     }
495 
496   /* Make sure we are playing with integral modes.  Pun with subregs
497      if we aren't.  This must come after the entire register case above,
498      since that case is valid for any mode.  The following cases are only
499      valid for integral modes.  */
500   {
501     enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
502     if (imode != GET_MODE (op0))
503       {
504 	if (MEM_P (op0))
505 	  op0 = adjust_address (op0, imode, 0);
506 	else
507 	  {
508 	    gcc_assert (imode != BLKmode);
509 	    op0 = gen_lowpart (imode, op0);
510 	  }
511       }
512   }
513 
514   /* We may be accessing data outside the field, which means
515      we can alias adjacent data.  */
516   if (MEM_P (op0))
517     {
518       op0 = shallow_copy_rtx (op0);
519       set_mem_alias_set (op0, 0);
520       set_mem_expr (op0, 0);
521     }
522 
523   /* If OP0 is a register, BITPOS must count within a word.
524      But as we have it, it counts within whatever size OP0 now has.
525      On a bigendian machine, these are not the same, so convert.  */
526   if (BYTES_BIG_ENDIAN
527       && !MEM_P (op0)
528       && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
529     bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
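  /* Editor's illustration (not in the original source): if OP0 is a QImode
     register and UNIT is a 32-bit word, a field at BITPOS 2 within the
     8-bit register becomes BITPOS 2 + (32 - 8) == 26 once bits are counted
     within the containing big-endian word.  */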
530 
531   /* Storing an lsb-aligned field in a register
532      can be done with a movestrict instruction.  */
533 
534   if (!MEM_P (op0)
535       && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0)
536       && bitsize == GET_MODE_BITSIZE (fieldmode)
537       && (optab_handler (movstrict_optab, fieldmode)->insn_code
538 	  != CODE_FOR_nothing))
539     {
540       int icode = optab_handler (movstrict_optab, fieldmode)->insn_code;
541       rtx insn;
542       rtx start = get_last_insn ();
543       rtx arg0 = op0;
544 
545       /* Get appropriate low part of the value being stored.  */
546       if (CONST_INT_P (value) || REG_P (value))
547 	value = gen_lowpart (fieldmode, value);
548       else if (!(GET_CODE (value) == SYMBOL_REF
549 		 || GET_CODE (value) == LABEL_REF
550 		 || GET_CODE (value) == CONST))
551 	value = convert_to_mode (fieldmode, value, 0);
552 
553       if (! (*insn_data[icode].operand[1].predicate) (value, fieldmode))
554 	value = copy_to_mode_reg (fieldmode, value);
555 
556       if (GET_CODE (op0) == SUBREG)
557 	{
558 	  /* Else we've got some float mode source being extracted into
559 	     a different float mode destination -- this combination of
560 	     subregs results in Severe Tire Damage.  */
561 	  gcc_assert (GET_MODE (SUBREG_REG (op0)) == fieldmode
562 		      || GET_MODE_CLASS (fieldmode) == MODE_INT
563 		      || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
564 	  arg0 = SUBREG_REG (op0);
565 	}
566 
567       insn = (GEN_FCN (icode)
568 		 (gen_rtx_SUBREG (fieldmode, arg0,
569 				  (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
570 				  + (offset * UNITS_PER_WORD)),
571 				  value));
572       if (insn)
573 	{
574 	  emit_insn (insn);
575 	  return true;
576 	}
577       delete_insns_since (start);
578     }
579 
580   /* Handle fields bigger than a word.  */
581 
582   if (bitsize > BITS_PER_WORD)
583     {
584       /* Here we transfer the words of the field
585 	 in the order least significant first.
586 	 This is because the most significant word is the one which may
587 	 be less than full.
588 	 However, only do that if the value is not BLKmode.  */
589 
590       unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
591       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
592       unsigned int i;
593       rtx last;
594 
595       /* This is the mode we must force value to, so that there will be enough
596 	 subwords to extract.  Note that fieldmode will often (always?) be
597 	 VOIDmode, because that is what store_field uses to indicate that this
598 	 is a bit field, but passing VOIDmode to operand_subword_force
599 	 is not allowed.  */
600       fieldmode = GET_MODE (value);
601       if (fieldmode == VOIDmode)
602 	fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
603 
604       last = get_last_insn ();
605       for (i = 0; i < nwords; i++)
606 	{
607 	  /* If I is 0, use the low-order word in both field and target;
608 	     if I is 1, use the next to lowest word; and so on.  */
609 	  unsigned int wordnum = (backwards ? nwords - i - 1 : i);
610 	  unsigned int bit_offset = (backwards
611 				     ? MAX ((int) bitsize - ((int) i + 1)
612 					    * BITS_PER_WORD,
613 					    0)
614 				     : (int) i * BITS_PER_WORD);
615 	  rtx value_word = operand_subword_force (value, wordnum, fieldmode);
616 
617 	  if (!store_bit_field_1 (op0, MIN (BITS_PER_WORD,
618 					    bitsize - i * BITS_PER_WORD),
619 				  bitnum + bit_offset, word_mode,
620 				  value_word, fallback_p))
621 	    {
622 	      delete_insns_since (last);
623 	      return false;
624 	    }
625 	}
626       return true;
627     }
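  /* Editor's illustration (not in the original source; assumes
     BITS_PER_WORD == 32 and BACKWARDS == 0): storing a 50-bit field makes
     nwords == 2; iteration 0 stores the low-order 32 bits of VALUE at
     bit_offset 0 and iteration 1 stores the remaining 18 bits at
     bit_offset 32, so only the most significant word is partial.  */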
628 
629   /* From here on we can assume that the field to be stored fits within
630      a single word (whatever mode that is), since it is shorter than a word.  */
631 
632   /* OFFSET is the number of words or bytes (UNIT says which)
633      from STR_RTX to the first word or byte containing part of the field.  */
634 
635   if (!MEM_P (op0))
636     {
637       if (offset != 0
638 	  || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
639 	{
640 	  if (!REG_P (op0))
641 	    {
642 	      /* Since this is a destination (lvalue), we can't copy
643 		 it to a pseudo.  We can remove a SUBREG that does not
644 		 change the size of the operand.  Such a SUBREG may
645 		 have been added above.  */
646 	      gcc_assert (GET_CODE (op0) == SUBREG
647 			  && (GET_MODE_SIZE (GET_MODE (op0))
648 			      == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)))));
649 	      op0 = SUBREG_REG (op0);
650 	    }
651 	  op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
652 		                op0, (offset * UNITS_PER_WORD));
653 	}
654       offset = 0;
655     }
656 
657   /* If VALUE has a floating-point or complex mode, access it as an
658      integer of the corresponding size.  This can occur on a machine
659      with 64 bit registers that uses SFmode for float.  It can also
660      occur for unaligned float or complex fields.  */
661   orig_value = value;
662   if (GET_MODE (value) != VOIDmode
663       && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
664       && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
665     {
666       value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
667       emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
668     }
669 
670   /* Now OFFSET is nonzero only if OP0 is memory
671      and is therefore always measured in bytes.  */
672 
673   if (HAVE_insv
674       && GET_MODE (value) != BLKmode
675       && bitsize > 0
676       && GET_MODE_BITSIZE (op_mode) >= bitsize
677       && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
678 	    && (bitsize + bitpos > GET_MODE_BITSIZE (op_mode)))
679       && insn_data[CODE_FOR_insv].operand[1].predicate (GEN_INT (bitsize),
680 							VOIDmode)
681       && check_predicate_volatile_ok (CODE_FOR_insv, 0, op0, VOIDmode))
682     {
683       int xbitpos = bitpos;
684       rtx value1;
685       rtx xop0 = op0;
686       rtx last = get_last_insn ();
687       rtx pat;
688       bool copy_back = false;
689 
690       /* Add OFFSET into OP0's address.  */
691       if (MEM_P (xop0))
692 	xop0 = adjust_address (xop0, byte_mode, offset);
693 
694       /* If xop0 is a register, we need it in OP_MODE
695 	 to make it acceptable to the format of insv.  */
696       if (GET_CODE (xop0) == SUBREG)
697 	/* We can't just change the mode, because this might clobber op0,
698 	   and we will need the original value of op0 if insv fails.  */
699 	xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
700       if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
701 	xop0 = gen_lowpart_SUBREG (op_mode, xop0);
702 
703       /* If the destination is a paradoxical subreg such that we need a
704 	 truncate to the inner mode, perform the insertion on a temporary and
705 	 truncate the result to the original destination.  Note that we can't
706 	 just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
707 	 X) 0)) is (reg:N X).  */
708       if (GET_CODE (xop0) == SUBREG
709 	  && REG_P (SUBREG_REG (xop0))
710 	  && (!TRULY_NOOP_TRUNCATION
711 	      (GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (xop0))),
712 	       GET_MODE_BITSIZE (op_mode))))
713 	{
714 	  rtx tem = gen_reg_rtx (op_mode);
715 	  emit_move_insn (tem, xop0);
716 	  xop0 = tem;
717 	  copy_back = true;
718 	}
719 
720       /* On big-endian machines, we count bits from the most significant.
721 	 If the bit field insn does not, we must invert.  */
722 
723       if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
724 	xbitpos = unit - bitsize - xbitpos;
725 
726       /* We have been counting XBITPOS within UNIT.
727 	 Count instead within the size of the register.  */
728       if (BITS_BIG_ENDIAN && !MEM_P (xop0))
729 	xbitpos += GET_MODE_BITSIZE (op_mode) - unit;
730 
731       unit = GET_MODE_BITSIZE (op_mode);
732 
733       /* Convert VALUE to op_mode (which insv insn wants) in VALUE1.  */
734       value1 = value;
735       if (GET_MODE (value) != op_mode)
736 	{
737 	  if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
738 	    {
739 	      /* Optimization: Don't bother really extending VALUE
740 		 if it has all the bits we will actually use.  However,
741 		 if we must narrow it, be sure we do it correctly.  */
742 
743 	      if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
744 		{
745 		  rtx tmp;
746 
747 		  tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
748 		  if (! tmp)
749 		    tmp = simplify_gen_subreg (op_mode,
750 					       force_reg (GET_MODE (value),
751 							  value1),
752 					       GET_MODE (value), 0);
753 		  value1 = tmp;
754 		}
755 	      else
756 		value1 = gen_lowpart (op_mode, value1);
757 	    }
758 	  else if (CONST_INT_P (value))
759 	    value1 = gen_int_mode (INTVAL (value), op_mode);
760 	  else
761 	    /* Parse phase is supposed to make VALUE's data type
762 	       match that of the component reference, which is a type
763 	       at least as wide as the field; so VALUE should have
764 	       a mode that corresponds to that type.  */
765 	    gcc_assert (CONSTANT_P (value));
766 	}
767 
768       /* If this machine's insv insists on a register,
769 	 get VALUE1 into a register.  */
770       if (! ((*insn_data[(int) CODE_FOR_insv].operand[3].predicate)
771 	     (value1, op_mode)))
772 	value1 = force_reg (op_mode, value1);
773 
774       pat = gen_insv (xop0, GEN_INT (bitsize), GEN_INT (xbitpos), value1);
775       if (pat)
776 	{
777 	  emit_insn (pat);
778 
779 	  if (copy_back)
780 	    convert_move (op0, xop0, true);
781 	  return true;
782 	}
783       delete_insns_since (last);
784     }
785 
786   /* If OP0 is a memory, try copying it to a register and seeing if a
787      cheap register alternative is available.  */
788   if (HAVE_insv && MEM_P (op0))
789     {
790       enum machine_mode bestmode;
791 
792       /* Get the mode to use for inserting into this field.  If OP0 is
793 	 BLKmode, get the smallest mode consistent with the alignment. If
794 	 OP0 is a non-BLKmode object that is no wider than OP_MODE, use its
795 	 mode. Otherwise, use the smallest mode containing the field.  */
796 
797       if (GET_MODE (op0) == BLKmode
798 	  || (op_mode != MAX_MACHINE_MODE
799 	      && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (op_mode)))
800 	bestmode = get_best_mode (bitsize, bitnum, MEM_ALIGN (op0),
801 				  (op_mode == MAX_MACHINE_MODE
802 				   ? VOIDmode : op_mode),
803 				  MEM_VOLATILE_P (op0));
804       else
805 	bestmode = GET_MODE (op0);
806 
807       if (bestmode != VOIDmode
808 	  && GET_MODE_SIZE (bestmode) >= GET_MODE_SIZE (fieldmode)
809 	  && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
810 	       && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
811 	{
812 	  rtx last, tempreg, xop0;
813 	  unsigned HOST_WIDE_INT xoffset, xbitpos;
814 
815 	  last = get_last_insn ();
816 
817 	  /* Adjust address to point to the containing unit of
818 	     that mode.  Compute the offset as a multiple of this unit,
819 	     counting in bytes.  */
820 	  unit = GET_MODE_BITSIZE (bestmode);
821 	  xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
822 	  xbitpos = bitnum % unit;
823 	  xop0 = adjust_address (op0, bestmode, xoffset);
824 
825 	  /* Fetch that unit, store the bitfield in it, then store
826 	     the unit.  */
827 	  tempreg = copy_to_reg (xop0);
828 	  if (store_bit_field_1 (tempreg, bitsize, xbitpos,
829 				 fieldmode, orig_value, false))
830 	    {
831 	      emit_move_insn (xop0, tempreg);
832 	      return true;
833 	    }
834 	  delete_insns_since (last);
835 	}
836     }
837 
838   if (!fallback_p)
839     return false;
840 
841   store_fixed_bit_field (op0, offset, bitsize, bitpos, value);
842   return true;
843 }
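/* Editor's summary (not in the original source): store_bit_field_1 tries,
   in order: a vec_set pattern for vector destinations, a plain move via
   SUBREG or adjust_address when the field is the whole object or naturally
   aligned, a movstrict pattern for lsb-aligned register fields, a
   word-by-word split for fields wider than a word, the insv pattern, and a
   copy-to-register retry for memory operands, before finally falling back
   to store_fixed_bit_field when FALLBACK_P allows it.  */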
844 
845 /* Generate code to store value from rtx VALUE
846    into a bit-field within structure STR_RTX
847    containing BITSIZE bits starting at bit BITNUM.
848    FIELDMODE is the machine-mode of the FIELD_DECL node for this field.  */
849 
850 void
851 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
852 		 unsigned HOST_WIDE_INT bitnum, enum machine_mode fieldmode,
853 		 rtx value)
854 {
855   if (!store_bit_field_1 (str_rtx, bitsize, bitnum, fieldmode, value, true))
856     gcc_unreachable ();
857 }
858 
859 /* Use shifts and boolean operations to store VALUE
860    into a bit field of width BITSIZE
861    in the memory location specified by OP0, offset by OFFSET bytes.
862      (OFFSET must be 0 if OP0 is a register.)
863    The field starts at position BITPOS within the byte.
864     (If OP0 is a register, it may be a full word or a narrower mode,
865      but BITPOS still counts within a full word,
866      which is significant on bigendian machines.)  */
867 
868 static void
869 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT offset,
870 		       unsigned HOST_WIDE_INT bitsize,
871 		       unsigned HOST_WIDE_INT bitpos, rtx value)
872 {
873   enum machine_mode mode;
874   unsigned int total_bits = BITS_PER_WORD;
875   rtx temp;
876   int all_zero = 0;
877   int all_one = 0;
878 
879   /* There is a case not handled here:
880      a structure with a known alignment of just a halfword
881      and a field split across two aligned halfwords within the structure.
882      Or likewise a structure with a known alignment of just a byte
883      and a field split across two bytes.
884      Such cases are not supposed to be able to occur.  */
885 
886   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
887     {
888       gcc_assert (!offset);
889       /* Special treatment for a bit field split across two registers.  */
890       if (bitsize + bitpos > BITS_PER_WORD)
891 	{
892 	  store_split_bit_field (op0, bitsize, bitpos, value);
893 	  return;
894 	}
895     }
896   else
897     {
898       /* Get the proper mode to use for this field.  We want a mode that
899 	 includes the entire field.  If such a mode would be larger than
900 	 a word, we won't be doing the extraction the normal way.
901 	 We don't want a mode bigger than the destination.  */
902 
903       mode = GET_MODE (op0);
904       if (GET_MODE_BITSIZE (mode) == 0
905 	  || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
906 	mode = word_mode;
907       mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
908 			    MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
909 
910       if (mode == VOIDmode)
911 	{
912 	  /* The only way this should occur is if the field spans word
913 	     boundaries.  */
914 	  store_split_bit_field (op0, bitsize, bitpos + offset * BITS_PER_UNIT,
915 				 value);
916 	  return;
917 	}
918 
919       total_bits = GET_MODE_BITSIZE (mode);
920 
921       /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
922 	 be in the range 0 to total_bits-1, and put any excess bytes in
923 	 OFFSET.  */
924       if (bitpos >= total_bits)
925 	{
926 	  offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
927 	  bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
928 		     * BITS_PER_UNIT);
929 	}
930 
931       /* Get ref to an aligned byte, halfword, or word containing the field.
932 	 Adjust BITPOS to be position within a word,
933 	 and OFFSET to be the offset of that word.
934 	 Then alter OP0 to refer to that word.  */
935       bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
936       offset -= (offset % (total_bits / BITS_PER_UNIT));
937       op0 = adjust_address (op0, mode, offset);
938     }
939 
940   mode = GET_MODE (op0);
941 
942   /* Now MODE is either some integral mode for a MEM as OP0,
943      or is a full-word for a REG as OP0.  TOTAL_BITS corresponds.
944      The bit field is contained entirely within OP0.
945      BITPOS is the starting bit number within OP0.
946      (OP0's mode may actually be narrower than MODE.)  */
947 
948   if (BYTES_BIG_ENDIAN)
949       /* BITPOS is the distance between our msb
950 	 and that of the containing datum.
951 	 Convert it to the distance from the lsb.  */
952       bitpos = total_bits - bitsize - bitpos;
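  /* Editor's illustration (not in the original source): with
     TOTAL_BITS == 32, a 5-bit field whose msb-relative BITPOS is 3 becomes
     lsb-relative BITPOS 32 - 5 - 3 == 24.  */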
953 
954   /* Now BITPOS is always the distance between our lsb
955      and that of OP0.  */
956 
957   /* Shift VALUE left by BITPOS bits.  If VALUE is not constant,
958      we must first convert its mode to MODE.  */
959 
960   if (CONST_INT_P (value))
961     {
962       HOST_WIDE_INT v = INTVAL (value);
963 
964       if (bitsize < HOST_BITS_PER_WIDE_INT)
965 	v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;
966 
967       if (v == 0)
968 	all_zero = 1;
969       else if ((bitsize < HOST_BITS_PER_WIDE_INT
970 		&& v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
971 	       || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
972 	all_one = 1;
973 
974       value = lshift_value (mode, value, bitpos, bitsize);
975     }
976   else
977     {
978       int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
979 		      && bitpos + bitsize != GET_MODE_BITSIZE (mode));
980 
981       if (GET_MODE (value) != mode)
982 	value = convert_to_mode (mode, value, 1);
983 
984       if (must_and)
985 	value = expand_binop (mode, and_optab, value,
986 			      mask_rtx (mode, 0, bitsize, 0),
987 			      NULL_RTX, 1, OPTAB_LIB_WIDEN);
988       if (bitpos > 0)
989 	value = expand_shift (LSHIFT_EXPR, mode, value,
990 			      build_int_cst (NULL_TREE, bitpos), NULL_RTX, 1);
991     }
992 
993   /* Now clear the chosen bits in OP0,
994      except that if VALUE is -1 we need not bother.  */
995   /* We keep the intermediates in registers to allow CSE to combine
996      consecutive bitfield assignments.  */
997 
998   temp = force_reg (mode, op0);
999 
1000   if (! all_one)
1001     {
1002       temp = expand_binop (mode, and_optab, temp,
1003 			   mask_rtx (mode, bitpos, bitsize, 1),
1004 			   NULL_RTX, 1, OPTAB_LIB_WIDEN);
1005       temp = force_reg (mode, temp);
1006     }
1007 
1008   /* Now logical-or VALUE into OP0, unless it is zero.  */
1009 
1010   if (! all_zero)
1011     {
1012       temp = expand_binop (mode, ior_optab, temp, value,
1013 			   NULL_RTX, 1, OPTAB_LIB_WIDEN);
1014       temp = force_reg (mode, temp);
1015     }
1016 
1017   if (op0 != temp)
1018     {
1019       op0 = copy_rtx (op0);
1020       emit_move_insn (op0, temp);
1021     }
1022 }
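/* Editor's sketch (not in the original source): ignoring mode selection and
   endianness, the masked read-modify-write sequence emitted above is
   equivalent to the following standalone C model, in which CONTAINER is the
   word read from OP0 and BITPOS counts from the least significant bit.  */
#if 0
static unsigned int
store_fixed_bit_field_model (unsigned int container, unsigned int value,
			     unsigned int bitpos, unsigned int bitsize)
{
  /* Mask of BITSIZE low-order bits; BITPOS + BITSIZE must not exceed 32.  */
  unsigned int field_mask = bitsize >= 32 ? ~0u : (1u << bitsize) - 1u;
  unsigned int mask = field_mask << bitpos;

  /* Clear the destination bits, then OR in the masked, shifted value.  */
  return (container & ~mask) | ((value & field_mask) << bitpos);
}
#endif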
1023 
1024 /* Store a bit field that is split across multiple accessible memory objects.
1025 
1026    OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1027    BITSIZE is the field width; BITPOS the position of its first bit
1028    (within the word).
1029    VALUE is the value to store.
1030 
1031    This does not yet handle fields wider than BITS_PER_WORD.  */
1032 
1033 static void
1034 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1035 		       unsigned HOST_WIDE_INT bitpos, rtx value)
1036 {
1037   unsigned int unit;
1038   unsigned int bitsdone = 0;
1039 
1040   /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle that
1041      much at a time.  */
1042   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1043     unit = BITS_PER_WORD;
1044   else
1045     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1046 
1047   /* If VALUE is a constant other than a CONST_INT, get it into a register in
1048      WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
1049      that VALUE might be a floating-point constant.  */
1050   if (CONSTANT_P (value) && !CONST_INT_P (value))
1051     {
1052       rtx word = gen_lowpart_common (word_mode, value);
1053 
1054       if (word && (value != word))
1055 	value = word;
1056       else
1057 	value = gen_lowpart_common (word_mode,
1058 				    force_reg (GET_MODE (value) != VOIDmode
1059 					       ? GET_MODE (value)
1060 					       : word_mode, value));
1061     }
1062 
1063   while (bitsdone < bitsize)
1064     {
1065       unsigned HOST_WIDE_INT thissize;
1066       rtx part, word;
1067       unsigned HOST_WIDE_INT thispos;
1068       unsigned HOST_WIDE_INT offset;
1069 
1070       offset = (bitpos + bitsdone) / unit;
1071       thispos = (bitpos + bitsdone) % unit;
1072 
1073       /* THISSIZE must not overrun a word boundary.  Otherwise,
1074 	 store_fixed_bit_field will call us again, and we will mutually
1075 	 recurse forever.  */
1076       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1077       thissize = MIN (thissize, unit - thispos);
1078 
1079       if (BYTES_BIG_ENDIAN)
1080 	{
1081 	  int total_bits;
1082 
1083 	  /* We must do an endian conversion exactly the same way as it is
1084 	     done in extract_bit_field, so that the two calls to
1085 	     extract_fixed_bit_field will have comparable arguments.  */
1086 	  if (!MEM_P (value) || GET_MODE (value) == BLKmode)
1087 	    total_bits = BITS_PER_WORD;
1088 	  else
1089 	    total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1090 
1091 	  /* Fetch successively less significant portions.  */
1092 	  if (CONST_INT_P (value))
1093 	    part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1094 			     >> (bitsize - bitsdone - thissize))
1095 			    & (((HOST_WIDE_INT) 1 << thissize) - 1));
1096 	  else
1097 	    /* The args are chosen so that the last part includes the
1098 	       lsb.  Give extract_bit_field the value it needs (with
1099 	       endianness compensation) to fetch the piece we want.  */
1100 	    part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1101 					    total_bits - bitsize + bitsdone,
1102 					    NULL_RTX, 1);
1103 	}
1104       else
1105 	{
1106 	  /* Fetch successively more significant portions.  */
1107 	  if (CONST_INT_P (value))
1108 	    part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1109 			     >> bitsdone)
1110 			    & (((HOST_WIDE_INT) 1 << thissize) - 1));
1111 	  else
1112 	    part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1113 					    bitsdone, NULL_RTX, 1);
1114 	}
1115 
1116       /* If OP0 is a register, then handle OFFSET here.
1117 
1118 	 When handling multiword bitfields, extract_bit_field may pass
1119 	 down a word_mode SUBREG of a larger REG for a bitfield that actually
1120 	 crosses a word boundary.  Thus, for a SUBREG, we must find
1121 	 the current word starting from the base register.  */
1122       if (GET_CODE (op0) == SUBREG)
1123 	{
1124 	  int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1125 	  word = operand_subword_force (SUBREG_REG (op0), word_offset,
1126 					GET_MODE (SUBREG_REG (op0)));
1127 	  offset = 0;
1128 	}
1129       else if (REG_P (op0))
1130 	{
1131 	  word = operand_subword_force (op0, offset, GET_MODE (op0));
1132 	  offset = 0;
1133 	}
1134       else
1135 	word = op0;
1136 
1137       /* OFFSET is in UNITs, and UNIT is in bits.
1138          store_fixed_bit_field wants offset in bytes.  */
1139       store_fixed_bit_field (word, offset * unit / BITS_PER_UNIT, thissize,
1140 			     thispos, part);
1141       bitsdone += thissize;
1142     }
1143 }
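/* Editor's illustration (not in the original source): for a register
   destination with BITS_PER_WORD == 32, a 10-bit field at BITPOS 28 is
   split by the loop above into two pieces: 4 bits at position 28 of word 0
   (thissize == 32 - 28), then 6 bits at position 0 of word 1.  */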
1144 
1145 /* A subroutine of extract_bit_field_1 that converts return value X
1146    to either MODE or TMODE.  MODE, TMODE and UNSIGNEDP are arguments
1147    to extract_bit_field.  */
1148 
1149 static rtx
1150 convert_extracted_bit_field (rtx x, enum machine_mode mode,
1151 			     enum machine_mode tmode, bool unsignedp)
1152 {
1153   if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1154     return x;
1155 
1156   /* If the desired mode TMODE is not a scalar integral mode, first convert
1157      X to an integer mode of that size and then access it in TMODE (for
1158      instance a floating-point mode) via a SUBREG.  */
1159   if (!SCALAR_INT_MODE_P (tmode))
1160     {
1161       enum machine_mode smode;
1162 
1163       smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1164       x = convert_to_mode (smode, x, unsignedp);
1165       x = force_reg (smode, x);
1166       return gen_lowpart (tmode, x);
1167     }
1168 
1169   return convert_to_mode (tmode, x, unsignedp);
1170 }
1171 
1172 /* A subroutine of extract_bit_field, with the same arguments.
1173    If FALLBACK_P is true, fall back to extract_fixed_bit_field
1174    if we can find no other means of implementing the operation.
1175    If FALLBACK_P is false, return NULL instead.  */
1176 
1177 static rtx
1178 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1179 		     unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1180 		     enum machine_mode mode, enum machine_mode tmode,
1181 		     bool fallback_p)
1182 {
1183   unsigned int unit
1184     = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
1185   unsigned HOST_WIDE_INT offset, bitpos;
1186   rtx op0 = str_rtx;
1187   enum machine_mode int_mode;
1188   enum machine_mode ext_mode;
1189   enum machine_mode mode1;
1190   enum insn_code icode;
1191   int byte_offset;
1192 
1193   if (tmode == VOIDmode)
1194     tmode = mode;
1195 
1196   while (GET_CODE (op0) == SUBREG)
1197     {
1198       bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1199       op0 = SUBREG_REG (op0);
1200     }
1201 
1202   /* If we have an out-of-bounds access to a register, just return an
1203      uninitialized register of the required mode.  This can occur if the
1204      source code contains an out-of-bounds access to a small array.  */
1205   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1206     return gen_reg_rtx (tmode);
1207 
1208   if (REG_P (op0)
1209       && mode == GET_MODE (op0)
1210       && bitnum == 0
1211       && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1212     {
1213       /* We're trying to extract a full register from itself.  */
1214       return op0;
1215     }
1216 
1217   /* See if we can get a better vector mode before extracting.  */
1218   if (VECTOR_MODE_P (GET_MODE (op0))
1219       && !MEM_P (op0)
1220       && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1221     {
1222       enum machine_mode new_mode;
1223       int nunits = GET_MODE_NUNITS (GET_MODE (op0));
1224 
1225       if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1226 	new_mode = MIN_MODE_VECTOR_FLOAT;
1227       else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1228 	new_mode = MIN_MODE_VECTOR_FRACT;
1229       else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1230 	new_mode = MIN_MODE_VECTOR_UFRACT;
1231       else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1232 	new_mode = MIN_MODE_VECTOR_ACCUM;
1233       else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1234 	new_mode = MIN_MODE_VECTOR_UACCUM;
1235       else
1236 	new_mode = MIN_MODE_VECTOR_INT;
1237 
1238       for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
1239 	if (GET_MODE_NUNITS (new_mode) == nunits
1240 	    && GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1241 	    && targetm.vector_mode_supported_p (new_mode))
1242 	  break;
1243       if (new_mode != VOIDmode)
1244 	op0 = gen_lowpart (new_mode, op0);
1245     }
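  /* Editor's illustration (not in the original source): if OP0 has mode
     V2SImode and TMODE is SFmode, the loop above looks for a vector mode of
     the same size and number of units in the float class and, if the target
     supports it, rewrites OP0 as a V2SFmode lowpart so the element can be
     extracted directly in SFmode.  */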
1246 
1247   /* Use vec_extract patterns for extracting parts of vectors whenever
1248      available.  */
1249   if (VECTOR_MODE_P (GET_MODE (op0))
1250       && !MEM_P (op0)
1251       && (optab_handler (vec_extract_optab, GET_MODE (op0))->insn_code
1252 	  != CODE_FOR_nothing)
1253       && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
1254 	  == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
1255     {
1256       enum machine_mode outermode = GET_MODE (op0);
1257       enum machine_mode innermode = GET_MODE_INNER (outermode);
1258       int icode = (int) optab_handler (vec_extract_optab, outermode)->insn_code;
1259       unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1260       rtx rtxpos = GEN_INT (pos);
1261       rtx src = op0;
1262       rtx dest = NULL, pat, seq;
1263       enum machine_mode mode0 = insn_data[icode].operand[0].mode;
1264       enum machine_mode mode1 = insn_data[icode].operand[1].mode;
1265       enum machine_mode mode2 = insn_data[icode].operand[2].mode;
1266 
1267       if (innermode == tmode || innermode == mode)
1268 	dest = target;
1269 
1270       if (!dest)
1271 	dest = gen_reg_rtx (innermode);
1272 
1273       start_sequence ();
1274 
1275       if (! (*insn_data[icode].operand[0].predicate) (dest, mode0))
1276 	dest = copy_to_mode_reg (mode0, dest);
1277 
1278       if (! (*insn_data[icode].operand[1].predicate) (src, mode1))
1279 	src = copy_to_mode_reg (mode1, src);
1280 
1281       if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
1282 	rtxpos = copy_to_mode_reg (mode1, rtxpos);
1283 
1284       /* We could handle this, but we should always be called with a pseudo
1285 	 for our targets and all insns should take them as outputs.  */
1286       gcc_assert ((*insn_data[icode].operand[0].predicate) (dest, mode0)
1287 		  && (*insn_data[icode].operand[1].predicate) (src, mode1)
1288 		  && (*insn_data[icode].operand[2].predicate) (rtxpos, mode2));
1289 
1290       pat = GEN_FCN (icode) (dest, src, rtxpos);
1291       seq = get_insns ();
1292       end_sequence ();
1293       if (pat)
1294 	{
1295 	  emit_insn (seq);
1296 	  emit_insn (pat);
1297       	  if (mode0 != mode)
1298 	    return gen_lowpart (tmode, dest);
1299 	  return dest;
1300 	}
1301     }
1302 
1303   /* Make sure we are playing with integral modes.  Pun with subregs
1304      if we aren't.  */
1305   {
1306     enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1307     if (imode != GET_MODE (op0))
1308       {
1309 	if (MEM_P (op0))
1310 	  op0 = adjust_address (op0, imode, 0);
1311 	else if (imode != BLKmode)
1312 	  {
1313 	    op0 = gen_lowpart (imode, op0);
1314 
1315 	    /* If we got a SUBREG, force it into a register since we
1316 	       aren't going to be able to do another SUBREG on it.  */
1317 	    if (GET_CODE (op0) == SUBREG)
1318 	      op0 = force_reg (imode, op0);
1319 	  }
1320 	else if (REG_P (op0))
1321 	  {
1322 	    rtx reg, subreg;
1323 	    imode = smallest_mode_for_size (GET_MODE_BITSIZE (GET_MODE (op0)),
1324 					    MODE_INT);
1325 	    reg = gen_reg_rtx (imode);
1326 	    subreg = gen_lowpart_SUBREG (GET_MODE (op0), reg);
1327 	    emit_move_insn (subreg, op0);
1328 	    op0 = reg;
1329 	    bitnum += SUBREG_BYTE (subreg) * BITS_PER_UNIT;
1330 	  }
1331 	else
1332 	  {
1333 	    rtx mem = assign_stack_temp (GET_MODE (op0),
1334 					 GET_MODE_SIZE (GET_MODE (op0)), 0);
1335 	    emit_move_insn (mem, op0);
1336 	    op0 = adjust_address (mem, BLKmode, 0);
1337 	  }
1338       }
1339   }
1340 
1341   /* We may be accessing data outside the field, which means
1342      we can alias adjacent data.  */
1343   if (MEM_P (op0))
1344     {
1345       op0 = shallow_copy_rtx (op0);
1346       set_mem_alias_set (op0, 0);
1347       set_mem_expr (op0, 0);
1348     }
1349 
1350   /* Extraction of a full-word or multi-word value from a structure
1351      in a register or aligned memory can be done with just a SUBREG.
1352      A subword value in the least significant part of a register
1353      can also be extracted with a SUBREG.  For this, we need the
1354      byte offset of the value in op0.  */
1355 
1356   bitpos = bitnum % unit;
1357   offset = bitnum / unit;
1358   byte_offset = bitpos / BITS_PER_UNIT + offset * UNITS_PER_WORD;
1359 
1360   /* If OP0 is a register, BITPOS must count within a word.
1361      But as we have it, it counts within whatever size OP0 now has.
1362      On a bigendian machine, these are not the same, so convert.  */
1363   if (BYTES_BIG_ENDIAN
1364       && !MEM_P (op0)
1365       && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
1366     bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1367 
1368   /* ??? We currently assume TARGET is at least as big as BITSIZE.
1369      If that's wrong, the solution is to test for it and set TARGET to 0
1370      if needed.  */
1371 
1372   /* Only scalar integer modes can be converted via subregs.  There is an
1373      additional problem for FP modes here in that they can have a precision
1374      which is different from the size.  mode_for_size uses precision, but
1375      we want a mode based on the size, so we must avoid calling it for FP
1376      modes.  */
1377   mode1  = (SCALAR_INT_MODE_P (tmode)
1378 	    ? mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0)
1379 	    : mode);
1380 
1381   if (((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode)
1382 	&& bitpos % BITS_PER_WORD == 0)
1383        || (mode1 != BLKmode
1384 	   /* ??? The big endian test here is wrong.  This is correct
1385 	      if the value is in a register, and if mode_for_size is not
1386 	      the same mode as op0.  This causes us to get unnecessarily
1387 	      inefficient code from the Thumb port when -mbig-endian.  */
1388 	   && (BYTES_BIG_ENDIAN
1389 	       ? bitpos + bitsize == BITS_PER_WORD
1390 	       : bitpos == 0)))
1391       && ((!MEM_P (op0)
1392 	   && TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (mode1),
1393 				     GET_MODE_BITSIZE (GET_MODE (op0)))
1394 	   && GET_MODE_SIZE (mode1) != 0
1395 	   && byte_offset % GET_MODE_SIZE (mode1) == 0)
1396 	  || (MEM_P (op0)
1397 	      && (! SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
1398 		  || (offset * BITS_PER_UNIT % bitsize == 0
1399 		      && MEM_ALIGN (op0) % bitsize == 0)))))
1400     {
1401       if (MEM_P (op0))
1402 	op0 = adjust_address (op0, mode1, offset);
1403       else if (mode1 != GET_MODE (op0))
1404 	{
1405 	  rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1406 					 byte_offset);
1407 	  if (sub == NULL)
1408 	    goto no_subreg_mode_swap;
1409 	  op0 = sub;
1410 	}
1411       if (mode1 != mode)
1412 	return convert_to_mode (tmode, op0, unsignedp);
1413       return op0;
1414     }
1415  no_subreg_mode_swap:
1416 
1417   /* Handle fields bigger than a word.  */
1418 
1419   if (bitsize > BITS_PER_WORD)
1420     {
1421       /* Here we transfer the words of the field
1422 	 in the order least significant first.
1423 	 This is because the most significant word is the one which may
1424 	 be less than full.  */
1425 
1426       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1427       unsigned int i;
1428 
1429       if (target == 0 || !REG_P (target))
1430 	target = gen_reg_rtx (mode);
1431 
1432       /* Indicate for flow that the entire target reg is being set.  */
1433       emit_clobber (target);
1434 
1435       for (i = 0; i < nwords; i++)
1436 	{
1437 	  /* If I is 0, use the low-order word in both field and target;
1438 	     if I is 1, use the next to lowest word; and so on.  */
1439 	  /* Word number in TARGET to use.  */
1440 	  unsigned int wordnum
1441 	    = (WORDS_BIG_ENDIAN
1442 	       ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1443 	       : i);
1444 	  /* Offset from start of field in OP0.  */
1445 	  unsigned int bit_offset = (WORDS_BIG_ENDIAN
1446 				     ? MAX (0, ((int) bitsize - ((int) i + 1)
1447 						* (int) BITS_PER_WORD))
1448 				     : (int) i * BITS_PER_WORD);
1449 	  rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1450 	  rtx result_part
1451 	    = extract_bit_field (op0, MIN (BITS_PER_WORD,
1452 					   bitsize - i * BITS_PER_WORD),
1453 				 bitnum + bit_offset, 1, target_part, mode,
1454 				 word_mode);
1455 
1456 	  gcc_assert (target_part);
1457 
1458 	  if (result_part != target_part)
1459 	    emit_move_insn (target_part, result_part);
1460 	}
1461 
1462       if (unsignedp)
1463 	{
1464 	  /* Unless we've filled TARGET, the upper regs in a multi-reg value
1465 	     need to be zero'd out.  */
1466 	  if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1467 	    {
1468 	      unsigned int i, total_words;
1469 
1470 	      total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1471 	      for (i = nwords; i < total_words; i++)
1472 		emit_move_insn
1473 		  (operand_subword (target,
1474 				    WORDS_BIG_ENDIAN ? total_words - i - 1 : i,
1475 				    1, VOIDmode),
1476 		   const0_rtx);
1477 	    }
1478 	  return target;
1479 	}
1480 
1481       /* Signed bit field: sign-extend with two arithmetic shifts.  */
1482       target = expand_shift (LSHIFT_EXPR, mode, target,
1483 			     build_int_cst (NULL_TREE,
1484 					    GET_MODE_BITSIZE (mode) - bitsize),
1485 			     NULL_RTX, 0);
1486       return expand_shift (RSHIFT_EXPR, mode, target,
1487 			   build_int_cst (NULL_TREE,
1488 					  GET_MODE_BITSIZE (mode) - bitsize),
1489 			   NULL_RTX, 0);
1490     }
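  /* Editor's illustration (not in the original source): for a signed 50-bit
     field extracted into a 64-bit MODE, the two shifts above first shift
     left by 64 - 50 == 14, putting the field's sign bit into the mode's
     sign bit, and then arithmetic-shift right by 14, which replicates that
     sign bit through the upper 14 bits.  */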
1491 
1492   /* From here on we know the desired field is smaller than a word.  */
1493 
1494   /* Check if there is a correspondingly-sized integer field, so we can
1495      safely extract it as one size of integer, if necessary; then
1496      truncate or extend to the size that is wanted; then use SUBREGs or
1497      convert_to_mode to get one of the modes we really wanted.  */
1498 
1499   int_mode = int_mode_for_mode (tmode);
1500   if (int_mode == BLKmode)
1501     int_mode = int_mode_for_mode (mode);
1502   /* Should probably push op0 out to memory and then do a load.  */
1503   gcc_assert (int_mode != BLKmode);
1504 
1505   /* OFFSET is the number of words or bytes (UNIT says which)
1506      from STR_RTX to the first word or byte containing part of the field.  */
1507   if (!MEM_P (op0))
1508     {
1509       if (offset != 0
1510 	  || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1511 	{
1512 	  if (!REG_P (op0))
1513 	    op0 = copy_to_reg (op0);
1514 	  op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
1515 		                op0, (offset * UNITS_PER_WORD));
1516 	}
1517       offset = 0;
1518     }
1519 
1520   /* Now OFFSET is nonzero only for memory operands.  */
1521   ext_mode = mode_for_extraction (unsignedp ? EP_extzv : EP_extv, 0);
1522   icode = unsignedp ? CODE_FOR_extzv : CODE_FOR_extv;
1523   if (ext_mode != MAX_MACHINE_MODE
1524       && bitsize > 0
1525       && GET_MODE_BITSIZE (ext_mode) >= bitsize
1526       /* If op0 is a register, we need it in EXT_MODE to make it
1527 	 acceptable to the format of ext(z)v.  */
1528       && !(GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
1529       && !((REG_P (op0) || GET_CODE (op0) == SUBREG)
1530 	   && (bitsize + bitpos > GET_MODE_BITSIZE (ext_mode)))
1531       && check_predicate_volatile_ok (icode, 1, op0, GET_MODE (op0)))
1532     {
1533       unsigned HOST_WIDE_INT xbitpos = bitpos, xoffset = offset;
1534       rtx bitsize_rtx, bitpos_rtx;
1535       rtx last = get_last_insn ();
1536       rtx xop0 = op0;
1537       rtx xtarget = target;
1538       rtx xspec_target = target;
1539       rtx xspec_target_subreg = 0;
1540       rtx pat;
1541 
1542       /* If op0 is a register, we need it in EXT_MODE to make it
1543 	 acceptable to the format of ext(z)v.  */
1544       if (REG_P (xop0) && GET_MODE (xop0) != ext_mode)
1545 	xop0 = gen_lowpart_SUBREG (ext_mode, xop0);
1546       if (MEM_P (xop0))
1547 	/* Get ref to first byte containing part of the field.  */
1548 	xop0 = adjust_address (xop0, byte_mode, xoffset);
1549 
1550       /* On big-endian machines, we count bits from the most significant.
1551 	 If the bit field insn does not, we must invert.  */
1552       if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1553 	xbitpos = unit - bitsize - xbitpos;
1554 
1555       /* Now convert from counting within UNIT to counting in EXT_MODE.  */
1556       if (BITS_BIG_ENDIAN && !MEM_P (xop0))
1557 	xbitpos += GET_MODE_BITSIZE (ext_mode) - unit;
1558 
1559       unit = GET_MODE_BITSIZE (ext_mode);
1560 
1561       if (xtarget == 0)
1562 	xtarget = xspec_target = gen_reg_rtx (tmode);
1563 
1564       if (GET_MODE (xtarget) != ext_mode)
1565 	{
1566 	  /* Don't use LHS paradoxical subreg if explicit truncation is needed
1567 	     between the mode of the extraction (EXT_MODE) and the target
1568 	     mode.  Instead, create a temporary and use convert_move to set
1569 	     the target.  */
1570 	  if (REG_P (xtarget)
1571 	      && TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (GET_MODE (xtarget)),
1572 					GET_MODE_BITSIZE (ext_mode)))
1573 	    {
1574 	      xtarget = gen_lowpart (ext_mode, xtarget);
1575 	      if (GET_MODE_SIZE (ext_mode)
1576 		  > GET_MODE_SIZE (GET_MODE (xspec_target)))
1577 		xspec_target_subreg = xtarget;
1578 	    }
1579 	  else
1580 	    xtarget = gen_reg_rtx (ext_mode);
1581 	}
1582 
1583       /* If this machine's ext(z)v insists on a register target,
1584 	 make sure we have one.  */
1585       if (!insn_data[(int) icode].operand[0].predicate (xtarget, ext_mode))
1586 	xtarget = gen_reg_rtx (ext_mode);
1587 
1588       bitsize_rtx = GEN_INT (bitsize);
1589       bitpos_rtx = GEN_INT (xbitpos);
1590 
1591       pat = (unsignedp
1592 	     ? gen_extzv (xtarget, xop0, bitsize_rtx, bitpos_rtx)
1593 	     : gen_extv (xtarget, xop0, bitsize_rtx, bitpos_rtx));
1594       if (pat)
1595 	{
1596 	  emit_insn (pat);
1597 	  if (xtarget == xspec_target)
1598 	    return xtarget;
1599 	  if (xtarget == xspec_target_subreg)
1600 	    return xspec_target;
1601 	  return convert_extracted_bit_field (xtarget, mode, tmode, unsignedp);
1602 	}
1603       delete_insns_since (last);
1604     }
1605 
1606   /* If OP0 is a memory, try copying it to a register and seeing if a
1607      cheap register alternative is available.  */
1608   if (ext_mode != MAX_MACHINE_MODE && MEM_P (op0))
1609     {
1610       enum machine_mode bestmode;
1611 
1612       /* Get the mode to use for accessing this field.  If
1613 	 OP0 is BLKmode, get the smallest mode consistent with the
1614 	 alignment. If OP0 is a non-BLKmode object that is no
1615 	 wider than EXT_MODE, use its mode. Otherwise, use the
1616 	 smallest mode containing the field.  */
1617 
1618       if (GET_MODE (op0) == BLKmode
1619 	  || (ext_mode != MAX_MACHINE_MODE
1620 	      && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (ext_mode)))
1621 	bestmode = get_best_mode (bitsize, bitnum, MEM_ALIGN (op0),
1622 				  (ext_mode == MAX_MACHINE_MODE
1623 				   ? VOIDmode : ext_mode),
1624 				  MEM_VOLATILE_P (op0));
1625       else
1626 	bestmode = GET_MODE (op0);
1627 
1628       if (bestmode != VOIDmode
1629 	  && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
1630 	       && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
1631 	{
1632 	  unsigned HOST_WIDE_INT xoffset, xbitpos;
1633 
1634 	  /* Compute the offset as a multiple of this unit,
1635 	     counting in bytes.  */
1636 	  unit = GET_MODE_BITSIZE (bestmode);
1637 	  xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1638 	  xbitpos = bitnum % unit;
1639 
1640 	  /* Make sure the register is big enough for the whole field.  */
1641 	  if (xoffset * BITS_PER_UNIT + unit
1642 	      >= offset * BITS_PER_UNIT + bitsize)
1643 	    {
1644 	      rtx last, result, xop0;
1645 
1646 	      last = get_last_insn ();
1647 
1648 	      /* Fetch it to a register in that size.  */
1649 	      xop0 = adjust_address (op0, bestmode, xoffset);
1650 	      xop0 = force_reg (bestmode, xop0);
1651 	      result = extract_bit_field_1 (xop0, bitsize, xbitpos,
1652 					    unsignedp, target,
1653 					    mode, tmode, false);
1654 	      if (result)
1655 		return result;
1656 
1657 	      delete_insns_since (last);
1658 	    }
1659 	}
1660     }
1661 
1662   if (!fallback_p)
1663     return NULL;
1664 
1665   target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1666 				    bitpos, target, unsignedp);
1667   return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1668 }
1669 
1670 /* Generate code to extract a bit-field from STR_RTX
1671    containing BITSIZE bits, starting at BITNUM,
1672    and put it in TARGET if possible (if TARGET is nonzero).
1673    Regardless of TARGET, we return the rtx for where the value is placed.
1674 
1675    STR_RTX is the structure containing the byte (a REG or MEM).
1676    UNSIGNEDP is nonzero if this is an unsigned bit field.
1677    MODE is the natural mode of the field value once extracted.
1678    TMODE is the mode the caller would like the value to have;
1679    but the value may be returned with type MODE instead.
1680 
1681    If a TARGET is specified and we can store in it at no extra cost,
1682    we do so, and return TARGET.
1683    Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1684    if they are equally easy.  */
1685 
1686 rtx
1687 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1688 		   unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1689 		   enum machine_mode mode, enum machine_mode tmode)
1690 {
1691   return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
1692 			      target, mode, tmode, true);
1693 }
1694 
1695 /* Extract a bit field using shifts and boolean operations.
1696    Returns an rtx to represent the value.
1697    OP0 addresses a register (word) or memory (byte).
1698    BITPOS says which bit within the word or byte the bit field starts in.
1699    OFFSET says how many bytes farther the bit field starts;
1700     it is 0 if OP0 is a register.
1701    BITSIZE says how many bits long the bit field is.
1702     (If OP0 is a register, it may be narrower than a full word,
1703      but BITPOS still counts within a full word,
1704      which is significant on bigendian machines.)
1705 
1706    UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1707    If TARGET is nonzero, attempts to store the value there
1708    and return TARGET, but this is not guaranteed.
1709    If TARGET is not used, create a pseudo-reg of mode TMODE for the value.  */
1710 
1711 static rtx
1712 extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
1713 			 unsigned HOST_WIDE_INT offset,
1714 			 unsigned HOST_WIDE_INT bitsize,
1715 			 unsigned HOST_WIDE_INT bitpos, rtx target,
1716 			 int unsignedp)
1717 {
1718   unsigned int total_bits = BITS_PER_WORD;
1719   enum machine_mode mode;
1720 
1721   if (GET_CODE (op0) == SUBREG || REG_P (op0))
1722     {
1723       /* Special treatment for a bit field split across two registers.  */
1724       if (bitsize + bitpos > BITS_PER_WORD)
1725 	return extract_split_bit_field (op0, bitsize, bitpos, unsignedp);
1726     }
1727   else
1728     {
1729       /* Get the proper mode to use for this field.  We want a mode that
1730 	 includes the entire field.  If such a mode would be larger than
1731 	 a word, we won't be doing the extraction the normal way.  */
1732 
1733       mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
1734 			    MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0));
1735 
1736       if (mode == VOIDmode)
1737 	/* The only way this should occur is if the field spans word
1738 	   boundaries.  */
1739 	return extract_split_bit_field (op0, bitsize,
1740 					bitpos + offset * BITS_PER_UNIT,
1741 					unsignedp);
1742 
1743       total_bits = GET_MODE_BITSIZE (mode);
1744 
1745       /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
1746 	 be in the range 0 to total_bits-1, and put any excess bytes in
1747 	 OFFSET.  */
1748       if (bitpos >= total_bits)
1749 	{
1750 	  offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
1751 	  bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
1752 		     * BITS_PER_UNIT);
1753 	}
1754 
1755       /* Get ref to an aligned byte, halfword, or word containing the field.
1756 	 Adjust BITPOS to be position within a word,
1757 	 and OFFSET to be the offset of that word.
1758 	 Then alter OP0 to refer to that word.  */
1759       bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
1760       offset -= (offset % (total_bits / BITS_PER_UNIT));
1761       op0 = adjust_address (op0, mode, offset);
1762     }
1763 
1764   mode = GET_MODE (op0);
1765 
1766   if (BYTES_BIG_ENDIAN)
1767     /* BITPOS is the distance between our msb and that of OP0.
1768        Convert it to the distance from the lsb.  */
1769     bitpos = total_bits - bitsize - bitpos;
1770 
1771   /* Now BITPOS is always the distance between the field's lsb and that of OP0.
1772      We have reduced the big-endian case to the little-endian case.  */
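  /* For instance, with total_bits == 32, bitsize == 8 and a big-endian
     BITPOS of 4 (the field's msb sits 4 bits below OP0's msb), the
     little-endian position is 32 - 8 - 4 == 20, i.e. the field occupies
     bits 20..27 counting from the lsb.  */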
1773 
1774   if (unsignedp)
1775     {
1776       if (bitpos)
1777 	{
1778 	  /* If the field does not already start at the lsb,
1779 	     shift it so it does.  */
1780 	  tree amount = build_int_cst (NULL_TREE, bitpos);
1781 	  /* Maybe propagate the target for the shift.  */
1782 	  /* But not if we will return it--could confuse integrate.c.  */
1783 	  rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1784 	  if (tmode != mode) subtarget = 0;
1785 	  op0 = expand_shift (RSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1786 	}
1787       /* Convert the value to the desired mode.  */
1788       if (mode != tmode)
1789 	op0 = convert_to_mode (tmode, op0, 1);
1790 
1791       /* Unless the msb of the field used to be the msb when we shifted,
1792 	 mask out the upper bits.  */
1793 
1794       if (GET_MODE_BITSIZE (mode) != bitpos + bitsize)
1795 	return expand_binop (GET_MODE (op0), and_optab, op0,
1796 			     mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1797 			     target, 1, OPTAB_LIB_WIDEN);
1798       return op0;
1799     }
1800 
1801   /* To extract a signed bit-field, first shift its msb to the msb of the word,
1802      then arithmetic-shift its lsb to the lsb of the word.  */
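  /* For instance, an 8-bit field at BITPOS 4 ends up in HImode below:
     it is shifted left by 16 - 12 = 4 so its msb reaches bit 15, then
     arithmetic-shifted right by 16 - 8 = 8.  (Illustrative only.)  */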
1803   op0 = force_reg (mode, op0);
1804   if (mode != tmode)
1805     target = 0;
1806 
1807   /* Find the narrowest integer mode that contains the field.  */
1808 
1809   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1810        mode = GET_MODE_WIDER_MODE (mode))
1811     if (GET_MODE_BITSIZE (mode) >= bitsize + bitpos)
1812       {
1813 	op0 = convert_to_mode (mode, op0, 0);
1814 	break;
1815       }
1816 
1817   if (GET_MODE_BITSIZE (mode) != (bitsize + bitpos))
1818     {
1819       tree amount
1820 	= build_int_cst (NULL_TREE,
1821 			 GET_MODE_BITSIZE (mode) - (bitsize + bitpos));
1822       /* Maybe propagate the target for the shift.  */
1823       rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1824       op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1825     }
1826 
1827   return expand_shift (RSHIFT_EXPR, mode, op0,
1828 		       build_int_cst (NULL_TREE,
1829 				      GET_MODE_BITSIZE (mode) - bitsize),
1830 		       target, 0);
1831 }
1832 
1833 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1834    of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1835    complement of that if COMPLEMENT.  The mask is truncated if
1836    necessary to the width of mode MODE.  The mask is zero-extended if
1837    BITSIZE+BITPOS is too small for MODE.  */
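/* Purely as an illustration of the above: mask_rtx (SImode, 8, 4, 0)
   yields a CONST_INT for 0x00000f00 -- four ones above eight zeros --
   while a nonzero COMPLEMENT yields 0xfffff0ff instead.  */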
1838 
1839 static rtx
1840 mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
1841 {
1842   HOST_WIDE_INT masklow, maskhigh;
1843 
1844   if (bitsize == 0)
1845     masklow = 0;
1846   else if (bitpos < HOST_BITS_PER_WIDE_INT)
1847     masklow = (HOST_WIDE_INT) -1 << bitpos;
1848   else
1849     masklow = 0;
1850 
1851   if (bitpos + bitsize < HOST_BITS_PER_WIDE_INT)
1852     masklow &= ((unsigned HOST_WIDE_INT) -1
1853 		>> (HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1854 
1855   if (bitpos <= HOST_BITS_PER_WIDE_INT)
1856     maskhigh = -1;
1857   else
1858     maskhigh = (HOST_WIDE_INT) -1 << (bitpos - HOST_BITS_PER_WIDE_INT);
1859 
1860   if (bitsize == 0)
1861     maskhigh = 0;
1862   else if (bitpos + bitsize > HOST_BITS_PER_WIDE_INT)
1863     maskhigh &= ((unsigned HOST_WIDE_INT) -1
1864 		 >> (2 * HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1865   else
1866     maskhigh = 0;
1867 
1868   if (complement)
1869     {
1870       maskhigh = ~maskhigh;
1871       masklow = ~masklow;
1872     }
1873 
1874   return immed_double_const (masklow, maskhigh, mode);
1875 }
1876 
1877 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1878    VALUE truncated to BITSIZE bits and then shifted left BITPOS bits.  */
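/* For instance (illustrative only), lshift_value (SImode, GEN_INT (0x1ff),
   8, 8) truncates 0x1ff to the 8-bit value 0xff and returns a CONST_INT
   for 0xff00.  */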
1879 
1880 static rtx
1881 lshift_value (enum machine_mode mode, rtx value, int bitpos, int bitsize)
1882 {
1883   unsigned HOST_WIDE_INT v = INTVAL (value);
1884   HOST_WIDE_INT low, high;
1885 
1886   if (bitsize < HOST_BITS_PER_WIDE_INT)
1887     v &= ~((HOST_WIDE_INT) -1 << bitsize);
1888 
1889   if (bitpos < HOST_BITS_PER_WIDE_INT)
1890     {
1891       low = v << bitpos;
1892       high = (bitpos > 0 ? (v >> (HOST_BITS_PER_WIDE_INT - bitpos)) : 0);
1893     }
1894   else
1895     {
1896       low = 0;
1897       high = v << (bitpos - HOST_BITS_PER_WIDE_INT);
1898     }
1899 
1900   return immed_double_const (low, high, mode);
1901 }
1902 
1903 /* Extract a bit field that is split across two words
1904    and return an RTX for the result.
1905 
1906    OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1907    BITSIZE is the field width; BITPOS, position of its first bit, in the word.
1908    UNSIGNEDP is 1 if we should zero-extend the contents; else sign-extend.  */
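/* For example (purely illustrative), on a 32-bit-word target a 12-bit
   field at BITPOS 26 is fetched in two pieces -- bits 26..31 of the
   first word and bits 0..5 of the next -- which are then shifted into
   place and combined with IOR below.  */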
1909 
1910 static rtx
1911 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1912 			 unsigned HOST_WIDE_INT bitpos, int unsignedp)
1913 {
1914   unsigned int unit;
1915   unsigned int bitsdone = 0;
1916   rtx result = NULL_RTX;
1917   int first = 1;
1918 
1919   /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle that
1920      much at a time.  */
1921   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1922     unit = BITS_PER_WORD;
1923   else
1924     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1925 
1926   while (bitsdone < bitsize)
1927     {
1928       unsigned HOST_WIDE_INT thissize;
1929       rtx part, word;
1930       unsigned HOST_WIDE_INT thispos;
1931       unsigned HOST_WIDE_INT offset;
1932 
1933       offset = (bitpos + bitsdone) / unit;
1934       thispos = (bitpos + bitsdone) % unit;
1935 
1936       /* THISSIZE must not overrun a word boundary.  Otherwise,
1937 	 extract_fixed_bit_field will call us again, and we will mutually
1938 	 recurse forever.  */
1939       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1940       thissize = MIN (thissize, unit - thispos);
1941 
1942       /* If OP0 is a register, then handle OFFSET here.
1943 
1944 	 When handling multiword bitfields, extract_bit_field may pass
1945 	 down a word_mode SUBREG of a larger REG for a bitfield that actually
1946 	 crosses a word boundary.  Thus, for a SUBREG, we must find
1947 	 the current word starting from the base register.  */
1948       if (GET_CODE (op0) == SUBREG)
1949 	{
1950 	  int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1951 	  word = operand_subword_force (SUBREG_REG (op0), word_offset,
1952 					GET_MODE (SUBREG_REG (op0)));
1953 	  offset = 0;
1954 	}
1955       else if (REG_P (op0))
1956 	{
1957 	  word = operand_subword_force (op0, offset, GET_MODE (op0));
1958 	  offset = 0;
1959 	}
1960       else
1961 	word = op0;
1962 
1963       /* Extract the parts in bit-counting order,
1964 	 whose meaning is determined by BYTES_PER_UNIT.
1965 	 OFFSET is in UNITs, and UNIT is in bits.
1966 	 extract_fixed_bit_field wants offset in bytes.  */
1967       part = extract_fixed_bit_field (word_mode, word,
1968 				      offset * unit / BITS_PER_UNIT,
1969 				      thissize, thispos, 0, 1);
1970       bitsdone += thissize;
1971 
1972       /* Shift this part into place for the result.  */
1973       if (BYTES_BIG_ENDIAN)
1974 	{
1975 	  if (bitsize != bitsdone)
1976 	    part = expand_shift (LSHIFT_EXPR, word_mode, part,
1977 				 build_int_cst (NULL_TREE, bitsize - bitsdone),
1978 				 0, 1);
1979 	}
1980       else
1981 	{
1982 	  if (bitsdone != thissize)
1983 	    part = expand_shift (LSHIFT_EXPR, word_mode, part,
1984 				 build_int_cst (NULL_TREE,
1985 						bitsdone - thissize), 0, 1);
1986 	}
1987 
1988       if (first)
1989 	result = part;
1990       else
1991 	/* Combine the parts with bitwise or.  This works
1992 	   because we extracted each part as an unsigned bit field.  */
1993 	result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
1994 			       OPTAB_LIB_WIDEN);
1995 
1996       first = 0;
1997     }
1998 
1999   /* Unsigned bit field: we are done.  */
2000   if (unsignedp)
2001     return result;
2002   /* Signed bit field: sign-extend with two arithmetic shifts.  */
2003   result = expand_shift (LSHIFT_EXPR, word_mode, result,
2004 			 build_int_cst (NULL_TREE, BITS_PER_WORD - bitsize),
2005 			 NULL_RTX, 0);
2006   return expand_shift (RSHIFT_EXPR, word_mode, result,
2007 		       build_int_cst (NULL_TREE, BITS_PER_WORD - bitsize),
2008 		       NULL_RTX, 0);
2009 }
2010 
2011 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2012    the bit pattern.  SRC_MODE is the mode of SRC; if this is smaller than
2013    MODE, fill the upper bits with zeros.  Fail if the layout of either
2014    mode is unknown (as for CC modes) or if the extraction would involve
2015    unprofitable mode punning.  Return the value on success, otherwise
2016    return null.
2017 
2018    This is different from gen_lowpart* in these respects:
2019 
2020      - the returned value must always be considered an rvalue
2021 
2022      - when MODE is wider than SRC_MODE, the extraction involves
2023        a zero extension
2024 
2025      - when MODE is smaller than SRC_MODE, the extraction involves
2026        a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
2027 
2028    In other words, this routine performs a computation, whereas the
2029    gen_lowpart* routines are conceptually lvalue or rvalue subreg
2030    operations.  */
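/* So, for instance, extract_low_bits (SImode, DImode, x) acts as a
   truncation of X, extract_low_bits (DImode, SImode, x) zero-extends X,
   and a CC-class source or destination mode makes the routine return
   null.  */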
2031 
2032 rtx
2033 extract_low_bits (enum machine_mode mode, enum machine_mode src_mode, rtx src)
2034 {
2035   enum machine_mode int_mode, src_int_mode;
2036 
2037   if (mode == src_mode)
2038     return src;
2039 
2040   if (CONSTANT_P (src))
2041     {
2042       /* simplify_gen_subreg can't be used here, as if simplify_subreg
2043 	 fails, it will happily create (subreg (symbol_ref)) or similar
2044 	 invalid SUBREGs.  */
2045       unsigned int byte = subreg_lowpart_offset (mode, src_mode);
2046       rtx ret = simplify_subreg (mode, src, src_mode, byte);
2047       if (ret)
2048 	return ret;
2049 
2050       if (GET_MODE (src) == VOIDmode
2051 	  || !validate_subreg (mode, src_mode, src, byte))
2052 	return NULL_RTX;
2053 
2054       src = force_reg (GET_MODE (src), src);
2055       return gen_rtx_SUBREG (mode, src, byte);
2056     }
2057 
2058   if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2059     return NULL_RTX;
2060 
2061   if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2062       && MODES_TIEABLE_P (mode, src_mode))
2063     {
2064       rtx x = gen_lowpart_common (mode, src);
2065       if (x)
2066         return x;
2067     }
2068 
2069   src_int_mode = int_mode_for_mode (src_mode);
2070   int_mode = int_mode_for_mode (mode);
2071   if (src_int_mode == BLKmode || int_mode == BLKmode)
2072     return NULL_RTX;
2073 
2074   if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2075     return NULL_RTX;
2076   if (!MODES_TIEABLE_P (int_mode, mode))
2077     return NULL_RTX;
2078 
2079   src = gen_lowpart (src_int_mode, src);
2080   src = convert_modes (int_mode, src_int_mode, src, true);
2081   src = gen_lowpart (mode, src);
2082   return src;
2083 }
2084 
2085 /* Add INC into TARGET.  */
2086 
2087 void
2088 expand_inc (rtx target, rtx inc)
2089 {
2090   rtx value = expand_binop (GET_MODE (target), add_optab,
2091 			    target, inc,
2092 			    target, 0, OPTAB_LIB_WIDEN);
2093   if (value != target)
2094     emit_move_insn (target, value);
2095 }
2096 
2097 /* Subtract DEC from TARGET.  */
2098 
2099 void
2100 expand_dec (rtx target, rtx dec)
2101 {
2102   rtx value = expand_binop (GET_MODE (target), sub_optab,
2103 			    target, dec,
2104 			    target, 0, OPTAB_LIB_WIDEN);
2105   if (value != target)
2106     emit_move_insn (target, value);
2107 }
2108 
2109 /* Output a shift instruction for expression code CODE,
2110    with SHIFTED being the rtx for the value to shift,
2111    and AMOUNT the tree for the amount to shift by.
2112    Store the result in the rtx TARGET, if that is convenient.
2113    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2114    Return the rtx for where the value is.  */
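/* A typical call, of the same shape as the uses elsewhere in this file:

     expand_shift (LSHIFT_EXPR, SImode, x, build_int_cst (NULL_TREE, 3),
		   NULL_RTX, 1);

   computes X << 3; passing NULL_RTX as TARGET lets the expander pick a
   pseudo for the result.  */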
2115 
2116 rtx
2117 expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2118 	      tree amount, rtx target, int unsignedp)
2119 {
2120   rtx op1, temp = 0;
2121   int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2122   int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2123   optab lshift_optab = ashl_optab;
2124   optab rshift_arith_optab = ashr_optab;
2125   optab rshift_uns_optab = lshr_optab;
2126   optab lrotate_optab = rotl_optab;
2127   optab rrotate_optab = rotr_optab;
2128   enum machine_mode op1_mode;
2129   int attempt;
2130   bool speed = optimize_insn_for_speed_p ();
2131 
2132   op1 = expand_normal (amount);
2133   op1_mode = GET_MODE (op1);
2134 
2135   /* Determine whether the shift/rotate amount is a vector or a scalar.  If the
2136      shift amount is a vector, use the vector/vector shift patterns.  */
2137   if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2138     {
2139       lshift_optab = vashl_optab;
2140       rshift_arith_optab = vashr_optab;
2141       rshift_uns_optab = vlshr_optab;
2142       lrotate_optab = vrotl_optab;
2143       rrotate_optab = vrotr_optab;
2144     }
2145 
2146   /* We previously detected shift counts computed by NEGATE_EXPR
2147      and shifted in the other direction, but that does not work
2148      on all machines.  */
2149 
2150   if (SHIFT_COUNT_TRUNCATED)
2151     {
2152       if (CONST_INT_P (op1)
2153 	  && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2154 	      (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
2155 	op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2156 		       % GET_MODE_BITSIZE (mode));
2157       else if (GET_CODE (op1) == SUBREG
2158 	       && subreg_lowpart_p (op1)
2159 	       && INTEGRAL_MODE_P (GET_MODE (SUBREG_REG (op1))))
2160 	op1 = SUBREG_REG (op1);
2161     }
2162 
2163   if (op1 == const0_rtx)
2164     return shifted;
2165 
2166   /* Check whether it's cheaper to implement a left shift by a constant
2167      bit count by a sequence of additions.  */
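  /* E.g. a shift left by 2 becomes two self-additions:
     t = x + x; then t + t.  */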
2168   if (code == LSHIFT_EXPR
2169       && CONST_INT_P (op1)
2170       && INTVAL (op1) > 0
2171       && INTVAL (op1) < GET_MODE_BITSIZE (mode)
2172       && INTVAL (op1) < MAX_BITS_PER_WORD
2173       && shift_cost[speed][mode][INTVAL (op1)] > INTVAL (op1) * add_cost[speed][mode]
2174       && shift_cost[speed][mode][INTVAL (op1)] != MAX_COST)
2175     {
2176       int i;
2177       for (i = 0; i < INTVAL (op1); i++)
2178 	{
2179 	  temp = force_reg (mode, shifted);
2180 	  shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2181 				  unsignedp, OPTAB_LIB_WIDEN);
2182 	}
2183       return shifted;
2184     }
2185 
2186   for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2187     {
2188       enum optab_methods methods;
2189 
2190       if (attempt == 0)
2191 	methods = OPTAB_DIRECT;
2192       else if (attempt == 1)
2193 	methods = OPTAB_WIDEN;
2194       else
2195 	methods = OPTAB_LIB_WIDEN;
2196 
2197       if (rotate)
2198 	{
2199 	  /* Widening does not work for rotation.  */
2200 	  if (methods == OPTAB_WIDEN)
2201 	    continue;
2202 	  else if (methods == OPTAB_LIB_WIDEN)
2203 	    {
2204 	      /* If we have been unable to open-code this by a rotation,
2205 		 do it as the IOR of two shifts.  I.e., to rotate A
2206 		 by N bits, compute (A << N) | ((unsigned) A >> (C - N))
2207 		 where C is the bitsize of A.
2208 
2209 		 It is theoretically possible that the target machine might
2210 		 not be able to perform either shift and hence we would
2211 		 be making two libcalls rather than just the one for the
2212 		 shift (similarly if IOR could not be done).  We will allow
2213 		 this extremely unlikely lossage to avoid complicating the
2214 		 code below.  */
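	      /* Concretely (an illustration only): rotating a 32-bit
		 value A left by 5 becomes
		 (A << 5) | ((unsigned) A >> 27).  */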
2215 
2216 	      rtx subtarget = target == shifted ? 0 : target;
2217 	      tree new_amount, other_amount;
2218 	      rtx temp1;
2219 	      tree type = TREE_TYPE (amount);
2220 	      if (GET_MODE (op1) != TYPE_MODE (type)
2221 		  && GET_MODE (op1) != VOIDmode)
2222 		op1 = convert_to_mode (TYPE_MODE (type), op1, 1);
2223 	      new_amount = make_tree (type, op1);
2224 	      other_amount
2225 		= fold_build2 (MINUS_EXPR, type,
2226 			       build_int_cst (type, GET_MODE_BITSIZE (mode)),
2227 			       new_amount);
2228 
2229 	      shifted = force_reg (mode, shifted);
2230 
2231 	      temp = expand_shift (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2232 				   mode, shifted, new_amount, 0, 1);
2233 	      temp1 = expand_shift (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2234 				    mode, shifted, other_amount, subtarget, 1);
2235 	      return expand_binop (mode, ior_optab, temp, temp1, target,
2236 				   unsignedp, methods);
2237 	    }
2238 
2239 	  temp = expand_binop (mode,
2240 			       left ? lrotate_optab : rrotate_optab,
2241 			       shifted, op1, target, unsignedp, methods);
2242 	}
2243       else if (unsignedp)
2244 	temp = expand_binop (mode,
2245 			     left ? lshift_optab : rshift_uns_optab,
2246 			     shifted, op1, target, unsignedp, methods);
2247 
2248       /* Do arithmetic shifts.
2249 	 Also, if we are going to widen the operand, we can just as well
2250 	 use an arithmetic right-shift instead of a logical one.  */
2251       if (temp == 0 && ! rotate
2252 	  && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2253 	{
2254 	  enum optab_methods methods1 = methods;
2255 
2256 	  /* If trying to widen a log shift to an arithmetic shift,
2257 	     don't accept an arithmetic shift of the same size.  */
2258 	  if (unsignedp)
2259 	    methods1 = OPTAB_MUST_WIDEN;
2260 
2261 	  /* Arithmetic shift */
2262 
2263 	  temp = expand_binop (mode,
2264 			       left ? lshift_optab : rshift_arith_optab,
2265 			       shifted, op1, target, unsignedp, methods1);
2266 	}
2267 
2268       /* We used to try extzv here for logical right shifts, but that was
2269 	 only useful for one machine, the VAX, and caused poor code
2270 	 generation there for lshrdi3, so the code was deleted and a
2271 	 define_expand for lshrsi3 was added to vax.md.  */
2272     }
2273 
2274   gcc_assert (temp);
2275   return temp;
2276 }
2277 
2278 enum alg_code {
2279   alg_unknown,
2280   alg_zero,
2281   alg_m, alg_shift,
2282   alg_add_t_m2,
2283   alg_sub_t_m2,
2284   alg_add_factor,
2285   alg_sub_factor,
2286   alg_add_t2_m,
2287   alg_sub_t2_m,
2288   alg_impossible
2289 };
2290 
2291 /* This structure holds the "cost" of a multiply sequence.  The
2292    "cost" field holds the total rtx_cost of every operator in the
2293    synthetic multiplication sequence, hence cost(a op b) is defined
2294    as rtx_cost(op) + cost(a) + cost(b), where cost(leaf) is zero.
2295    The "latency" field holds the minimum possible latency of the
2296    synthetic multiply, on a hypothetical infinitely parallel CPU.
2297    This is the critical path, or the maximum height, of the expression
2298    tree which is the sum of rtx_costs on the most expensive path from
2299    any leaf to the root.  Hence latency(a op b) is defined as zero for
2300    leaves and rtx_cost(op) + max(latency(a), latency(b)) otherwise.  */
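/* For example, if a synthetic multiply uses three add-class operations of
   rtx_cost 4 each, its cost is 12; if two of them are independent and only
   feed the third, its latency is 8, because the independent pair can issue
   in parallel on the hypothetical CPU above.  */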
2301 
2302 struct mult_cost {
2303   short cost;     /* Total rtx_cost of the multiplication sequence.  */
2304   short latency;  /* The latency of the multiplication sequence.  */
2305 };
2306 
2307 /* This macro is used to compare a pointer to a mult_cost against a
2308    single integer "rtx_cost" value.  This is equivalent to the macro
2309    CHEAPER_MULT_COST(X,Z) where Z = {Y,Y}.  */
2310 #define MULT_COST_LESS(X,Y) ((X)->cost < (Y)	\
2311 			     || ((X)->cost == (Y) && (X)->latency < (Y)))
2312 
2313 /* This macro is used to compare two pointers to mult_costs against
2314    each other.  The macro returns true if X is cheaper than Y.
2315    Currently, the cheaper of two mult_costs is the one with the
2316    lower "cost".  If "cost"s are tied, the lower latency is cheaper.  */
2317 #define CHEAPER_MULT_COST(X,Y)  ((X)->cost < (Y)->cost		\
2318 				 || ((X)->cost == (Y)->cost	\
2319 				     && (X)->latency < (Y)->latency))
2320 
2321 /* This structure records a sequence of operations.
2322    `ops' is the number of operations recorded.
2323    `cost' is their total cost.
2324    The operations are stored in `op' and the corresponding
2325    logarithms of the integer coefficients in `log'.
2326 
2327    These are the operations:
2328    alg_zero		total := 0;
2329    alg_m		total := multiplicand;
2330    alg_shift		total := total * coeff
2331    alg_add_t_m2		total := total + multiplicand * coeff;
2332    alg_sub_t_m2		total := total - multiplicand * coeff;
2333    alg_add_factor	total := total * coeff + total;
2334    alg_sub_factor	total := total * coeff - total;
2335    alg_add_t2_m		total := total * coeff + multiplicand;
2336    alg_sub_t2_m		total := total * coeff - multiplicand;
2337 
2338    The first operand must be either alg_zero or alg_m.  */
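/* As an illustration, multiplying by 10 admits the sequence
     alg_m           total := multiplicand
     alg_add_t2_m    total := total * 4 + multiplicand    (5 * multiplicand)
     alg_shift       total := total * 2                   (10 * multiplicand)
   i.e. a shift-and-add followed by a shift; synth_mult searches for the
   cheapest such sequence under the target's cost tables.  */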
2339 
2340 struct algorithm
2341 {
2342   struct mult_cost cost;
2343   short ops;
2344   /* The sizes of the OP and LOG fields are not directly related to the
2345      word size, but the worst case arises when we have few
2346      consecutive ones or zeros, i.e., a multiplicand like 10101010101...
2347      In that case we will generate shift-by-2, add, shift-by-2, add,...,
2348      in total wordsize operations.  */
2349   enum alg_code op[MAX_BITS_PER_WORD];
2350   char log[MAX_BITS_PER_WORD];
2351 };
2352 
2353 /* The entry for our multiplication cache/hash table.  */
2354 struct alg_hash_entry {
2355   /* The number we are multiplying by.  */
2356   unsigned HOST_WIDE_INT t;
2357 
2358   /* The mode in which we are multiplying something by T.  */
2359   enum machine_mode mode;
2360 
2361   /* The best multiplication algorithm for t.  */
2362   enum alg_code alg;
2363 
2364   /* The cost of multiplication if ALG_CODE is not alg_impossible.
2365      Otherwise, the cost within which multiplication by T is
2366      impossible.  */
2367   struct mult_cost cost;
2368 
2369   /* Optimized for speed?  */
2370   bool speed;
2371 };
2372 
2373 /* The number of cache/hash entries.  */
2374 #if HOST_BITS_PER_WIDE_INT == 64
2375 #define NUM_ALG_HASH_ENTRIES 1031
2376 #else
2377 #define NUM_ALG_HASH_ENTRIES 307
2378 #endif
2379 
2380 /* Each entry of ALG_HASH caches alg_code for some integer.  This is
2381    actually a hash table.  If we have a collision, the older
2382    entry is kicked out.  */
2383 static struct alg_hash_entry alg_hash[NUM_ALG_HASH_ENTRIES];
2384 
2385 /* Indicates the type of fixup needed after a constant multiplication.
2386    BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2387    the result should be negated, and ADD_VARIANT means that the
2388    multiplicand should be added to the result.  */
2389 enum mult_variant {basic_variant, negate_variant, add_variant};
2390 
2391 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2392 			const struct mult_cost *, enum machine_mode mode);
2393 static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
2394 				 struct algorithm *, enum mult_variant *, int);
2395 static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
2396 			      const struct algorithm *, enum mult_variant);
2397 static unsigned HOST_WIDE_INT choose_multiplier (unsigned HOST_WIDE_INT, int,
2398 						 int, rtx *, int *, int *);
2399 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2400 static rtx extract_high_half (enum machine_mode, rtx);
2401 static rtx expand_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int);
2402 static rtx expand_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
2403 				       int, int);
2404 /* Compute and return the best algorithm for multiplying by T.
2405    The algorithm must cost less than COST_LIMIT.
2406    If retval.cost >= COST_LIMIT, no algorithm was found and all
2407    other fields of the returned struct are undefined.
2408    MODE is the machine mode of the multiplication.  */
2409 
2410 static void
2411 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2412 	    const struct mult_cost *cost_limit, enum machine_mode mode)
2413 {
2414   int m;
2415   struct algorithm *alg_in, *best_alg;
2416   struct mult_cost best_cost;
2417   struct mult_cost new_limit;
2418   int op_cost, op_latency;
2419   unsigned HOST_WIDE_INT orig_t = t;
2420   unsigned HOST_WIDE_INT q;
2421   int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
2422   int hash_index;
2423   bool cache_hit = false;
2424   enum alg_code cache_alg = alg_zero;
2425   bool speed = optimize_insn_for_speed_p ();
2426 
2427   /* Indicate that no algorithm is yet found.  If no algorithm
2428      is found, this value will be returned and indicate failure.  */
2429   alg_out->cost.cost = cost_limit->cost + 1;
2430   alg_out->cost.latency = cost_limit->latency + 1;
2431 
2432   if (cost_limit->cost < 0
2433       || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2434     return;
2435 
2436   /* Restrict the bits of "t" to the multiplication's mode.  */
2437   t &= GET_MODE_MASK (mode);
2438 
2439   /* t == 1 can be done at zero cost.  */
2440   if (t == 1)
2441     {
2442       alg_out->ops = 1;
2443       alg_out->cost.cost = 0;
2444       alg_out->cost.latency = 0;
2445       alg_out->op[0] = alg_m;
2446       return;
2447     }
2448 
2449   /* t == 0 sometimes has a cost.  If it does and it exceeds our limit,
2450      fail now.  */
2451   if (t == 0)
2452     {
2453       if (MULT_COST_LESS (cost_limit, zero_cost[speed]))
2454 	return;
2455       else
2456 	{
2457 	  alg_out->ops = 1;
2458 	  alg_out->cost.cost = zero_cost[speed];
2459 	  alg_out->cost.latency = zero_cost[speed];
2460 	  alg_out->op[0] = alg_zero;
2461 	  return;
2462 	}
2463     }
2464 
2465   /* We'll be needing a couple extra algorithm structures now.  */
2466 
2467   alg_in = XALLOCA (struct algorithm);
2468   best_alg = XALLOCA (struct algorithm);
2469   best_cost = *cost_limit;
2470 
2471   /* Compute the hash index.  */
2472   hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2473 
2474   /* See if we already know what to do for T.  */
2475   if (alg_hash[hash_index].t == t
2476       && alg_hash[hash_index].mode == mode
2478       && alg_hash[hash_index].speed == speed
2479       && alg_hash[hash_index].alg != alg_unknown)
2480     {
2481       cache_alg = alg_hash[hash_index].alg;
2482 
2483       if (cache_alg == alg_impossible)
2484 	{
2485 	  /* The cache tells us that it's impossible to synthesize
2486 	     multiplication by T within alg_hash[hash_index].cost.  */
2487 	  if (!CHEAPER_MULT_COST (&alg_hash[hash_index].cost, cost_limit))
2488 	    /* COST_LIMIT is at least as restrictive as the one
2489 	       recorded in the hash table, in which case we have no
2490 	       hope of synthesizing a multiplication.  Just
2491 	       return.  */
2492 	    return;
2493 
2494 	  /* If we get here, COST_LIMIT is less restrictive than the
2495 	     one recorded in the hash table, so we may be able to
2496 	     synthesize a multiplication.  Proceed as if we didn't
2497 	     have the cache entry.  */
2498 	}
2499       else
2500 	{
2501 	  if (CHEAPER_MULT_COST (cost_limit, &alg_hash[hash_index].cost))
2502 	    /* The cached algorithm shows that this multiplication
2503 	       requires more cost than COST_LIMIT.  Just return.  This
2504 	       way, we don't clobber this cache entry with
2505 	       alg_impossible but retain useful information.  */
2506 	    return;
2507 
2508 	  cache_hit = true;
2509 
2510 	  switch (cache_alg)
2511 	    {
2512 	    case alg_shift:
2513 	      goto do_alg_shift;
2514 
2515 	    case alg_add_t_m2:
2516 	    case alg_sub_t_m2:
2517 	      goto do_alg_addsub_t_m2;
2518 
2519 	    case alg_add_factor:
2520 	    case alg_sub_factor:
2521 	      goto do_alg_addsub_factor;
2522 
2523 	    case alg_add_t2_m:
2524 	      goto do_alg_add_t2_m;
2525 
2526 	    case alg_sub_t2_m:
2527 	      goto do_alg_sub_t2_m;
2528 
2529 	    default:
2530 	      gcc_unreachable ();
2531 	    }
2532 	}
2533     }
2534 
2535   /* If we have a group of zero bits at the low-order part of T, try
2536      multiplying by the remaining bits and then doing a shift.  */
2537 
2538   if ((t & 1) == 0)
2539     {
2540     do_alg_shift:
2541       m = floor_log2 (t & -t);	/* m = number of low zero bits */
2542       if (m < maxm)
2543 	{
2544 	  q = t >> m;
2545 	  /* The function expand_shift will choose between a shift and
2546 	     a sequence of additions, so the observed cost is given as
2547 	     MIN (m * add_cost[speed][mode], shift_cost[speed][mode][m]).  */
2548 	  op_cost = m * add_cost[speed][mode];
2549 	  if (shift_cost[speed][mode][m] < op_cost)
2550 	    op_cost = shift_cost[speed][mode][m];
2551 	  new_limit.cost = best_cost.cost - op_cost;
2552 	  new_limit.latency = best_cost.latency - op_cost;
2553 	  synth_mult (alg_in, q, &new_limit, mode);
2554 
2555 	  alg_in->cost.cost += op_cost;
2556 	  alg_in->cost.latency += op_cost;
2557 	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2558 	    {
2559 	      struct algorithm *x;
2560 	      best_cost = alg_in->cost;
2561 	      x = alg_in, alg_in = best_alg, best_alg = x;
2562 	      best_alg->log[best_alg->ops] = m;
2563 	      best_alg->op[best_alg->ops] = alg_shift;
2564 	    }
2565 
2566 	  /* See if treating ORIG_T as a signed number yields a better
2567 	     sequence.  Try this sequence only for a negative ORIG_T
2568 	     as it would be useless for a non-negative ORIG_T.  */
2569 	  if ((HOST_WIDE_INT) orig_t < 0)
2570 	    {
2571 	      /* Shift ORIG_T as follows because a right shift of a
2572 		 negative-valued signed type is implementation
2573 		 defined.  */
2574 	      q = ~(~orig_t >> m);
2575 	      /* The function expand_shift will choose between a shift
2576 		 and a sequence of additions, so the observed cost is
2577 		 given as MIN (m * add_cost[speed][mode],
2578 		 shift_cost[speed][mode][m]).  */
2579 	      op_cost = m * add_cost[speed][mode];
2580 	      if (shift_cost[speed][mode][m] < op_cost)
2581 		op_cost = shift_cost[speed][mode][m];
2582 	      new_limit.cost = best_cost.cost - op_cost;
2583 	      new_limit.latency = best_cost.latency - op_cost;
2584 	      synth_mult (alg_in, q, &new_limit, mode);
2585 
2586 	      alg_in->cost.cost += op_cost;
2587 	      alg_in->cost.latency += op_cost;
2588 	      if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2589 		{
2590 		  struct algorithm *x;
2591 		  best_cost = alg_in->cost;
2592 		  x = alg_in, alg_in = best_alg, best_alg = x;
2593 		  best_alg->log[best_alg->ops] = m;
2594 		  best_alg->op[best_alg->ops] = alg_shift;
2595 		}
2596 	    }
2597 	}
2598       if (cache_hit)
2599 	goto done;
2600     }
2601 
2602   /* If we have an odd number, add or subtract one.  */
2603   if ((t & 1) != 0)
2604     {
2605       unsigned HOST_WIDE_INT w;
2606 
2607     do_alg_addsub_t_m2:
2608       for (w = 1; (w & t) != 0; w <<= 1)
2609 	;
2610       /* If T was -1, then W will be zero after the loop.  This is another
2611 	 case where T ends with ...111.  Handling this by multiplying by
2612 	 (T + 1) and subtracting 1 produces slightly better code and makes
2613 	 algorithm selection much faster than treating it like the ...0111
2614 	 case below.  */
2615       if (w == 0
2616 	  || (w > 2
2617 	      /* Reject the case where t is 3.
2618 		 Thus we prefer addition in that case.  */
2619 	      && t != 3))
2620 	{
2621 	  /* T ends with ...111.  Multiply by (T + 1) and subtract 1.  */
2622 
2623 	  op_cost = add_cost[speed][mode];
2624 	  new_limit.cost = best_cost.cost - op_cost;
2625 	  new_limit.latency = best_cost.latency - op_cost;
2626 	  synth_mult (alg_in, t + 1, &new_limit, mode);
2627 
2628 	  alg_in->cost.cost += op_cost;
2629 	  alg_in->cost.latency += op_cost;
2630 	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2631 	    {
2632 	      struct algorithm *x;
2633 	      best_cost = alg_in->cost;
2634 	      x = alg_in, alg_in = best_alg, best_alg = x;
2635 	      best_alg->log[best_alg->ops] = 0;
2636 	      best_alg->op[best_alg->ops] = alg_sub_t_m2;
2637 	    }
2638 	}
2639       else
2640 	{
2641 	  /* T ends with ...01 or ...011.  Multiply by (T - 1) and add 1.  */
2642 
2643 	  op_cost = add_cost[speed][mode];
2644 	  new_limit.cost = best_cost.cost - op_cost;
2645 	  new_limit.latency = best_cost.latency - op_cost;
2646 	  synth_mult (alg_in, t - 1, &new_limit, mode);
2647 
2648 	  alg_in->cost.cost += op_cost;
2649 	  alg_in->cost.latency += op_cost;
2650 	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2651 	    {
2652 	      struct algorithm *x;
2653 	      best_cost = alg_in->cost;
2654 	      x = alg_in, alg_in = best_alg, best_alg = x;
2655 	      best_alg->log[best_alg->ops] = 0;
2656 	      best_alg->op[best_alg->ops] = alg_add_t_m2;
2657 	    }
2658 	}
2659 
2660       /* We may be able to calculate a * -7, a * -15, a * -31, etc
2661       /* We may be able to calculate a * -7, a * -15, a * -31, etc.
2662       m = exact_log2 (-orig_t + 1);
2663       if (m >= 0 && m < maxm)
2664 	{
2665 	  op_cost = shiftsub1_cost[speed][mode][m];
2666 	  new_limit.cost = best_cost.cost - op_cost;
2667 	  new_limit.latency = best_cost.latency - op_cost;
2668 	  synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m, &new_limit, mode);
2669 
2670 	  alg_in->cost.cost += op_cost;
2671 	  alg_in->cost.latency += op_cost;
2672 	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2673 	    {
2674 	      struct algorithm *x;
2675 	      best_cost = alg_in->cost;
2676 	      x = alg_in, alg_in = best_alg, best_alg = x;
2677 	      best_alg->log[best_alg->ops] = m;
2678 	      best_alg->op[best_alg->ops] = alg_sub_t_m2;
2679 	    }
2680 	}
2681 
2682       if (cache_hit)
2683 	goto done;
2684     }
2685 
2686   /* Look for factors of t of the form
2687      t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2688      If we find such a factor, we can multiply by t using an algorithm that
2689      multiplies by q, shift the result by m and add/subtract it to itself.
2690 
2691      We search for large factors first and loop down, even if large factors
2692      are less probable than small; if we find a large factor we will find a
2693      good sequence quickly, and therefore be able to prune (by decreasing
2694      COST_LIMIT) the search.  */
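  /* For instance (illustration only), t = 36 factors as 4 * (2**3 + 1):
     multiply by 4 with a shift, then apply alg_add_factor with a
     coefficient of 8 (total := total * 8 + total) to reach 36 times the
     multiplicand.  */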
2695 
2696  do_alg_addsub_factor:
2697   for (m = floor_log2 (t - 1); m >= 2; m--)
2698     {
2699       unsigned HOST_WIDE_INT d;
2700 
2701       d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2702       if (t % d == 0 && t > d && m < maxm
2703 	  && (!cache_hit || cache_alg == alg_add_factor))
2704 	{
2705 	  /* If the target has a cheap shift-and-add instruction use
2706 	     that in preference to a shift insn followed by an add insn.
2707 	     Assume that the shift-and-add is "atomic" with a latency
2708 	     equal to its cost, otherwise assume that on superscalar
2709 	     hardware the shift may be executed concurrently with the
2710 	     earlier steps in the algorithm.  */
2711 	  op_cost = add_cost[speed][mode] + shift_cost[speed][mode][m];
2712 	  if (shiftadd_cost[speed][mode][m] < op_cost)
2713 	    {
2714 	      op_cost = shiftadd_cost[speed][mode][m];
2715 	      op_latency = op_cost;
2716 	    }
2717 	  else
2718 	    op_latency = add_cost[speed][mode];
2719 
2720 	  new_limit.cost = best_cost.cost - op_cost;
2721 	  new_limit.latency = best_cost.latency - op_latency;
2722 	  synth_mult (alg_in, t / d, &new_limit, mode);
2723 
2724 	  alg_in->cost.cost += op_cost;
2725 	  alg_in->cost.latency += op_latency;
2726 	  if (alg_in->cost.latency < op_cost)
2727 	    alg_in->cost.latency = op_cost;
2728 	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2729 	    {
2730 	      struct algorithm *x;
2731 	      best_cost = alg_in->cost;
2732 	      x = alg_in, alg_in = best_alg, best_alg = x;
2733 	      best_alg->log[best_alg->ops] = m;
2734 	      best_alg->op[best_alg->ops] = alg_add_factor;
2735 	    }
2736 	  /* Other factors will have been taken care of in the recursion.  */
2737 	  break;
2738 	}
2739 
2740       d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2741       if (t % d == 0 && t > d && m < maxm
2742 	  && (!cache_hit || cache_alg == alg_sub_factor))
2743 	{
2744 	  /* If the target has a cheap shift-and-subtract insn use
2745 	     that in preference to a shift insn followed by a sub insn.
2746 	     Assume that the shift-and-sub is "atomic" with a latency
2747 	     equal to its cost, otherwise assume that on superscalar
2748 	     hardware the shift may be executed concurrently with the
2749 	     earlier steps in the algorithm.  */
2750 	  op_cost = add_cost[speed][mode] + shift_cost[speed][mode][m];
2751 	  if (shiftsub0_cost[speed][mode][m] < op_cost)
2752 	    {
2753 	      op_cost = shiftsub0_cost[speed][mode][m];
2754 	      op_latency = op_cost;
2755 	    }
2756 	  else
2757 	    op_latency = add_cost[speed][mode];
2758 
2759 	  new_limit.cost = best_cost.cost - op_cost;
2760 	  new_limit.latency = best_cost.latency - op_latency;
2761 	  synth_mult (alg_in, t / d, &new_limit, mode);
2762 
2763 	  alg_in->cost.cost += op_cost;
2764 	  alg_in->cost.latency += op_latency;
2765 	  if (alg_in->cost.latency < op_cost)
2766 	    alg_in->cost.latency = op_cost;
2767 	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2768 	    {
2769 	      struct algorithm *x;
2770 	      best_cost = alg_in->cost;
2771 	      x = alg_in, alg_in = best_alg, best_alg = x;
2772 	      best_alg->log[best_alg->ops] = m;
2773 	      best_alg->op[best_alg->ops] = alg_sub_factor;
2774 	    }
2775 	  break;
2776 	}
2777     }
2778   if (cache_hit)
2779     goto done;
2780 
2781   /* Try shift-and-add (load effective address) instructions,
2782      i.e. do a*3, a*5, a*9.  */
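  /* E.g. a*9 is (a << 3) + a, which a single load-effective-address style
     instruction (x86 "leal (%rax,%rax,8), %eax", for instance) can
     compute; shiftadd_cost reflects whether the target has such an
     instruction.  */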
2783   if ((t & 1) != 0)
2784     {
2785     do_alg_add_t2_m:
2786       q = t - 1;
2787       q = q & -q;
2788       m = exact_log2 (q);
2789       if (m >= 0 && m < maxm)
2790 	{
2791 	  op_cost = shiftadd_cost[speed][mode][m];
2792 	  new_limit.cost = best_cost.cost - op_cost;
2793 	  new_limit.latency = best_cost.latency - op_cost;
2794 	  synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2795 
2796 	  alg_in->cost.cost += op_cost;
2797 	  alg_in->cost.latency += op_cost;
2798 	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2799 	    {
2800 	      struct algorithm *x;
2801 	      best_cost = alg_in->cost;
2802 	      x = alg_in, alg_in = best_alg, best_alg = x;
2803 	      best_alg->log[best_alg->ops] = m;
2804 	      best_alg->op[best_alg->ops] = alg_add_t2_m;
2805 	    }
2806 	}
2807       if (cache_hit)
2808 	goto done;
2809 
2810     do_alg_sub_t2_m:
2811       q = t + 1;
2812       q = q & -q;
2813       m = exact_log2 (q);
2814       if (m >= 0 && m < maxm)
2815 	{
2816 	  op_cost = shiftsub0_cost[speed][mode][m];
2817 	  new_limit.cost = best_cost.cost - op_cost;
2818 	  new_limit.latency = best_cost.latency - op_cost;
2819 	  synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2820 
2821 	  alg_in->cost.cost += op_cost;
2822 	  alg_in->cost.latency += op_cost;
2823 	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2824 	    {
2825 	      struct algorithm *x;
2826 	      best_cost = alg_in->cost;
2827 	      x = alg_in, alg_in = best_alg, best_alg = x;
2828 	      best_alg->log[best_alg->ops] = m;
2829 	      best_alg->op[best_alg->ops] = alg_sub_t2_m;
2830 	    }
2831 	}
2832       if (cache_hit)
2833 	goto done;
2834     }
2835 
2836  done:
2837   /* If best_cost has not decreased, we have not found any algorithm.  */
2838   if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2839     {
2840       /* We failed to find an algorithm.  Record alg_impossible for
2841 	 this case (that is, <T, MODE, COST_LIMIT>) so that next time
2842 	 we are asked to find an algorithm for T within the same or
2843 	 lower COST_LIMIT, we can immediately return to the
2844 	 caller.  */
2845       alg_hash[hash_index].t = t;
2846       alg_hash[hash_index].mode = mode;
2847       alg_hash[hash_index].speed = speed;
2848       alg_hash[hash_index].alg = alg_impossible;
2849       alg_hash[hash_index].cost = *cost_limit;
2850       return;
2851     }
2852 
2853   /* Cache the result.  */
2854   if (!cache_hit)
2855     {
2856       alg_hash[hash_index].t = t;
2857       alg_hash[hash_index].mode = mode;
2858       alg_hash[hash_index].speed = speed;
2859       alg_hash[hash_index].alg = best_alg->op[best_alg->ops];
2860       alg_hash[hash_index].cost.cost = best_cost.cost;
2861       alg_hash[hash_index].cost.latency = best_cost.latency;
2862     }
2863 
2864   /* If the sequence is too long for `struct algorithm'
2865      to record, make this search fail.  */
2866   if (best_alg->ops == MAX_BITS_PER_WORD)
2867     return;
2868 
2869   /* Copy the algorithm from temporary space to the space at alg_out.
2870      We avoid using structure assignment because the majority of
2871      best_alg is normally undefined, and this is a critical function.  */
2872   alg_out->ops = best_alg->ops + 1;
2873   alg_out->cost = best_cost;
2874   memcpy (alg_out->op, best_alg->op,
2875 	  alg_out->ops * sizeof *alg_out->op);
2876   memcpy (alg_out->log, best_alg->log,
2877 	  alg_out->ops * sizeof *alg_out->log);
2878 }
2879 
2880 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2881    Try three variations:
2882 
2883        - a shift/add sequence based on VAL itself
2884        - a shift/add sequence based on -VAL, followed by a negation
2885        - a shift/add sequence based on VAL - 1, followed by an addition.
2886 
2887    Return true if the cheapest of these costs less than MULT_COST,
2888    describing the algorithm in *ALG and final fixup in *VARIANT.  */
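/* For example (illustrative), for VAL == -7 the second variation can win:
   multiply by 7 as (X << 3) - X and then negate (negate_variant).  For
   VAL == 9 the third can win: shift X left by 3 and let the add_variant
   fixup add X back in.  */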
2889 
2890 static bool
2891 choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
2892 		     struct algorithm *alg, enum mult_variant *variant,
2893 		     int mult_cost)
2894 {
2895   struct algorithm alg2;
2896   struct mult_cost limit;
2897   int op_cost;
2898   bool speed = optimize_insn_for_speed_p ();
2899 
2900   /* Fail quickly for impossible bounds.  */
2901   if (mult_cost < 0)
2902     return false;
2903 
2904   /* Ensure that mult_cost provides a reasonable upper bound.
2905      Any constant multiplication can be performed with less
2906      than 2 * bits additions.  */
2907   op_cost = 2 * GET_MODE_BITSIZE (mode) * add_cost[speed][mode];
2908   if (mult_cost > op_cost)
2909     mult_cost = op_cost;
2910 
2911   *variant = basic_variant;
2912   limit.cost = mult_cost;
2913   limit.latency = mult_cost;
2914   synth_mult (alg, val, &limit, mode);
2915 
2916   /* This works only if the inverted value actually fits in an
2917      `unsigned int'.  */
2918   if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
2919     {
2920       op_cost = neg_cost[speed][mode];
2921       if (MULT_COST_LESS (&alg->cost, mult_cost))
2922 	{
2923 	  limit.cost = alg->cost.cost - op_cost;
2924 	  limit.latency = alg->cost.latency - op_cost;
2925 	}
2926       else
2927 	{
2928 	  limit.cost = mult_cost - op_cost;
2929 	  limit.latency = mult_cost - op_cost;
2930 	}
2931 
2932       synth_mult (&alg2, -val, &limit, mode);
2933       alg2.cost.cost += op_cost;
2934       alg2.cost.latency += op_cost;
2935       if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2936 	*alg = alg2, *variant = negate_variant;
2937     }
2938 
2939   /* This proves very useful for division-by-constant.  */
2940   op_cost = add_cost[speed][mode];
2941   if (MULT_COST_LESS (&alg->cost, mult_cost))
2942     {
2943       limit.cost = alg->cost.cost - op_cost;
2944       limit.latency = alg->cost.latency - op_cost;
2945     }
2946   else
2947     {
2948       limit.cost = mult_cost - op_cost;
2949       limit.latency = mult_cost - op_cost;
2950     }
2951 
2952   synth_mult (&alg2, val - 1, &limit, mode);
2953   alg2.cost.cost += op_cost;
2954   alg2.cost.latency += op_cost;
2955   if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2956     *alg = alg2, *variant = add_variant;
2957 
2958   return MULT_COST_LESS (&alg->cost, mult_cost);
2959 }
2960 
2961 /* A subroutine of expand_mult, used for constant multiplications.
2962    Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2963    convenient.  Use the shift/add sequence described by ALG and apply
2964    the final fixup specified by VARIANT.  */
2965 
2966 static rtx
2967 expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
2968 		   rtx target, const struct algorithm *alg,
2969 		   enum mult_variant variant)
2970 {
2971   HOST_WIDE_INT val_so_far;
2972   rtx insn, accum, tem;
2973   int opno;
2974   enum machine_mode nmode;
2975 
2976   /* Avoid referencing memory over and over and invalid sharing
2977      on SUBREGs.  */
2978   op0 = force_reg (mode, op0);
2979 
2980   /* ACCUM starts out either as OP0 or as a zero, depending on
2981      the first operation.  */
2982 
2983   if (alg->op[0] == alg_zero)
2984     {
2985       accum = copy_to_mode_reg (mode, const0_rtx);
2986       val_so_far = 0;
2987     }
2988   else if (alg->op[0] == alg_m)
2989     {
2990       accum = copy_to_mode_reg (mode, op0);
2991       val_so_far = 1;
2992     }
2993   else
2994     gcc_unreachable ();
2995 
2996   for (opno = 1; opno < alg->ops; opno++)
2997     {
2998       int log = alg->log[opno];
2999       rtx shift_subtarget = optimize ? 0 : accum;
3000       rtx add_target
3001 	= (opno == alg->ops - 1 && target != 0 && variant != add_variant
3002 	   && !optimize)
3003 	  ? target : 0;
3004       rtx accum_target = optimize ? 0 : accum;
3005 
3006       switch (alg->op[opno])
3007 	{
3008 	case alg_shift:
3009 	  tem = expand_shift (LSHIFT_EXPR, mode, accum,
3010 			      build_int_cst (NULL_TREE, log),
3011 			      NULL_RTX, 0);
3012 	  /* REG_EQUAL note will be attached to the following insn.  */
3013 	  emit_move_insn (accum, tem);
3014 	  val_so_far <<= log;
3015 	  break;
3016 
3017 	case alg_add_t_m2:
3018 	  tem = expand_shift (LSHIFT_EXPR, mode, op0,
3019 			      build_int_cst (NULL_TREE, log),
3020 			      NULL_RTX, 0);
3021 	  accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3022 				 add_target ? add_target : accum_target);
3023 	  val_so_far += (HOST_WIDE_INT) 1 << log;
3024 	  break;
3025 
3026 	case alg_sub_t_m2:
3027 	  tem = expand_shift (LSHIFT_EXPR, mode, op0,
3028 			      build_int_cst (NULL_TREE, log),
3029 			      NULL_RTX, 0);
3030 	  accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
3031 				 add_target ? add_target : accum_target);
3032 	  val_so_far -= (HOST_WIDE_INT) 1 << log;
3033 	  break;
3034 
3035 	case alg_add_t2_m:
3036 	  accum = expand_shift (LSHIFT_EXPR, mode, accum,
3037 				build_int_cst (NULL_TREE, log),
3038 				shift_subtarget,
3039 				0);
3040 	  accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
3041 				 add_target ? add_target : accum_target);
3042 	  val_so_far = (val_so_far << log) + 1;
3043 	  break;
3044 
3045 	case alg_sub_t2_m:
3046 	  accum = expand_shift (LSHIFT_EXPR, mode, accum,
3047 				build_int_cst (NULL_TREE, log),
3048 				shift_subtarget, 0);
3049 	  accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3050 				 add_target ? add_target : accum_target);
3051 	  val_so_far = (val_so_far << log) - 1;
3052 	  break;
3053 
3054 	case alg_add_factor:
3055 	  tem = expand_shift (LSHIFT_EXPR, mode, accum,
3056 			      build_int_cst (NULL_TREE, log),
3057 			      NULL_RTX, 0);
3058 	  accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3059 				 add_target ? add_target : accum_target);
3060 	  val_so_far += val_so_far << log;
3061 	  break;
3062 
3063 	case alg_sub_factor:
3064 	  tem = expand_shift (LSHIFT_EXPR, mode, accum,
3065 			      build_int_cst (NULL_TREE, log),
3066 			      NULL_RTX, 0);
3067 	  accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3068 				 (add_target
3069 				  ? add_target : (optimize ? 0 : tem)));
3070 	  val_so_far = (val_so_far << log) - val_so_far;
3071 	  break;
3072 
3073 	default:
3074 	  gcc_unreachable ();
3075 	}
3076 
3077       /* Write a REG_EQUAL note on the last insn so that we can cse
3078 	 multiplication sequences.  Note that if ACCUM is a SUBREG,
3079 	 we've set the inner register and must properly indicate
3080 	 that.  */
3081 
3082       tem = op0, nmode = mode;
3083       if (GET_CODE (accum) == SUBREG)
3084 	{
3085 	  nmode = GET_MODE (SUBREG_REG (accum));
3086 	  tem = gen_lowpart (nmode, op0);
3087 	}
3088 
3089       insn = get_last_insn ();
3090       set_unique_reg_note (insn, REG_EQUAL,
3091 			   gen_rtx_MULT (nmode, tem,
3092 					 GEN_INT (val_so_far)));
3093     }
3094 
3095   if (variant == negate_variant)
3096     {
3097       val_so_far = -val_so_far;
3098       accum = expand_unop (mode, neg_optab, accum, target, 0);
3099     }
3100   else if (variant == add_variant)
3101     {
3102       val_so_far = val_so_far + 1;
3103       accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3104     }
3105 
3106   /* Compare only the bits of val and val_so_far that are significant
3107      in the result mode, to avoid sign-/zero-extension confusion.  */
3108   val &= GET_MODE_MASK (mode);
3109   val_so_far &= GET_MODE_MASK (mode);
3110   gcc_assert (val == val_so_far);
3111 
3112   return accum;
3113 }
3114 
3115 /* Perform a multiplication and return an rtx for the result.
3116    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3117    TARGET is a suggestion for where to store the result (an rtx).
3118 
3119    We check specially for a constant integer as OP1.
3120    If you want this check for OP0 as well, then before calling
3121    you should swap the two operands if OP0 would be constant.  */
3122 
3123 rtx
3124 expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3125 	     int unsignedp)
3126 {
3127   enum mult_variant variant;
3128   struct algorithm algorithm;
3129   int max_cost;
3130   bool speed = optimize_insn_for_speed_p ();
3131 
3132   /* Handling const0_rtx here allows us to use zero as a rogue value for
3133      coeff below.  */
3134   if (op1 == const0_rtx)
3135     return const0_rtx;
3136   if (op1 == const1_rtx)
3137     return op0;
3138   if (op1 == constm1_rtx)
3139     return expand_unop (mode,
3140 			GET_MODE_CLASS (mode) == MODE_INT
3141 			&& !unsignedp && flag_trapv
3142 			? negv_optab : neg_optab,
3143 			op0, target, 0);
3144 
3145   /* These are the operations that are potentially turned into a sequence
3146      of shifts and additions.  */
3147   if (SCALAR_INT_MODE_P (mode)
3148       && (unsignedp || !flag_trapv))
3149     {
3150       HOST_WIDE_INT coeff = 0;
3151       rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3152 
3153       /* synth_mult does an `unsigned int' multiply.  As long as the mode is
3154 	 less than or equal in size to `unsigned int' this doesn't matter.
3155 	 If the mode is larger than `unsigned int', then synth_mult works
3156 	 only if the constant value exactly fits in an `unsigned int' without
3157 	 any truncation.  This means that multiplying by negative values does
3158 	 not work; results are off by 2^32 on a 32 bit machine.  */
3159 
3160       if (CONST_INT_P (op1))
3161 	{
3162 	  /* Attempt to handle multiplication of DImode values by negative
3163 	     coefficients, by performing the multiplication by a positive
3164 	     multiplier and then inverting the result.  */
3165 	  if (INTVAL (op1) < 0
3166 	      && GET_MODE_BITSIZE (mode) > HOST_BITS_PER_WIDE_INT)
3167 	    {
3168 	      /* It's safe to use -INTVAL (op1) even for INT_MIN, as the
3169 		 result is interpreted as an unsigned coefficient.
3170 		 Exclude cost of op0 from max_cost to match the cost
3171 		 calculation of the synth_mult.  */
3172 	      max_cost = rtx_cost (gen_rtx_MULT (mode, fake_reg, op1), SET, speed)
3173 			 - neg_cost[speed][mode];
3174 	      if (max_cost > 0
3175 		  && choose_mult_variant (mode, -INTVAL (op1), &algorithm,
3176 					  &variant, max_cost))
3177 		{
3178 		  rtx temp = expand_mult_const (mode, op0, -INTVAL (op1),
3179 						NULL_RTX, &algorithm,
3180 						variant);
3181 		  return expand_unop (mode, neg_optab, temp, target, 0);
3182 		}
3183 	    }
3184 	  else coeff = INTVAL (op1);
3185 	}
3186       else if (GET_CODE (op1) == CONST_DOUBLE)
3187 	{
3188 	  /* If we are multiplying in DImode, it may still be a win
3189 	     to try to work with shifts and adds.  */
3190 	  if (CONST_DOUBLE_HIGH (op1) == 0
3191 	      && CONST_DOUBLE_LOW (op1) > 0)
3192 	    coeff = CONST_DOUBLE_LOW (op1);
3193 	  else if (CONST_DOUBLE_LOW (op1) == 0
3194 		   && EXACT_POWER_OF_2_OR_ZERO_P (CONST_DOUBLE_HIGH (op1)))
3195 	    {
3196 	      int shift = floor_log2 (CONST_DOUBLE_HIGH (op1))
3197 			  + HOST_BITS_PER_WIDE_INT;
3198 	      return expand_shift (LSHIFT_EXPR, mode, op0,
3199 				   build_int_cst (NULL_TREE, shift),
3200 				   target, unsignedp);
3201 	    }
3202 	}
3203 
3204       /* We used to test optimize here, on the grounds that it's better to
3205 	 produce a smaller program when -O is not used.  But this causes
3206 	 such a terrible slowdown sometimes that it seems better to always
3207 	 use synth_mult.  */
3208       if (coeff != 0)
3209 	{
3210 	  /* Special case powers of two.  */
3211 	  if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3212 	    return expand_shift (LSHIFT_EXPR, mode, op0,
3213 				 build_int_cst (NULL_TREE, floor_log2 (coeff)),
3214 				 target, unsignedp);
3215 
3216 	  /* Exclude cost of op0 from max_cost to match the cost
3217 	     calculation of the synth_mult.  */
3218 	  max_cost = rtx_cost (gen_rtx_MULT (mode, fake_reg, op1), SET, speed);
3219 	  if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3220 				   max_cost))
3221 	    return expand_mult_const (mode, op0, coeff, target,
3222 				      &algorithm, variant);
3223 	}
3224     }
3225 
3226   if (GET_CODE (op0) == CONST_DOUBLE)
3227     {
3228       rtx temp = op0;
3229       op0 = op1;
3230       op1 = temp;
3231     }
3232 
3233   /* Expand x*2.0 as x+x.  */
3234   if (GET_CODE (op1) == CONST_DOUBLE
3235       && SCALAR_FLOAT_MODE_P (mode))
3236     {
3237       REAL_VALUE_TYPE d;
3238       REAL_VALUE_FROM_CONST_DOUBLE (d, op1);
3239 
3240       if (REAL_VALUES_EQUAL (d, dconst2))
3241 	{
3242 	  op0 = force_reg (GET_MODE (op0), op0);
3243 	  return expand_binop (mode, add_optab, op0, op0,
3244 			       target, unsignedp, OPTAB_LIB_WIDEN);
3245 	}
3246     }
3247 
3248   /* This used to use umul_optab if unsigned, but for non-widening multiply
3249      there is no difference between signed and unsigned.  */
3250   op0 = expand_binop (mode,
3251 		      ! unsignedp
3252 		      && flag_trapv && (GET_MODE_CLASS(mode) == MODE_INT)
3253 		      ? smulv_optab : smul_optab,
3254 		      op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3255   gcc_assert (op0);
3256   return op0;
3257 }
3258 
3259 /* Return the smallest n such that 2**n >= X.  */
3260 
3261 int
3262 ceil_log2 (unsigned HOST_WIDE_INT x)
3263 {
3264   return floor_log2 (x - 1) + 1;
3265 }
3266 
3267 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3268    replace division by D, and put the least significant N bits of the result
3269    in *MULTIPLIER_PTR and return the most significant bit.
3270 
3271    The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3272    needed precision is in PRECISION (should be <= N).
3273 
3274    PRECISION should be as small as possible so this function can choose
3275    multiplier more freely.
3276 
3277    The rounded-up logarithm of D is placed in *LGUP_PTR.  A shift count that
3278    is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3279 
3280    Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3281    where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier.  */
3282 
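/* Illustrative example: for D = 5 with N = PRECISION = 32 we get lgup = 3,
   mlow  = floor (2^35 / 5)         = 0x199999999 and
   mhigh = floor ((2^35 + 2^3) / 5) = 0x19999999B.
   The reduction loop below halves both values while they still differ,
   leaving mhigh = 0xCCCCCCCD and *POST_SHIFT_PTR = 2; i.e. for any
   unsigned 32-bit x, x / 5 == ((x * 0xCCCCCCCD) >> 32) >> 2.  */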
3283 static
3284 unsigned HOST_WIDE_INT
3285 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3286 		   rtx *multiplier_ptr, int *post_shift_ptr, int *lgup_ptr)
3287 {
3288   HOST_WIDE_INT mhigh_hi, mlow_hi;
3289   unsigned HOST_WIDE_INT mhigh_lo, mlow_lo;
3290   int lgup, post_shift;
3291   int pow, pow2;
3292   unsigned HOST_WIDE_INT nl, dummy1;
3293   HOST_WIDE_INT nh, dummy2;
3294 
3295   /* lgup = ceil(log2(divisor)); */
3296   lgup = ceil_log2 (d);
3297 
3298   gcc_assert (lgup <= n);
3299 
3300   pow = n + lgup;
3301   pow2 = n + lgup - precision;
3302 
3303   /* We could handle this with some effort, but this case is much
3304      better handled directly with a scc insn, so rely on caller using
3305      that.  */
3306   gcc_assert (pow != 2 * HOST_BITS_PER_WIDE_INT);
3307 
3308   /* mlow = 2^(N + lgup)/d */
3309   if (pow >= HOST_BITS_PER_WIDE_INT)
3310     {
3311       nh = (HOST_WIDE_INT) 1 << (pow - HOST_BITS_PER_WIDE_INT);
3312       nl = 0;
3313     }
3314   else
3315     {
3316       nh = 0;
3317       nl = (unsigned HOST_WIDE_INT) 1 << pow;
3318     }
3319   div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3320 			&mlow_lo, &mlow_hi, &dummy1, &dummy2);
3321 
3322   /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3323   if (pow2 >= HOST_BITS_PER_WIDE_INT)
3324     nh |= (HOST_WIDE_INT) 1 << (pow2 - HOST_BITS_PER_WIDE_INT);
3325   else
3326     nl |= (unsigned HOST_WIDE_INT) 1 << pow2;
3327   div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3328 			&mhigh_lo, &mhigh_hi, &dummy1, &dummy2);
3329 
3330   gcc_assert (!mhigh_hi || nh - d < d);
3331   gcc_assert (mhigh_hi <= 1 && mlow_hi <= 1);
3332   /* Assert that mlow < mhigh.  */
3333   gcc_assert (mlow_hi < mhigh_hi
3334 	      || (mlow_hi == mhigh_hi && mlow_lo < mhigh_lo));
3335 
3336   /* If precision == N, then mlow, mhigh exceed 2^N
3337      (but they do not exceed 2^(N+1)).  */
3338 
3339   /* Reduce to lowest terms.  */
3340   for (post_shift = lgup; post_shift > 0; post_shift--)
3341     {
3342       unsigned HOST_WIDE_INT ml_lo = (mlow_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mlow_lo >> 1);
3343       unsigned HOST_WIDE_INT mh_lo = (mhigh_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mhigh_lo >> 1);
3344       if (ml_lo >= mh_lo)
3345 	break;
3346 
3347       mlow_hi = 0;
3348       mlow_lo = ml_lo;
3349       mhigh_hi = 0;
3350       mhigh_lo = mh_lo;
3351     }
3352 
3353   *post_shift_ptr = post_shift;
3354   *lgup_ptr = lgup;
3355   if (n < HOST_BITS_PER_WIDE_INT)
3356     {
3357       unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3358       *multiplier_ptr = GEN_INT (mhigh_lo & mask);
3359       return mhigh_lo >= mask;
3360     }
3361   else
3362     {
3363       *multiplier_ptr = GEN_INT (mhigh_lo);
3364       return mhigh_hi;
3365     }
3366 }
3367 
3368 /* Compute the inverse of X mod 2**N, i.e., find Y such that X * Y is
3369    congruent to 1 (mod 2**N).  */
3370 
3371 static unsigned HOST_WIDE_INT
3372 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3373 {
3374   /* Solve x*y == 1 (mod 2^n), where x is odd.  Return y.  */
3375 
3376   /* The algorithm notes that the choice y = x satisfies
3377      x*y == 1 mod 2^3, since x is assumed odd.
3378      Each iteration doubles the number of bits of significance in y.  */
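  /* Worked example: x = 3, n = 8.  Start with y = 3 (3*3 == 9 == 1 mod 8).
     First step:  y = 3*(2 - 9) mod 256 = 235, and 3*235 == 705 == 1 mod 64.
     Second step: y = 235*(2 - 705) mod 256 = 171, and 3*171 == 513 == 1
     mod 256, so 171 is the result.  */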
3379 
3380   unsigned HOST_WIDE_INT mask;
3381   unsigned HOST_WIDE_INT y = x;
3382   int nbit = 3;
3383 
3384   mask = (n == HOST_BITS_PER_WIDE_INT
3385 	  ? ~(unsigned HOST_WIDE_INT) 0
3386 	  : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3387 
3388   while (nbit < n)
3389     {
3390       y = y * (2 - x*y) & mask;		/* Modulo 2^N */
3391       nbit *= 2;
3392     }
3393   return y;
3394 }
3395 
3396 /* Emit code to adjust ADJ_OPERAND after a multiplication of OP0 and OP1
3397    with the wrong signedness.  ADJ_OPERAND is already the high half of the
3398    product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the signed product
3399    to become unsigned; if UNSIGNEDP is zero, adjust the unsigned product to
3400    become signed.
3401 
3402    The result is put in TARGET if that is convenient.
3403 
3404    MODE is the mode of operation.  */
3405 
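/* The adjustment follows from the identity (for N-bit two's complement
   values, with ">>" an arithmetic shift):

     unsigned_high (OP0, OP1) == signed_high (OP0, OP1)
                                 + ((OP0 >> (N-1)) & OP1)
                                 + ((OP1 >> (N-1)) & OP0)

   i.e. each negative operand contributes the other operand to the
   difference between the two flavors of high part.  */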
3406 rtx
3407 expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
3408 			     rtx op1, rtx target, int unsignedp)
3409 {
3410   rtx tem;
3411   enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3412 
3413   tem = expand_shift (RSHIFT_EXPR, mode, op0,
3414 		      build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode) - 1),
3415 		      NULL_RTX, 0);
3416   tem = expand_and (mode, tem, op1, NULL_RTX);
3417   adj_operand
3418     = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3419 		     adj_operand);
3420 
3421   tem = expand_shift (RSHIFT_EXPR, mode, op1,
3422 		      build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode) - 1),
3423 		      NULL_RTX, 0);
3424   tem = expand_and (mode, tem, op0, NULL_RTX);
3425   target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3426 			  target);
3427 
3428   return target;
3429 }
3430 
3431 /* Subroutine of expand_mult_highpart.  Return the MODE high part of OP.  */
3432 
3433 static rtx
3434 extract_high_half (enum machine_mode mode, rtx op)
3435 {
3436   enum machine_mode wider_mode;
3437 
3438   if (mode == word_mode)
3439     return gen_highpart (mode, op);
3440 
3441   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3442 
3443   wider_mode = GET_MODE_WIDER_MODE (mode);
3444   op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3445 		     build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode)), 0, 1);
3446   return convert_modes (mode, wider_mode, op, 0);
3447 }
3448 
3449 /* Like expand_mult_highpart, but only consider using a multiplication
3450    optab.  OP1 is an rtx for the constant operand.  */
3451 
3452 static rtx
3453 expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
3454 			    rtx target, int unsignedp, int max_cost)
3455 {
3456   rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3457   enum machine_mode wider_mode;
3458   optab moptab;
3459   rtx tem;
3460   int size;
3461   bool speed = optimize_insn_for_speed_p ();
3462 
3463   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3464 
3465   wider_mode = GET_MODE_WIDER_MODE (mode);
3466   size = GET_MODE_BITSIZE (mode);
3467 
3468   /* Firstly, try using a multiplication insn that only generates the needed
3469      high part of the product, and in the sign flavor of unsignedp.  */
3470   if (mul_highpart_cost[speed][mode] < max_cost)
3471     {
3472       moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3473       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3474 			  unsignedp, OPTAB_DIRECT);
3475       if (tem)
3476 	return tem;
3477     }
3478 
3479   /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3480      Need to adjust the result after the multiplication.  */
3481   if (size - 1 < BITS_PER_WORD
3482       && (mul_highpart_cost[speed][mode] + 2 * shift_cost[speed][mode][size-1]
3483 	  + 4 * add_cost[speed][mode] < max_cost))
3484     {
3485       moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3486       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3487 			  unsignedp, OPTAB_DIRECT);
3488       if (tem)
3489 	/* We used the wrong signedness.  Adjust the result.  */
3490 	return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3491 					    tem, unsignedp);
3492     }
3493 
3494   /* Try widening multiplication.  */
3495   moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3496   if (optab_handler (moptab, wider_mode)->insn_code != CODE_FOR_nothing
3497       && mul_widen_cost[speed][wider_mode] < max_cost)
3498     {
3499       tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3500 			  unsignedp, OPTAB_WIDEN);
3501       if (tem)
3502 	return extract_high_half (mode, tem);
3503     }
3504 
3505   /* Try widening the mode and performing a non-widening multiplication.  */
3506   if (optab_handler (smul_optab, wider_mode)->insn_code != CODE_FOR_nothing
3507       && size - 1 < BITS_PER_WORD
3508       && mul_cost[speed][wider_mode] + shift_cost[speed][mode][size-1] < max_cost)
3509     {
3510       rtx insns, wop0, wop1;
3511 
3512       /* We need to widen the operands, for example to ensure the
3513 	 constant multiplier is correctly sign or zero extended.
3514 	 Use a sequence to clean-up any instructions emitted by
3515 	 Use a sequence to clean up any instructions emitted by
3516       start_sequence ();
3517       wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3518       wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3519       tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3520 			  unsignedp, OPTAB_WIDEN);
3521       insns = get_insns ();
3522       end_sequence ();
3523 
3524       if (tem)
3525 	{
3526 	  emit_insn (insns);
3527 	  return extract_high_half (mode, tem);
3528 	}
3529     }
3530 
3531   /* Try widening multiplication of opposite signedness, and adjust.  */
3532   moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3533   if (optab_handler (moptab, wider_mode)->insn_code != CODE_FOR_nothing
3534       && size - 1 < BITS_PER_WORD
3535       && (mul_widen_cost[speed][wider_mode] + 2 * shift_cost[speed][mode][size-1]
3536 	  + 4 * add_cost[speed][mode] < max_cost))
3537     {
3538       tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3539 			  NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3540       if (tem != 0)
3541 	{
3542 	  tem = extract_high_half (mode, tem);
3543 	  /* We used the wrong signedness.  Adjust the result.  */
3544 	  return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3545 					      target, unsignedp);
3546 	}
3547     }
3548 
3549   return 0;
3550 }
3551 
3552 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3553    putting the high half of the result in TARGET if that is convenient,
3554    and return where the result is.  If the operation cannot be performed,
3555    0 is returned.
3556 
3557    MODE is the mode of operation and result.
3558 
3559    UNSIGNEDP nonzero means unsigned multiply.
3560 
3561    MAX_COST is the total allowed cost for the expanded RTL.  */
3562 
3563 static rtx
3564 expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
3565 		      rtx target, int unsignedp, int max_cost)
3566 {
3567   enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3568   unsigned HOST_WIDE_INT cnst1;
3569   int extra_cost;
3570   bool sign_adjust = false;
3571   enum mult_variant variant;
3572   struct algorithm alg;
3573   rtx tem;
3574   bool speed = optimize_insn_for_speed_p ();
3575 
3576   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3577   /* We can't support modes wider than HOST_BITS_PER_WIDE_INT.  */
3578   gcc_assert (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT);
3579 
3580   cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3581 
3582   /* We can't optimize modes wider than BITS_PER_WORD.
3583      ??? We might be able to perform double-word arithmetic if
3584      mode == word_mode, however all the cost calculations in
3585      synth_mult etc. assume single-word operations.  */
3586   if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3587     return expand_mult_highpart_optab (mode, op0, op1, target,
3588 				       unsignedp, max_cost);
3589 
3590   extra_cost = shift_cost[speed][mode][GET_MODE_BITSIZE (mode) - 1];
3591 
3592   /* Check whether we try to multiply by a negative constant.  */
3593   if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3594     {
3595       sign_adjust = true;
3596       extra_cost += add_cost[speed][mode];
3597     }
3598 
3599   /* See whether shift/add multiplication is cheap enough.  */
3600   if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3601 			   max_cost - extra_cost))
3602     {
3603       /* See whether the specialized multiplication optabs are
3604 	 cheaper than the shift/add version.  */
3605       tem = expand_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3606 					alg.cost.cost + extra_cost);
3607       if (tem)
3608 	return tem;
3609 
3610       tem = convert_to_mode (wider_mode, op0, unsignedp);
3611       tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3612       tem = extract_high_half (mode, tem);
3613 
3614       /* Adjust result for signedness.  */
3615       if (sign_adjust)
3616 	tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3617 
3618       return tem;
3619     }
3620   return expand_mult_highpart_optab (mode, op0, op1, target,
3621 				     unsignedp, max_cost);
3622 }
3623 
3624 
3625 /* Expand signed modulus of OP0 by a power of two D in mode MODE.  */
3626 
3627 static rtx
3628 expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3629 {
3630   unsigned HOST_WIDE_INT masklow, maskhigh;
3631   rtx result, temp, shift, label;
3632   int logd;
3633 
3634   logd = floor_log2 (d);
3635   result = gen_reg_rtx (mode);
3636 
3637   /* Avoid conditional branches when they're expensive.  */
3638   if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3639       && optimize_insn_for_speed_p ())
3640     {
3641       rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3642 				      mode, 0, -1);
3643       if (signmask)
3644 	{
3645 	  signmask = force_reg (mode, signmask);
3646 	  masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3647 	  shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3648 
3649 	  /* Use the rtx_cost of a LSHIFTRT instruction to determine
3650 	     which instruction sequence to use.  If logical right shifts
3651 	     are expensive then use 2 XORs, 2 SUBs and an AND, otherwise
3652 	     use a LSHIFTRT, 1 ADD, 1 SUB and an AND.  */
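	  /* Both sequences compute OP0 rem D with the sign of OP0 and no
	     branch.  With S = (OP0 < 0 ? -1 : 0) and S' = S logically
	     shifted right so that only the low LOGD bits remain (i.e. D-1
	     for negative OP0, 0 otherwise):

	       variant 1:  t = ((OP0 ^ S) - S) & (D - 1);  result = (t ^ S) - S
	       variant 2:  result = ((OP0 + S') & (D - 1)) - S'

	     For example, OP0 = -5 and D = 4 give -1 either way.  */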
3653 
3654 	  temp = gen_rtx_LSHIFTRT (mode, result, shift);
3655 	  if (optab_handler (lshr_optab, mode)->insn_code == CODE_FOR_nothing
3656 	      || rtx_cost (temp, SET, optimize_insn_for_speed_p ()) > COSTS_N_INSNS (2))
3657 	    {
3658 	      temp = expand_binop (mode, xor_optab, op0, signmask,
3659 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
3660 	      temp = expand_binop (mode, sub_optab, temp, signmask,
3661 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
3662 	      temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3663 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
3664 	      temp = expand_binop (mode, xor_optab, temp, signmask,
3665 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
3666 	      temp = expand_binop (mode, sub_optab, temp, signmask,
3667 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
3668 	    }
3669 	  else
3670 	    {
3671 	      signmask = expand_binop (mode, lshr_optab, signmask, shift,
3672 				       NULL_RTX, 1, OPTAB_LIB_WIDEN);
3673 	      signmask = force_reg (mode, signmask);
3674 
3675 	      temp = expand_binop (mode, add_optab, op0, signmask,
3676 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
3677 	      temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3678 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
3679 	      temp = expand_binop (mode, sub_optab, temp, signmask,
3680 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
3681 	    }
3682 	  return temp;
3683 	}
3684     }
3685 
3686   /* Mask contains the mode's signbit and the significant bits of the
3687      modulus.  By including the signbit in the operation, many targets
3688      can avoid an explicit compare operation in the following comparison
3689      against zero.  */
3690 
3691   masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3692   if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
3693     {
3694       masklow |= (HOST_WIDE_INT) -1 << (GET_MODE_BITSIZE (mode) - 1);
3695       maskhigh = -1;
3696     }
3697   else
3698     maskhigh = (HOST_WIDE_INT) -1
3699 		 << (GET_MODE_BITSIZE (mode) - HOST_BITS_PER_WIDE_INT - 1);
3700 
3701   temp = expand_binop (mode, and_optab, op0,
3702 		       immed_double_const (masklow, maskhigh, mode),
3703 		       result, 1, OPTAB_LIB_WIDEN);
3704   if (temp != result)
3705     emit_move_insn (result, temp);
3706 
3707   label = gen_label_rtx ();
3708   do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3709 
3710   temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3711 		       0, OPTAB_LIB_WIDEN);
3712   masklow = (HOST_WIDE_INT) -1 << logd;
3713   maskhigh = -1;
3714   temp = expand_binop (mode, ior_optab, temp,
3715 		       immed_double_const (masklow, maskhigh, mode),
3716 		       result, 1, OPTAB_LIB_WIDEN);
3717   temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3718 		       0, OPTAB_LIB_WIDEN);
3719   if (temp != result)
3720     emit_move_insn (result, temp);
3721   emit_label (label);
3722   return result;
3723 }
3724 
3725 /* Expand signed division of OP0 by a power of two D in mode MODE.
3726    This routine is only called for positive values of D.  */
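/* Every expansion below relies on the same identity for truncating signed
   division by D = 2^LOGD:

     OP0 / D == (OP0 + (OP0 < 0 ? D - 1 : 0)) >> LOGD    (arithmetic shift)

   e.g. -7 / 4: (-7 + 3) >> 2 == -4 >> 2 == -1.  */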
3727 
3728 static rtx
3729 expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3730 {
3731   rtx temp, label;
3732   tree shift;
3733   int logd;
3734 
3735   logd = floor_log2 (d);
3736   shift = build_int_cst (NULL_TREE, logd);
3737 
3738   if (d == 2
3739       && BRANCH_COST (optimize_insn_for_speed_p (),
3740 		      false) >= 1)
3741     {
3742       temp = gen_reg_rtx (mode);
3743       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3744       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3745 			   0, OPTAB_LIB_WIDEN);
3746       return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3747     }
3748 
3749 #ifdef HAVE_conditional_move
3750   if (BRANCH_COST (optimize_insn_for_speed_p (), false)
3751       >= 2)
3752     {
3753       rtx temp2;
3754 
3755       /* ??? emit_conditional_move forces a stack adjustment via
3756 	 compare_from_rtx so, if the sequence is discarded, it will
3757 	 be lost.  Do it now instead.  */
3758       do_pending_stack_adjust ();
3759 
3760       start_sequence ();
3761       temp2 = copy_to_mode_reg (mode, op0);
3762       temp = expand_binop (mode, add_optab, temp2, GEN_INT (d-1),
3763 			   NULL_RTX, 0, OPTAB_LIB_WIDEN);
3764       temp = force_reg (mode, temp);
3765 
3766       /* Construct "temp2 = (temp2 < 0) ? temp : temp2".  */
3767       temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3768 				     mode, temp, temp2, mode, 0);
3769       if (temp2)
3770 	{
3771 	  rtx seq = get_insns ();
3772 	  end_sequence ();
3773 	  emit_insn (seq);
3774 	  return expand_shift (RSHIFT_EXPR, mode, temp2, shift, NULL_RTX, 0);
3775 	}
3776       end_sequence ();
3777     }
3778 #endif
3779 
3780   if (BRANCH_COST (optimize_insn_for_speed_p (),
3781 		   false) >= 2)
3782     {
3783       int ushift = GET_MODE_BITSIZE (mode) - logd;
3784 
3785       temp = gen_reg_rtx (mode);
3786       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3787       if (shift_cost[optimize_insn_for_speed_p ()][mode][ushift] > COSTS_N_INSNS (1))
3788 	temp = expand_binop (mode, and_optab, temp, GEN_INT (d - 1),
3789 			     NULL_RTX, 0, OPTAB_LIB_WIDEN);
3790       else
3791 	temp = expand_shift (RSHIFT_EXPR, mode, temp,
3792 			     build_int_cst (NULL_TREE, ushift),
3793 			     NULL_RTX, 1);
3794       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3795 			   0, OPTAB_LIB_WIDEN);
3796       return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3797     }
3798 
3799   label = gen_label_rtx ();
3800   temp = copy_to_mode_reg (mode, op0);
3801   do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3802   expand_inc (temp, GEN_INT (d - 1));
3803   emit_label (label);
3804   return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3805 }
3806 
3807 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3808    if that is convenient, and returning where the result is.
3809    You may request either the quotient or the remainder as the result;
3810    specify REM_FLAG nonzero to get the remainder.
3811 
3812    CODE is the expression code for which kind of division this is;
3813    it controls how rounding is done.  MODE is the machine mode to use.
3814    UNSIGNEDP nonzero means do unsigned division.  */
3815 
3816 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3817    and then correct it by or'ing in missing high bits
3818    if result of ANDI is nonzero.
3819    For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3820    This could optimize to a bfexts instruction.
3821    But C doesn't use these operations, so their optimizations are
3822    left for later.  */
3823 /* ??? For modulo, we don't actually need the highpart of the first product,
3824    the low part will do nicely.  And for small divisors, the second multiply
3825    can also be a low-part only multiply or even be completely left out.
3826    E.g. to calculate the remainder of a division by 3 with a 32 bit
3827    multiply, multiply with 0x55555556 and extract the upper two bits;
3828    the result is exact for inputs up to 0x1fffffff.
3829    The input range can be reduced by using cross-sum rules.
3830    For odd divisors >= 3, the following table gives right shift counts
3831    so that if a number is shifted by an integer multiple of the given
3832    amount, the remainder stays the same:
3833    2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3834    14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3835    0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3836    20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3837    0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3838 
3839    Cross-sum rules for even numbers can be derived by leaving as many bits
3840    to the right alone as the divisor has zeros to the right.
3841    E.g. if x is an unsigned 32 bit number:
3842    (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3843    */
3844 
3845 rtx
3846 expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
3847 	       rtx op0, rtx op1, rtx target, int unsignedp)
3848 {
3849   enum machine_mode compute_mode;
3850   rtx tquotient;
3851   rtx quotient = 0, remainder = 0;
3852   rtx last;
3853   int size;
3854   rtx insn, set;
3855   optab optab1, optab2;
3856   int op1_is_constant, op1_is_pow2 = 0;
3857   int max_cost, extra_cost;
3858   static HOST_WIDE_INT last_div_const = 0;
3859   static HOST_WIDE_INT ext_op1;
3860   bool speed = optimize_insn_for_speed_p ();
3861 
3862   op1_is_constant = CONST_INT_P (op1);
3863   if (op1_is_constant)
3864     {
3865       ext_op1 = INTVAL (op1);
3866       if (unsignedp)
3867 	ext_op1 &= GET_MODE_MASK (mode);
3868       op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3869 		     || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3870     }
3871 
3872   /*
3873      This is the structure of expand_divmod:
3874 
3875      First comes code to fix up the operands so we can perform the operations
3876      correctly and efficiently.
3877 
3878      Second comes a switch statement with code specific for each rounding mode.
3879      For some special operands this code emits all RTL for the desired
3880      operation, for other cases, it generates only a quotient and stores it in
3881      QUOTIENT.  The case for trunc division/remainder might leave quotient = 0,
3882      to indicate that it has not done anything.
3883 
3884      Last comes code that finishes the operation.  If QUOTIENT is set and
3885      REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1.  If
3886      QUOTIENT is not set, it is computed using trunc rounding.
3887 
3888      We try to generate special code for division and remainder when OP1 is a
3889      constant.  If |OP1| = 2**n we can use shifts and some other fast
3890      operations.  For other values of OP1, we compute a carefully selected
3891      fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3892      by m.
3893 
3894      In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3895      half of the product.  Different strategies for generating the product are
3896      implemented in expand_mult_highpart.
3897 
3898      If what we actually want is the remainder, we generate that by another
3899      by-constant multiplication and a subtraction.  */
3900 
3901   /* We shouldn't be called with OP1 == const1_rtx, but some of the
3902      code below will malfunction if we are, so check here and handle
3903      the special case if so.  */
3904   if (op1 == const1_rtx)
3905     return rem_flag ? const0_rtx : op0;
3906 
3907   /* When dividing by -1, we could get an overflow.
3908      negv_optab can handle overflows.  */
3909   if (! unsignedp && op1 == constm1_rtx)
3910     {
3911       if (rem_flag)
3912 	return const0_rtx;
3913       return expand_unop (mode, flag_trapv && GET_MODE_CLASS(mode) == MODE_INT
3914 			  ? negv_optab : neg_optab, op0, target, 0);
3915     }
3916 
3917   if (target
3918       /* Don't use the function value register as a target
3919 	 since we have to read it as well as write it,
3920 	 and function-inlining gets confused by this.  */
3921       && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3922 	  /* Don't clobber an operand while doing a multi-step calculation.  */
3923 	  || ((rem_flag || op1_is_constant)
3924 	      && (reg_mentioned_p (target, op0)
3925 		  || (MEM_P (op0) && MEM_P (target))))
3926 	  || reg_mentioned_p (target, op1)
3927 	  || (MEM_P (op1) && MEM_P (target))))
3928     target = 0;
3929 
3930   /* Get the mode in which to perform this computation.  Normally it will
3931      be MODE, but sometimes we can't do the desired operation in MODE.
3932      If so, pick a wider mode in which we can do the operation.  Convert
3933      to that mode at the start to avoid repeated conversions.
3934 
3935      First see what operations we need.  These depend on the expression
3936      we are evaluating.  (We assume that divxx3 insns exist under the
3937      same conditions that modxx3 insns and that these insns don't normally
3938      fail.  If these assumptions are not correct, we may generate less
3939      efficient code in some cases.)
3940 
3941      Then see if we find a mode in which we can open-code that operation
3942      (either a division, modulus, or shift).  Finally, check for the smallest
3943      mode for which we can do the operation with a library call.  */
3944 
3945   /* We might want to refine this now that we have division-by-constant
3946      optimization.  Since expand_mult_highpart tries so many variants, it is
3947      not straightforward to generalize this.  Maybe we should make an array
3948      of possible modes in init_expmed?  Save this for GCC 2.7.  */
3949 
3950   optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3951 	    ? (unsignedp ? lshr_optab : ashr_optab)
3952 	    : (unsignedp ? udiv_optab : sdiv_optab));
3953   optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3954 	    ? optab1
3955 	    : (unsignedp ? udivmod_optab : sdivmod_optab));
3956 
3957   for (compute_mode = mode; compute_mode != VOIDmode;
3958        compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3959     if (optab_handler (optab1, compute_mode)->insn_code != CODE_FOR_nothing
3960 	|| optab_handler (optab2, compute_mode)->insn_code != CODE_FOR_nothing)
3961       break;
3962 
3963   if (compute_mode == VOIDmode)
3964     for (compute_mode = mode; compute_mode != VOIDmode;
3965 	 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3966       if (optab_libfunc (optab1, compute_mode)
3967 	  || optab_libfunc (optab2, compute_mode))
3968 	break;
3969 
3970   /* If we still couldn't find a mode, use MODE, but expand_binop will
3971      probably die.  */
3972   if (compute_mode == VOIDmode)
3973     compute_mode = mode;
3974 
3975   if (target && GET_MODE (target) == compute_mode)
3976     tquotient = target;
3977   else
3978     tquotient = gen_reg_rtx (compute_mode);
3979 
3980   size = GET_MODE_BITSIZE (compute_mode);
3981 #if 0
3982   /* It should be possible to restrict the precision to GET_MODE_BITSIZE
3983      (mode), and thereby get better code when OP1 is a constant.  Do that
3984      later.  It will require going over all usages of SIZE below.  */
3985   size = GET_MODE_BITSIZE (mode);
3986 #endif
3987 
3988   /* Only deduct something for a REM if the last divide done was
3989      for a different constant.   Then set the constant of the last
3990      divide.  */
3991   max_cost = unsignedp ? udiv_cost[speed][compute_mode] : sdiv_cost[speed][compute_mode];
3992   if (rem_flag && ! (last_div_const != 0 && op1_is_constant
3993 		     && INTVAL (op1) == last_div_const))
3994     max_cost -= mul_cost[speed][compute_mode] + add_cost[speed][compute_mode];
3995 
3996   last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
3997 
3998   /* Now convert to the best mode to use.  */
3999   if (compute_mode != mode)
4000     {
4001       op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4002       op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4003 
4004       /* convert_modes may have placed op1 into a register, so we
4005 	 must recompute the following.  */
4006       op1_is_constant = CONST_INT_P (op1);
4007       op1_is_pow2 = (op1_is_constant
4008 		     && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4009 			  || (! unsignedp
4010 			      && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1))))));
4011     }
4012 
4013   /* If one of the operands is a volatile MEM, copy it into a register.  */
4014 
4015   if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4016     op0 = force_reg (compute_mode, op0);
4017   if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4018     op1 = force_reg (compute_mode, op1);
4019 
4020   /* If we need the remainder or if OP1 is constant, we need to
4021      put OP0 in a register in case it has any queued subexpressions.  */
4022   if (rem_flag || op1_is_constant)
4023     op0 = force_reg (compute_mode, op0);
4024 
4025   last = get_last_insn ();
4026 
4027   /* Promote floor rounding to trunc rounding for unsigned operations.  */
4028   if (unsignedp)
4029     {
4030       if (code == FLOOR_DIV_EXPR)
4031 	code = TRUNC_DIV_EXPR;
4032       if (code == FLOOR_MOD_EXPR)
4033 	code = TRUNC_MOD_EXPR;
4034       if (code == EXACT_DIV_EXPR && op1_is_pow2)
4035 	code = TRUNC_DIV_EXPR;
4036     }
4037 
4038   if (op1 != const0_rtx)
4039     switch (code)
4040       {
4041       case TRUNC_MOD_EXPR:
4042       case TRUNC_DIV_EXPR:
4043 	if (op1_is_constant)
4044 	  {
4045 	    if (unsignedp)
4046 	      {
4047 		unsigned HOST_WIDE_INT mh;
4048 		int pre_shift, post_shift;
4049 		int dummy;
4050 		rtx ml;
4051 		unsigned HOST_WIDE_INT d = (INTVAL (op1)
4052 					    & GET_MODE_MASK (compute_mode));
4053 
4054 		if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4055 		  {
4056 		    pre_shift = floor_log2 (d);
4057 		    if (rem_flag)
4058 		      {
4059 			remainder
4060 			  = expand_binop (compute_mode, and_optab, op0,
4061 					  GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4062 					  remainder, 1,
4063 					  OPTAB_LIB_WIDEN);
4064 			if (remainder)
4065 			  return gen_lowpart (mode, remainder);
4066 		      }
4067 		    quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4068 					     build_int_cst (NULL_TREE,
4069 							    pre_shift),
4070 					     tquotient, 1);
4071 		  }
4072 		else if (size <= HOST_BITS_PER_WIDE_INT)
4073 		  {
4074 		    if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
4075 		      {
4076 			/* Most significant bit of divisor is set; emit an scc
4077 			   insn.  */
4078 			quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4079 							  compute_mode, 1, 1);
4080 		      }
4081 		    else
4082 		      {
4083 			/* Find a suitable multiplier and right shift count
4084 			   instead of multiplying with D.  */
4085 
4086 			mh = choose_multiplier (d, size, size,
4087 						&ml, &post_shift, &dummy);
4088 
4089 			/* If the suggested multiplier is more than SIZE bits,
4090 			   we can do better for even divisors, using an
4091 			   initial right shift.  */
4092 			if (mh != 0 && (d & 1) == 0)
4093 			  {
4094 			    pre_shift = floor_log2 (d & -d);
4095 			    mh = choose_multiplier (d >> pre_shift, size,
4096 						    size - pre_shift,
4097 						    &ml, &post_shift, &dummy);
4098 			    gcc_assert (!mh);
4099 			  }
4100 			else
4101 			  pre_shift = 0;
4102 
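			/* When MH is set, the N+1-bit multiplier does not
			   fit in N bits, so the code below uses the standard
			   fixup (cf. Granlund/Montgomery, "Division by
			   invariant integers using multiplication"):

			     t1 = MULUH (OP0, ml)   (high half of the
						     unsigned product)
			     q  = (((OP0 - t1) >> 1) + t1) >> (POST_SHIFT - 1)

			   which keeps every intermediate value within
			   N bits.  */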
4103 			if (mh != 0)
4104 			  {
4105 			    rtx t1, t2, t3, t4;
4106 
4107 			    if (post_shift - 1 >= BITS_PER_WORD)
4108 			      goto fail1;
4109 
4110 			    extra_cost
4111 			      = (shift_cost[speed][compute_mode][post_shift - 1]
4112 				 + shift_cost[speed][compute_mode][1]
4113 				 + 2 * add_cost[speed][compute_mode]);
4114 			    t1 = expand_mult_highpart (compute_mode, op0, ml,
4115 						       NULL_RTX, 1,
4116 						       max_cost - extra_cost);
4117 			    if (t1 == 0)
4118 			      goto fail1;
4119 			    t2 = force_operand (gen_rtx_MINUS (compute_mode,
4120 							       op0, t1),
4121 						NULL_RTX);
4122 			    t3 = expand_shift
4123 			      (RSHIFT_EXPR, compute_mode, t2,
4124 			       build_int_cst (NULL_TREE, 1),
4125 			       NULL_RTX,1);
4126 			    t4 = force_operand (gen_rtx_PLUS (compute_mode,
4127 							      t1, t3),
4128 						NULL_RTX);
4129 			    quotient = expand_shift
4130 			      (RSHIFT_EXPR, compute_mode, t4,
4131 			       build_int_cst (NULL_TREE, post_shift - 1),
4132 			       tquotient, 1);
4133 			  }
4134 			else
4135 			  {
4136 			    rtx t1, t2;
4137 
4138 			    if (pre_shift >= BITS_PER_WORD
4139 				|| post_shift >= BITS_PER_WORD)
4140 			      goto fail1;
4141 
4142 			    t1 = expand_shift
4143 			      (RSHIFT_EXPR, compute_mode, op0,
4144 			       build_int_cst (NULL_TREE, pre_shift),
4145 			       NULL_RTX, 1);
4146 			    extra_cost
4147 			      = (shift_cost[speed][compute_mode][pre_shift]
4148 				 + shift_cost[speed][compute_mode][post_shift]);
4149 			    t2 = expand_mult_highpart (compute_mode, t1, ml,
4150 						       NULL_RTX, 1,
4151 						       max_cost - extra_cost);
4152 			    if (t2 == 0)
4153 			      goto fail1;
4154 			    quotient = expand_shift
4155 			      (RSHIFT_EXPR, compute_mode, t2,
4156 			       build_int_cst (NULL_TREE, post_shift),
4157 			       tquotient, 1);
4158 			  }
4159 		      }
4160 		  }
4161 		else		/* Too wide mode to use tricky code */
4162 		  break;
4163 
4164 		insn = get_last_insn ();
4165 		if (insn != last
4166 		    && (set = single_set (insn)) != 0
4167 		    && SET_DEST (set) == quotient)
4168 		  set_unique_reg_note (insn,
4169 				       REG_EQUAL,
4170 				       gen_rtx_UDIV (compute_mode, op0, op1));
4171 	      }
4172 	    else		/* TRUNC_DIV, signed */
4173 	      {
4174 		unsigned HOST_WIDE_INT ml;
4175 		int lgup, post_shift;
4176 		rtx mlr;
4177 		HOST_WIDE_INT d = INTVAL (op1);
4178 		unsigned HOST_WIDE_INT abs_d;
4179 
4180 		/* Since d might be INT_MIN, we have to cast to
4181 		   unsigned HOST_WIDE_INT before negating to avoid
4182 		   undefined signed overflow.  */
4183 		abs_d = (d >= 0
4184 			 ? (unsigned HOST_WIDE_INT) d
4185 			 : - (unsigned HOST_WIDE_INT) d);
4186 
4187 		/* n rem d = n rem -d */
4188 		if (rem_flag && d < 0)
4189 		  {
4190 		    d = abs_d;
4191 		    op1 = gen_int_mode (abs_d, compute_mode);
4192 		  }
4193 
4194 		if (d == 1)
4195 		  quotient = op0;
4196 		else if (d == -1)
4197 		  quotient = expand_unop (compute_mode, neg_optab, op0,
4198 					  tquotient, 0);
4199 		else if (HOST_BITS_PER_WIDE_INT >= size
4200 			 && abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4201 		  {
4202 		    /* This case is not handled correctly below.  */
4203 		    quotient = emit_store_flag (tquotient, EQ, op0, op1,
4204 						compute_mode, 1, 1);
4205 		    if (quotient == 0)
4206 		      goto fail1;
4207 		  }
4208 		else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4209 			 && (rem_flag ? smod_pow2_cheap[speed][compute_mode]
4210 				      : sdiv_pow2_cheap[speed][compute_mode])
4211 			 /* We assume that the cheap metric is true if the
4212 			    optab has an expander for this mode.  */
4213 			 && ((optab_handler ((rem_flag ? smod_optab
4214 					      : sdiv_optab),
4215 					      compute_mode)->insn_code
4216 			      != CODE_FOR_nothing)
4217 			     || (optab_handler(sdivmod_optab,
4218 					       compute_mode)
4219 				 ->insn_code != CODE_FOR_nothing)))
4220 		  ;
4221 		else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4222 		  {
4223 		    if (rem_flag)
4224 		      {
4225 			remainder = expand_smod_pow2 (compute_mode, op0, d);
4226 			if (remainder)
4227 			  return gen_lowpart (mode, remainder);
4228 		      }
4229 
4230 		    if (sdiv_pow2_cheap[speed][compute_mode]
4231 			&& ((optab_handler (sdiv_optab, compute_mode)->insn_code
4232 			     != CODE_FOR_nothing)
4233 			    || (optab_handler (sdivmod_optab, compute_mode)->insn_code
4234 				!= CODE_FOR_nothing)))
4235 		      quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4236 						compute_mode, op0,
4237 						gen_int_mode (abs_d,
4238 							      compute_mode),
4239 						NULL_RTX, 0);
4240 		    else
4241 		      quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4242 
4243 		    /* We have computed OP0 / abs(OP1).  If OP1 is negative,
4244 		       negate the quotient.  */
4245 		    if (d < 0)
4246 		      {
4247 			insn = get_last_insn ();
4248 			if (insn != last
4249 			    && (set = single_set (insn)) != 0
4250 			    && SET_DEST (set) == quotient
4251 			    && abs_d < ((unsigned HOST_WIDE_INT) 1
4252 					<< (HOST_BITS_PER_WIDE_INT - 1)))
4253 			  set_unique_reg_note (insn,
4254 					       REG_EQUAL,
4255 					       gen_rtx_DIV (compute_mode,
4256 							    op0,
4257 							    GEN_INT
4258 							    (trunc_int_for_mode
4259 							     (abs_d,
4260 							      compute_mode))));
4261 
4262 			quotient = expand_unop (compute_mode, neg_optab,
4263 						quotient, quotient, 0);
4264 		      }
4265 		  }
4266 		else if (size <= HOST_BITS_PER_WIDE_INT)
4267 		  {
4268 		    choose_multiplier (abs_d, size, size - 1,
4269 				       &mlr, &post_shift, &lgup);
4270 		    ml = (unsigned HOST_WIDE_INT) INTVAL (mlr);
4271 		    if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4272 		      {
4273 			rtx t1, t2, t3;
4274 
4275 			if (post_shift >= BITS_PER_WORD
4276 			    || size - 1 >= BITS_PER_WORD)
4277 			  goto fail1;
4278 
4279 			extra_cost = (shift_cost[speed][compute_mode][post_shift]
4280 				      + shift_cost[speed][compute_mode][size - 1]
4281 				      + add_cost[speed][compute_mode]);
4282 			t1 = expand_mult_highpart (compute_mode, op0, mlr,
4283 						   NULL_RTX, 0,
4284 						   max_cost - extra_cost);
4285 			if (t1 == 0)
4286 			  goto fail1;
4287 			t2 = expand_shift
4288 			  (RSHIFT_EXPR, compute_mode, t1,
4289 			   build_int_cst (NULL_TREE, post_shift),
4290 			   NULL_RTX, 0);
4291 			t3 = expand_shift
4292 			  (RSHIFT_EXPR, compute_mode, op0,
4293 			   build_int_cst (NULL_TREE, size - 1),
4294 			   NULL_RTX, 0);
4295 			if (d < 0)
4296 			  quotient
4297 			    = force_operand (gen_rtx_MINUS (compute_mode,
4298 							    t3, t2),
4299 					     tquotient);
4300 			else
4301 			  quotient
4302 			    = force_operand (gen_rtx_MINUS (compute_mode,
4303 							    t2, t3),
4304 					     tquotient);
4305 		      }
4306 		    else
4307 		      {
4308 			rtx t1, t2, t3, t4;
4309 
4310 			if (post_shift >= BITS_PER_WORD
4311 			    || size - 1 >= BITS_PER_WORD)
4312 			  goto fail1;
4313 
4314 			ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4315 			mlr = gen_int_mode (ml, compute_mode);
4316 			extra_cost = (shift_cost[speed][compute_mode][post_shift]
4317 				      + shift_cost[speed][compute_mode][size - 1]
4318 				      + 2 * add_cost[speed][compute_mode]);
4319 			t1 = expand_mult_highpart (compute_mode, op0, mlr,
4320 						   NULL_RTX, 0,
4321 						   max_cost - extra_cost);
4322 			if (t1 == 0)
4323 			  goto fail1;
4324 			t2 = force_operand (gen_rtx_PLUS (compute_mode,
4325 							  t1, op0),
4326 					    NULL_RTX);
4327 			t3 = expand_shift
4328 			  (RSHIFT_EXPR, compute_mode, t2,
4329 			   build_int_cst (NULL_TREE, post_shift),
4330 			   NULL_RTX, 0);
4331 			t4 = expand_shift
4332 			  (RSHIFT_EXPR, compute_mode, op0,
4333 			   build_int_cst (NULL_TREE, size - 1),
4334 			   NULL_RTX, 0);
4335 			if (d < 0)
4336 			  quotient
4337 			    = force_operand (gen_rtx_MINUS (compute_mode,
4338 							    t4, t3),
4339 					     tquotient);
4340 			else
4341 			  quotient
4342 			    = force_operand (gen_rtx_MINUS (compute_mode,
4343 							    t3, t4),
4344 					     tquotient);
4345 		      }
4346 		  }
4347 		else		/* Too wide mode to use tricky code */
4348 		  break;
4349 
4350 		insn = get_last_insn ();
4351 		if (insn != last
4352 		    && (set = single_set (insn)) != 0
4353 		    && SET_DEST (set) == quotient)
4354 		  set_unique_reg_note (insn,
4355 				       REG_EQUAL,
4356 				       gen_rtx_DIV (compute_mode, op0, op1));
4357 	      }
4358 	    break;
4359 	  }
4360       fail1:
4361 	delete_insns_since (last);
4362 	break;
4363 
4364       case FLOOR_DIV_EXPR:
4365       case FLOOR_MOD_EXPR:
4366       /* We will come here only for signed operations.  */
4367 	if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4368 	  {
4369 	    unsigned HOST_WIDE_INT mh;
4370 	    int pre_shift, lgup, post_shift;
4371 	    HOST_WIDE_INT d = INTVAL (op1);
4372 	    rtx ml;
4373 
4374 	    if (d > 0)
4375 	      {
4376 		/* We could just as easily deal with negative constants here,
4377 		   but it does not seem worth the trouble for GCC 2.6.  */
4378 		if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4379 		  {
4380 		    pre_shift = floor_log2 (d);
4381 		    if (rem_flag)
4382 		      {
4383 			remainder = expand_binop (compute_mode, and_optab, op0,
4384 						  GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4385 						  remainder, 0, OPTAB_LIB_WIDEN);
4386 			if (remainder)
4387 			  return gen_lowpart (mode, remainder);
4388 		      }
4389 		    quotient = expand_shift
4390 		      (RSHIFT_EXPR, compute_mode, op0,
4391 		       build_int_cst (NULL_TREE, pre_shift),
4392 		       tquotient, 0);
4393 		  }
4394 		else
4395 		  {
4396 		    rtx t1, t2, t3, t4;
4397 
4398 		    mh = choose_multiplier (d, size, size - 1,
4399 					    &ml, &post_shift, &lgup);
4400 		    gcc_assert (!mh);
4401 
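		    /* Branch-free floor division by a positive constant:
		       with T1 = OP0 >> (SIZE-1) (arithmetic, so -1 for
		       negative OP0 and 0 otherwise) the code below computes

			 q = (MULUH (OP0 ^ T1, ml) >> POST_SHIFT) ^ T1

		       where MULUH is the high half of the unsigned product.
		       OP0 ^ T1 is OP0 for OP0 >= 0 and -OP0 - 1 otherwise,
		       and floor (X / D) == -floor ((-X - 1) / D) - 1 for
		       negative X, so XORing with T1 again yields the floor
		       quotient in both cases.  */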
4402 		    if (post_shift < BITS_PER_WORD
4403 			&& size - 1 < BITS_PER_WORD)
4404 		      {
4405 			t1 = expand_shift
4406 			  (RSHIFT_EXPR, compute_mode, op0,
4407 			   build_int_cst (NULL_TREE, size - 1),
4408 			   NULL_RTX, 0);
4409 			t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4410 					   NULL_RTX, 0, OPTAB_WIDEN);
4411 			extra_cost = (shift_cost[speed][compute_mode][post_shift]
4412 				      + shift_cost[speed][compute_mode][size - 1]
4413 				      + 2 * add_cost[speed][compute_mode]);
4414 			t3 = expand_mult_highpart (compute_mode, t2, ml,
4415 						   NULL_RTX, 1,
4416 						   max_cost - extra_cost);
4417 			if (t3 != 0)
4418 			  {
4419 			    t4 = expand_shift
4420 			      (RSHIFT_EXPR, compute_mode, t3,
4421 			       build_int_cst (NULL_TREE, post_shift),
4422 			       NULL_RTX, 1);
4423 			    quotient = expand_binop (compute_mode, xor_optab,
4424 						     t4, t1, tquotient, 0,
4425 						     OPTAB_WIDEN);
4426 			  }
4427 		      }
4428 		  }
4429 	      }
4430 	    else
4431 	      {
4432 		rtx nsign, t1, t2, t3, t4;
4433 		t1 = force_operand (gen_rtx_PLUS (compute_mode,
4434 						  op0, constm1_rtx), NULL_RTX);
4435 		t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4436 				   0, OPTAB_WIDEN);
4437 		nsign = expand_shift
4438 		  (RSHIFT_EXPR, compute_mode, t2,
4439 		   build_int_cst (NULL_TREE, size - 1),
4440 		   NULL_RTX, 0);
4441 		t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4442 				    NULL_RTX);
4443 		t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4444 				    NULL_RTX, 0);
4445 		if (t4)
4446 		  {
4447 		    rtx t5;
4448 		    t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4449 				      NULL_RTX, 0);
4450 		    quotient = force_operand (gen_rtx_PLUS (compute_mode,
4451 							    t4, t5),
4452 					      tquotient);
4453 		  }
4454 	      }
4455 	  }
4456 
4457 	if (quotient != 0)
4458 	  break;
4459 	delete_insns_since (last);
4460 
4461 	/* Try using an instruction that produces both the quotient and
4462 	   remainder, using truncation.  We can easily compensate the quotient
4463 	   or remainder to get floor rounding, once we have the remainder.
4464 	   Notice that we compute also the final remainder value here,
4465 	   and return the result right away.  */
4466 	if (target == 0 || GET_MODE (target) != compute_mode)
4467 	  target = gen_reg_rtx (compute_mode);
4468 
4469 	if (rem_flag)
4470 	  {
4471 	    remainder
4472 	      = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4473 	    quotient = gen_reg_rtx (compute_mode);
4474 	  }
4475 	else
4476 	  {
4477 	    quotient
4478 	      = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4479 	    remainder = gen_reg_rtx (compute_mode);
4480 	  }
4481 
4482 	if (expand_twoval_binop (sdivmod_optab, op0, op1,
4483 				 quotient, remainder, 0))
4484 	  {
4485 	    /* This could be computed with a branch-less sequence.
4486 	       Save that for later.  */
4487 	    rtx tem;
4488 	    rtx label = gen_label_rtx ();
4489 	    do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4490 	    tem = expand_binop (compute_mode, xor_optab, op0, op1,
4491 				NULL_RTX, 0, OPTAB_WIDEN);
4492 	    do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4493 	    expand_dec (quotient, const1_rtx);
4494 	    expand_inc (remainder, op1);
4495 	    emit_label (label);
4496 	    return gen_lowpart (mode, rem_flag ? remainder : quotient);
4497 	  }
4498 
4499 	/* No luck with division elimination or divmod.  Have to do it
4500 	   by conditionally adjusting op0 *and* the result.  */
4501 	{
4502 	  rtx label1, label2, label3, label4, label5;
4503 	  rtx adjusted_op0;
4504 	  rtx tem;
4505 
4506 	  quotient = gen_reg_rtx (compute_mode);
4507 	  adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4508 	  label1 = gen_label_rtx ();
4509 	  label2 = gen_label_rtx ();
4510 	  label3 = gen_label_rtx ();
4511 	  label4 = gen_label_rtx ();
4512 	  label5 = gen_label_rtx ();
4513 	  do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4514 	  do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4515 	  tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4516 			      quotient, 0, OPTAB_LIB_WIDEN);
4517 	  if (tem != quotient)
4518 	    emit_move_insn (quotient, tem);
4519 	  emit_jump_insn (gen_jump (label5));
4520 	  emit_barrier ();
4521 	  emit_label (label1);
4522 	  expand_inc (adjusted_op0, const1_rtx);
4523 	  emit_jump_insn (gen_jump (label4));
4524 	  emit_barrier ();
4525 	  emit_label (label2);
4526 	  do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4527 	  tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4528 			      quotient, 0, OPTAB_LIB_WIDEN);
4529 	  if (tem != quotient)
4530 	    emit_move_insn (quotient, tem);
4531 	  emit_jump_insn (gen_jump (label5));
4532 	  emit_barrier ();
4533 	  emit_label (label3);
4534 	  expand_dec (adjusted_op0, const1_rtx);
4535 	  emit_label (label4);
4536 	  tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4537 			      quotient, 0, OPTAB_LIB_WIDEN);
4538 	  if (tem != quotient)
4539 	    emit_move_insn (quotient, tem);
4540 	  expand_dec (quotient, const1_rtx);
4541 	  emit_label (label5);
4542 	}
4543 	break;
4544 
4545       case CEIL_DIV_EXPR:
4546       case CEIL_MOD_EXPR:
4547 	if (unsignedp)
4548 	  {
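	    /* For a power-of-two divisor d, the ceiling quotient is the
	       logical right shift by floor_log2 (d), plus one whenever any of
	       the discarded low-order bits are set; e.g. 13 / 4 becomes
	       (13 >> 2) + 1 = 4.  */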
4549 	    if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4550 	      {
4551 		rtx t1, t2, t3;
4552 		unsigned HOST_WIDE_INT d = INTVAL (op1);
4553 		t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4554 				   build_int_cst (NULL_TREE, floor_log2 (d)),
4555 				   tquotient, 1);
4556 		t2 = expand_binop (compute_mode, and_optab, op0,
4557 				   GEN_INT (d - 1),
4558 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
4559 		t3 = gen_reg_rtx (compute_mode);
4560 		t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4561 				      compute_mode, 1, 1);
4562 		if (t3 == 0)
4563 		  {
4564 		    rtx lab;
4565 		    lab = gen_label_rtx ();
4566 		    do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4567 		    expand_inc (t1, const1_rtx);
4568 		    emit_label (lab);
4569 		    quotient = t1;
4570 		  }
4571 		else
4572 		  quotient = force_operand (gen_rtx_PLUS (compute_mode,
4573 							  t1, t3),
4574 					    tquotient);
4575 		break;
4576 	      }
4577 
4578 	    /* Try using an instruction that produces both the quotient and
4579 	       remainder, using truncation.  We can easily compensate the
4580 	       quotient or remainder to get ceiling rounding, once we have the
4581 	       remainder.  Notice that we also compute the final remainder
4582 	       value here, and return the result right away.  */
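	    /* For example, truncating 7 / 2 gives quotient 3 and remainder 1;
	       the nonzero remainder bumps the quotient to 4 and has op1
	       subtracted from it, giving the ceiling pair 4 and -1.  */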
4583 	    if (target == 0 || GET_MODE (target) != compute_mode)
4584 	      target = gen_reg_rtx (compute_mode);
4585 
4586 	    if (rem_flag)
4587 	      {
4588 		remainder = (REG_P (target)
4589 			     ? target : gen_reg_rtx (compute_mode));
4590 		quotient = gen_reg_rtx (compute_mode);
4591 	      }
4592 	    else
4593 	      {
4594 		quotient = (REG_P (target)
4595 			    ? target : gen_reg_rtx (compute_mode));
4596 		remainder = gen_reg_rtx (compute_mode);
4597 	      }
4598 
4599 	    if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4600 				     remainder, 1))
4601 	      {
4602 		/* This could be computed with a branch-less sequence.
4603 		   Save that for later.  */
4604 		rtx label = gen_label_rtx ();
4605 		do_cmp_and_jump (remainder, const0_rtx, EQ,
4606 				 compute_mode, label);
4607 		expand_inc (quotient, const1_rtx);
4608 		expand_dec (remainder, op1);
4609 		emit_label (label);
4610 		return gen_lowpart (mode, rem_flag ? remainder : quotient);
4611 	      }
4612 
4613 	    /* No luck with division elimination or divmod.  Have to do it
4614 	       by conditionally adjusting op0 *and* the result.  */
4615 	    {
4616 	      rtx label1, label2;
4617 	      rtx adjusted_op0, tem;
4618 
4619 	      quotient = gen_reg_rtx (compute_mode);
4620 	      adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4621 	      label1 = gen_label_rtx ();
4622 	      label2 = gen_label_rtx ();
4623 	      do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4624 			       compute_mode, label1);
4625 	      emit_move_insn (quotient, const0_rtx);
4626 	      emit_jump_insn (gen_jump (label2));
4627 	      emit_barrier ();
4628 	      emit_label (label1);
4629 	      expand_dec (adjusted_op0, const1_rtx);
4630 	      tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4631 				  quotient, 1, OPTAB_LIB_WIDEN);
4632 	      if (tem != quotient)
4633 		emit_move_insn (quotient, tem);
4634 	      expand_inc (quotient, const1_rtx);
4635 	      emit_label (label2);
4636 	    }
4637 	  }
4638 	else /* signed */
4639 	  {
4640 	    if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4641 		&& INTVAL (op1) >= 0)
4642 	      {
4643 		/* This is extremely similar to the code for the unsigned case
4644 		   above.  For 2.7 we should merge these variants, but for
4645 		   2.6.1 I don't want to touch the code for unsigned since that
4646 		   gets used in C.  The signed case will only be used by other
4647 		   languages (Ada).  */
4648 
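		/* The arithmetic shift below already rounds toward minus
		   infinity, so adding one whenever the discarded bits are
		   nonzero gives ceiling rounding for either sign of op0;
		   e.g. -7 / 4 becomes (-7 >> 2) + 1 = -2 + 1 = -1.  */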
4649 		rtx t1, t2, t3;
4650 		unsigned HOST_WIDE_INT d = INTVAL (op1);
4651 		t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4652 				   build_int_cst (NULL_TREE, floor_log2 (d)),
4653 				   tquotient, 0);
4654 		t2 = expand_binop (compute_mode, and_optab, op0,
4655 				   GEN_INT (d - 1),
4656 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
4657 		t3 = gen_reg_rtx (compute_mode);
4658 		t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4659 				      compute_mode, 1, 1);
4660 		if (t3 == 0)
4661 		  {
4662 		    rtx lab;
4663 		    lab = gen_label_rtx ();
4664 		    do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4665 		    expand_inc (t1, const1_rtx);
4666 		    emit_label (lab);
4667 		    quotient = t1;
4668 		  }
4669 		else
4670 		  quotient = force_operand (gen_rtx_PLUS (compute_mode,
4671 							  t1, t3),
4672 					    tquotient);
4673 		break;
4674 	      }
4675 
4676 	    /* Try using an instruction that produces both the quotient and
4677 	       remainder, using truncation.  We can easily compensate the
4678 	       quotient or remainder to get ceiling rounding, once we have the
4679 	       remainder.  Notice that we also compute the final remainder
4680 	       value here, and return the result right away.  */
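	    /* Rounding toward plus infinity only changes the truncated
	       quotient when it is positive, i.e. when op0 and op1 have the
	       same sign, which is what the XOR test in the sequence below
	       checks for.  */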
4681 	    if (target == 0 || GET_MODE (target) != compute_mode)
4682 	      target = gen_reg_rtx (compute_mode);
4683 	    if (rem_flag)
4684 	      {
4685 		remainder = (REG_P (target)
4686 			     ? target : gen_reg_rtx (compute_mode));
4687 		quotient = gen_reg_rtx (compute_mode);
4688 	      }
4689 	    else
4690 	      {
4691 		quotient = (REG_P (target)
4692 			    ? target : gen_reg_rtx (compute_mode));
4693 		remainder = gen_reg_rtx (compute_mode);
4694 	      }
4695 
4696 	    if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4697 				     remainder, 0))
4698 	      {
4699 		/* This could be computed with a branch-less sequence.
4700 		   Save that for later.  */
4701 		rtx tem;
4702 		rtx label = gen_label_rtx ();
4703 		do_cmp_and_jump (remainder, const0_rtx, EQ,
4704 				 compute_mode, label);
4705 		tem = expand_binop (compute_mode, xor_optab, op0, op1,
4706 				    NULL_RTX, 0, OPTAB_WIDEN);
4707 		do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4708 		expand_inc (quotient, const1_rtx);
4709 		expand_dec (remainder, op1);
4710 		emit_label (label);
4711 		return gen_lowpart (mode, rem_flag ? remainder : quotient);
4712 	      }
4713 
4714 	    /* No luck with division elimination or divmod.  Have to do it
4715 	       by conditionally adjusting op0 *and* the result.  */
4716 	    {
4717 	      rtx label1, label2, label3, label4, label5;
4718 	      rtx adjusted_op0;
4719 	      rtx tem;
4720 
4721 	      quotient = gen_reg_rtx (compute_mode);
4722 	      adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4723 	      label1 = gen_label_rtx ();
4724 	      label2 = gen_label_rtx ();
4725 	      label3 = gen_label_rtx ();
4726 	      label4 = gen_label_rtx ();
4727 	      label5 = gen_label_rtx ();
4728 	      do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4729 	      do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4730 			       compute_mode, label1);
4731 	      tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4732 				  quotient, 0, OPTAB_LIB_WIDEN);
4733 	      if (tem != quotient)
4734 		emit_move_insn (quotient, tem);
4735 	      emit_jump_insn (gen_jump (label5));
4736 	      emit_barrier ();
4737 	      emit_label (label1);
4738 	      expand_dec (adjusted_op0, const1_rtx);
4739 	      emit_jump_insn (gen_jump (label4));
4740 	      emit_barrier ();
4741 	      emit_label (label2);
4742 	      do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4743 			       compute_mode, label3);
4744 	      tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4745 				  quotient, 0, OPTAB_LIB_WIDEN);
4746 	      if (tem != quotient)
4747 		emit_move_insn (quotient, tem);
4748 	      emit_jump_insn (gen_jump (label5));
4749 	      emit_barrier ();
4750 	      emit_label (label3);
4751 	      expand_inc (adjusted_op0, const1_rtx);
4752 	      emit_label (label4);
4753 	      tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4754 				  quotient, 0, OPTAB_LIB_WIDEN);
4755 	      if (tem != quotient)
4756 		emit_move_insn (quotient, tem);
4757 	      expand_inc (quotient, const1_rtx);
4758 	      emit_label (label5);
4759 	    }
4760 	  }
4761 	break;
4762 
4763       case EXACT_DIV_EXPR:
4764 	if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4765 	  {
4766 	    HOST_WIDE_INT d = INTVAL (op1);
4767 	    unsigned HOST_WIDE_INT ml;
4768 	    int pre_shift;
4769 	    rtx t1;
4770 
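	    /* Since the division is known to be exact, it can be done without
	       a divide instruction: shift out the power-of-two factor of the
	       divisor, then multiply by the multiplicative inverse of the
	       remaining odd factor modulo 2^size, which invert_mod2n
	       computes; the low size bits of the product are the exact
	       quotient.  */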
4771 	    pre_shift = floor_log2 (d & -d);
4772 	    ml = invert_mod2n (d >> pre_shift, size);
4773 	    t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4774 			       build_int_cst (NULL_TREE, pre_shift),
4775 			       NULL_RTX, unsignedp);
4776 	    quotient = expand_mult (compute_mode, t1,
4777 				    gen_int_mode (ml, compute_mode),
4778 				    NULL_RTX, 1);
4779 
4780 	    insn = get_last_insn ();
4781 	    set_unique_reg_note (insn,
4782 				 REG_EQUAL,
4783 				 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4784 						 compute_mode,
4785 						 op0, op1));
4786 	  }
4787 	break;
4788 
4789       case ROUND_DIV_EXPR:
4790       case ROUND_MOD_EXPR:
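	/* Round the quotient to the nearest integer: form the truncating
	   quotient and remainder first, then move the quotient away from
	   zero whenever twice the remainder's magnitude is at least the
	   divisor's magnitude, so halves round away from zero.  */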
4791 	if (unsignedp)
4792 	  {
4793 	    rtx tem;
4794 	    rtx label;
4795 	    label = gen_label_rtx ();
4796 	    quotient = gen_reg_rtx (compute_mode);
4797 	    remainder = gen_reg_rtx (compute_mode);
4798 	    if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4799 	      {
4800 		rtx tem;
4801 		quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4802 					 quotient, 1, OPTAB_LIB_WIDEN);
4803 		tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4804 		remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4805 					  remainder, 1, OPTAB_LIB_WIDEN);
4806 	      }
4807 	    tem = plus_constant (op1, -1);
4808 	    tem = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4809 				build_int_cst (NULL_TREE, 1),
4810 				NULL_RTX, 1);
4811 	    do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4812 	    expand_inc (quotient, const1_rtx);
4813 	    expand_dec (remainder, op1);
4814 	    emit_label (label);
4815 	  }
4816 	else
4817 	  {
4818 	    rtx abs_rem, abs_op1, tem, mask;
4819 	    rtx label;
4820 	    label = gen_label_rtx ();
4821 	    quotient = gen_reg_rtx (compute_mode);
4822 	    remainder = gen_reg_rtx (compute_mode);
4823 	    if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4824 	      {
4825 		rtx tem;
4826 		quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4827 					 quotient, 0, OPTAB_LIB_WIDEN);
4828 		tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4829 		remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4830 					  remainder, 0, OPTAB_LIB_WIDEN);
4831 	      }
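	    /* The quotient needs adjusting when 2 * |remainder| >= |op1|.
	       mask below is -1 when op0 and op1 have opposite signs and 0
	       otherwise, so (x ^ mask) - mask conditionally negates x; this
	       adjusts the quotient by +1 or -1 and the remainder by -op1 or
	       +op1 without branching on the signs.  */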
4832 	    abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4833 	    abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4834 	    tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4835 				build_int_cst (NULL_TREE, 1),
4836 				NULL_RTX, 1);
4837 	    do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4838 	    tem = expand_binop (compute_mode, xor_optab, op0, op1,
4839 				NULL_RTX, 0, OPTAB_WIDEN);
4840 	    mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4841 				 build_int_cst (NULL_TREE, size - 1),
4842 				 NULL_RTX, 0);
4843 	    tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4844 				NULL_RTX, 0, OPTAB_WIDEN);
4845 	    tem = expand_binop (compute_mode, sub_optab, tem, mask,
4846 				NULL_RTX, 0, OPTAB_WIDEN);
4847 	    expand_inc (quotient, tem);
4848 	    tem = expand_binop (compute_mode, xor_optab, mask, op1,
4849 				NULL_RTX, 0, OPTAB_WIDEN);
4850 	    tem = expand_binop (compute_mode, sub_optab, tem, mask,
4851 				NULL_RTX, 0, OPTAB_WIDEN);
4852 	    expand_dec (remainder, tem);
4853 	    emit_label (label);
4854 	  }
4855 	return gen_lowpart (mode, rem_flag ? remainder : quotient);
4856 
4857       default:
4858 	gcc_unreachable ();
4859       }
4860 
4861   if (quotient == 0)
4862     {
4863       if (target && GET_MODE (target) != compute_mode)
4864 	target = 0;
4865 
4866       if (rem_flag)
4867 	{
4868 	  /* Try to produce the remainder without producing the quotient.
4869 	     If we seem to have a divmod pattern that does not require widening,
4870 	     don't try widening here.  We should really have a WIDEN argument
4871 	     to expand_twoval_binop, since what we'd really like to do here is
4872 	     1) try a mod insn in compute_mode
4873 	     2) try a divmod insn in compute_mode
4874 	     3) try a div insn in compute_mode and multiply-subtract to get
4875 	        remainder
4876 	     4) try the same things with widening allowed.  */
4877 	  remainder
4878 	    = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4879 				 op0, op1, target,
4880 				 unsignedp,
4881 				 ((optab_handler (optab2, compute_mode)->insn_code
4882 				   != CODE_FOR_nothing)
4883 				  ? OPTAB_DIRECT : OPTAB_WIDEN));
4884 	  if (remainder == 0)
4885 	    {
4886 	      /* No luck there.  Can we do remainder and divide at once
4887 		 without a library call?  */
4888 	      remainder = gen_reg_rtx (compute_mode);
4889 	      if (! expand_twoval_binop ((unsignedp
4890 					  ? udivmod_optab
4891 					  : sdivmod_optab),
4892 					 op0, op1,
4893 					 NULL_RTX, remainder, unsignedp))
4894 		remainder = 0;
4895 	    }
4896 
4897 	  if (remainder)
4898 	    return gen_lowpart (mode, remainder);
4899 	}
4900 
4901       /* Produce the quotient.  Try a quotient insn, but not a library call.
4902 	 If we have a divmod in this mode, use it in preference to widening
4903 	 the div (for this test we assume it will not fail).  Note that optab2
4904 	 is set to whichever of the two optabs the call below will use.  */
4905       quotient
4906 	= sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4907 			     op0, op1, rem_flag ? NULL_RTX : target,
4908 			     unsignedp,
4909 			     ((optab_handler (optab2, compute_mode)->insn_code
4910 			       != CODE_FOR_nothing)
4911 			      ? OPTAB_DIRECT : OPTAB_WIDEN));
4912 
4913       if (quotient == 0)
4914 	{
4915 	  /* No luck there.  Try a quotient-and-remainder insn,
4916 	     keeping the quotient alone.  */
4917 	  quotient = gen_reg_rtx (compute_mode);
4918 	  if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4919 				     op0, op1,
4920 				     quotient, NULL_RTX, unsignedp))
4921 	    {
4922 	      quotient = 0;
4923 	      if (! rem_flag)
4924 		/* Still no luck.  If we are not computing the remainder,
4925 		   use a library call for the quotient.  */
4926 		quotient = sign_expand_binop (compute_mode,
4927 					      udiv_optab, sdiv_optab,
4928 					      op0, op1, target,
4929 					      unsignedp, OPTAB_LIB_WIDEN);
4930 	    }
4931 	}
4932     }
4933 
4934   if (rem_flag)
4935     {
4936       if (target && GET_MODE (target) != compute_mode)
4937 	target = 0;
4938 
4939       if (quotient == 0)
4940 	{
4941 	  /* No divide instruction either.  Use library for remainder.  */
4942 	  remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4943 					 op0, op1, target,
4944 					 unsignedp, OPTAB_LIB_WIDEN);
4945 	  /* No remainder function.  Try a quotient-and-remainder
4946 	     function, keeping the remainder.  */
4947 	  if (!remainder)
4948 	    {
4949 	      remainder = gen_reg_rtx (compute_mode);
4950 	      if (!expand_twoval_binop_libfunc
4951 		  (unsignedp ? udivmod_optab : sdivmod_optab,
4952 		   op0, op1,
4953 		   NULL_RTX, remainder,
4954 		   unsignedp ? UMOD : MOD))
4955 		remainder = NULL_RTX;
4956 	    }
4957 	}
4958       else
4959 	{
4960 	  /* We divided.  Now finish doing X - Y * (X / Y).  */
4961 	  remainder = expand_mult (compute_mode, quotient, op1,
4962 				   NULL_RTX, unsignedp);
4963 	  remainder = expand_binop (compute_mode, sub_optab, op0,
4964 				    remainder, target, unsignedp,
4965 				    OPTAB_LIB_WIDEN);
4966 	}
4967     }
4968 
4969   return gen_lowpart (mode, rem_flag ? remainder : quotient);
4970 }
4971 
4972 /* Return a tree node with data type TYPE, describing the value of X.
4973    Usually this is a VAR_DECL, if there is no obvious better choice.
4974    X may be an expression; however, we only support those expressions
4975    generated by loop.c.  */
4976 
4977 tree
4978 make_tree (tree type, rtx x)
4979 {
4980   tree t;
4981 
4982   switch (GET_CODE (x))
4983     {
4984     case CONST_INT:
4985       {
4986 	HOST_WIDE_INT hi = 0;
4987 
4988 	if (INTVAL (x) < 0
4989 	    && !(TYPE_UNSIGNED (type)
4990 		 && (GET_MODE_BITSIZE (TYPE_MODE (type))
4991 		     < HOST_BITS_PER_WIDE_INT)))
4992 	  hi = -1;
4993 
4994 	t = build_int_cst_wide (type, INTVAL (x), hi);
4995 
4996 	return t;
4997       }
4998 
4999     case CONST_DOUBLE:
5000       if (GET_MODE (x) == VOIDmode)
5001 	t = build_int_cst_wide (type,
5002 				CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
5003       else
5004 	{
5005 	  REAL_VALUE_TYPE d;
5006 
5007 	  REAL_VALUE_FROM_CONST_DOUBLE (d, x);
5008 	  t = build_real (type, d);
5009 	}
5010 
5011       return t;
5012 
5013     case CONST_VECTOR:
5014       {
5015 	int units = CONST_VECTOR_NUNITS (x);
5016 	tree itype = TREE_TYPE (type);
5017 	tree t = NULL_TREE;
5018 	int i;
5019 
5020 
5021 	/* Build a tree with vector elements.  */
5022 	for (i = units - 1; i >= 0; --i)
5023 	  {
5024 	    rtx elt = CONST_VECTOR_ELT (x, i);
5025 	    t = tree_cons (NULL_TREE, make_tree (itype, elt), t);
5026 	  }
5027 
5028 	return build_vector (type, t);
5029       }
5030 
5031     case PLUS:
5032       return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5033 			  make_tree (type, XEXP (x, 1)));
5034 
5035     case MINUS:
5036       return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5037 			  make_tree (type, XEXP (x, 1)));
5038 
5039     case NEG:
5040       return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5041 
5042     case MULT:
5043       return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5044 			  make_tree (type, XEXP (x, 1)));
5045 
5046     case ASHIFT:
5047       return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5048 			  make_tree (type, XEXP (x, 1)));
5049 
5050     case LSHIFTRT:
5051       t = unsigned_type_for (type);
5052       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5053 			    		 make_tree (t, XEXP (x, 0)),
5054 				    	 make_tree (type, XEXP (x, 1))));
5055 
5056     case ASHIFTRT:
5057       t = signed_type_for (type);
5058       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5059 					 make_tree (t, XEXP (x, 0)),
5060 				    	 make_tree (type, XEXP (x, 1))));
5061 
5062     case DIV:
5063       if (TREE_CODE (type) != REAL_TYPE)
5064 	t = signed_type_for (type);
5065       else
5066 	t = type;
5067 
5068       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5069 				    	 make_tree (t, XEXP (x, 0)),
5070 				    	 make_tree (t, XEXP (x, 1))));
5071     case UDIV:
5072       t = unsigned_type_for (type);
5073       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5074 				    	 make_tree (t, XEXP (x, 0)),
5075 				    	 make_tree (t, XEXP (x, 1))));
5076 
5077     case SIGN_EXTEND:
5078     case ZERO_EXTEND:
5079       t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5080 					  GET_CODE (x) == ZERO_EXTEND);
5081       return fold_convert (type, make_tree (t, XEXP (x, 0)));
5082 
5083     case CONST:
5084       return make_tree (type, XEXP (x, 0));
5085 
5086     case SYMBOL_REF:
5087       t = SYMBOL_REF_DECL (x);
5088       if (t)
5089 	return fold_convert (type, build_fold_addr_expr (t));
5090       /* else fall through.  */
5091 
5092     default:
5093       t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5094 
5095       /* If TYPE is a POINTER_TYPE, we might need to convert X from
5096 	 address mode to pointer mode.  */
5097       if (POINTER_TYPE_P (type))
5098 	x = convert_memory_address_addr_space
5099 	      (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5100 
5101       /* Note that we do *not* use SET_DECL_RTL here, because we do not
5102 	 want set_decl_rtl to go adjusting REG_ATTRS for this temporary.  */
5103       t->decl_with_rtl.rtl = x;
5104 
5105       return t;
5106     }
5107 }
5108 
5109 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5110    and returning TARGET.
5111 
5112    If TARGET is 0, a pseudo-register or constant is returned.  */
5113 
5114 rtx
5115 expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
5116 {
5117   rtx tem = 0;
5118 
5119   if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5120     tem = simplify_binary_operation (AND, mode, op0, op1);
5121   if (tem == 0)
5122     tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5123 
5124   if (target == 0)
5125     target = tem;
5126   else if (tem != target)
5127     emit_move_insn (target, tem);
5128   return target;
5129 }
5130 
5131 /* Helper function for emit_store_flag.  */
5132 static rtx
5133 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5134 	     enum machine_mode mode, enum machine_mode compare_mode,
5135 	     int unsignedp, rtx x, rtx y, int normalizep,
5136 	     enum machine_mode target_mode)
5137 {
5138   rtx op0, last, comparison, subtarget, pattern;
5139   enum machine_mode result_mode = insn_data[(int) icode].operand[0].mode;
5140 
5141   last = get_last_insn ();
5142   x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5143   y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5144   comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5145   if (!x || !y
5146       || !insn_data[icode].operand[2].predicate
5147 	  (x, insn_data[icode].operand[2].mode)
5148       || !insn_data[icode].operand[3].predicate
5149 	  (y, insn_data[icode].operand[3].mode)
5150       || !insn_data[icode].operand[1].predicate (comparison, VOIDmode))
5151     {
5152       delete_insns_since (last);
5153       return NULL_RTX;
5154     }
5155 
5156   if (target_mode == VOIDmode)
5157     target_mode = result_mode;
5158   if (!target)
5159     target = gen_reg_rtx (target_mode);
5160 
5161   if (optimize
5162       || !(insn_data[(int) icode].operand[0].predicate (target, result_mode)))
5163     subtarget = gen_reg_rtx (result_mode);
5164   else
5165     subtarget = target;
5166 
5167   pattern = GEN_FCN (icode) (subtarget, comparison, x, y);
5168   if (!pattern)
5169     return NULL_RTX;
5170   emit_insn (pattern);
5171 
5172   /* If we are converting to a wider mode, first convert to
5173      TARGET_MODE, then normalize.  This produces better combining
5174      opportunities on machines that have a SIGN_EXTRACT when we are
5175      testing a single bit.  This mostly benefits the 68k.
5176 
5177      If STORE_FLAG_VALUE does not have the sign bit set when
5178      interpreted in MODE, we can do this conversion as unsigned, which
5179      is usually more efficient.  */
5180   if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
5181     {
5182       convert_move (target, subtarget,
5183 		    (GET_MODE_BITSIZE (result_mode) <= HOST_BITS_PER_WIDE_INT)
5184 		    && 0 == (STORE_FLAG_VALUE
5185 			     & ((HOST_WIDE_INT) 1
5186 				<< (GET_MODE_BITSIZE (result_mode) -1))));
5187       op0 = target;
5188       result_mode = target_mode;
5189     }
5190   else
5191     op0 = subtarget;
5192 
5193   /* If we want to keep subexpressions around, don't reuse our last
5194      target.  */
5195   if (optimize)
5196     subtarget = 0;
5197 
5198   /* Now normalize to the proper value in MODE.  Sometimes we don't
5199      have to do anything.  */
5200   if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5201     ;
5202   /* STORE_FLAG_VALUE might be the most negative number, so write
5203      the comparison this way to avoid a compile-time warning.  */
5204   else if (- normalizep == STORE_FLAG_VALUE)
5205     op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5206 
5207   /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5208      it hard to use a value of just the sign bit due to ANSI integer
5209      constant typing rules.  */
5210   else if (GET_MODE_BITSIZE (result_mode) <= HOST_BITS_PER_WIDE_INT
5211 	   && (STORE_FLAG_VALUE
5212 	       & ((HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (result_mode) - 1))))
5213     op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5214 			size_int (GET_MODE_BITSIZE (result_mode) - 1), subtarget,
5215 			normalizep == 1);
5216   else
5217     {
5218       gcc_assert (STORE_FLAG_VALUE & 1);
5219 
5220       op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5221       if (normalizep == -1)
5222 	op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5223     }
5224 
5225   /* If we were converting to a smaller mode, do the conversion now.  */
5226   if (target_mode != result_mode)
5227     {
5228       convert_move (target, op0, 0);
5229       return target;
5230     }
5231   else
5232     return op0;
5233 }
5234 
5235 
5236 /* A subroutine of emit_store_flag only including "tricks" that do not
5237    need a recursive call.  These are kept separate to avoid infinite
5238    loops.  */
5239 
5240 static rtx
5241 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5242 		   enum machine_mode mode, int unsignedp, int normalizep,
5243 		   enum machine_mode target_mode)
5244 {
5245   rtx subtarget;
5246   enum insn_code icode;
5247   enum machine_mode compare_mode;
5248   enum mode_class mclass;
5249   enum rtx_code scode;
5250   rtx tem;
5251 
5252   if (unsignedp)
5253     code = unsigned_condition (code);
5254   scode = swap_condition (code);
5255 
5256   /* If one operand is constant, make it the second one.  Only do this
5257      if the other operand is not constant as well.  */
5258 
5259   if (swap_commutative_operands_p (op0, op1))
5260     {
5261       tem = op0;
5262       op0 = op1;
5263       op1 = tem;
5264       code = swap_condition (code);
5265     }
5266 
5267   if (mode == VOIDmode)
5268     mode = GET_MODE (op0);
5269 
5270   /* For some comparisons with 1 and -1, we can convert this to
5271      comparisons with zero.  This will often produce more opportunities for
5272      store-flag insns.  */
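  /* For example, "x < 1" becomes "x <= 0"; unsigned, "x >= 1" becomes
     "x != 0" and "x < 1" becomes "x == 0".  */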
5273 
5274   switch (code)
5275     {
5276     case LT:
5277       if (op1 == const1_rtx)
5278 	op1 = const0_rtx, code = LE;
5279       break;
5280     case LE:
5281       if (op1 == constm1_rtx)
5282 	op1 = const0_rtx, code = LT;
5283       break;
5284     case GE:
5285       if (op1 == const1_rtx)
5286 	op1 = const0_rtx, code = GT;
5287       break;
5288     case GT:
5289       if (op1 == constm1_rtx)
5290 	op1 = const0_rtx, code = GE;
5291       break;
5292     case GEU:
5293       if (op1 == const1_rtx)
5294 	op1 = const0_rtx, code = NE;
5295       break;
5296     case LTU:
5297       if (op1 == const1_rtx)
5298 	op1 = const0_rtx, code = EQ;
5299       break;
5300     default:
5301       break;
5302     }
5303 
5304   /* If we are comparing a double-word integer with zero or -1, we can
5305      convert the comparison into one involving a single word.  */
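  /* Equality with zero holds exactly when the OR of the two words is zero,
     equality with -1 when their AND is -1, and a sign test (LT or GE
     against zero) only needs to look at the high word.  */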
5306   if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5307       && GET_MODE_CLASS (mode) == MODE_INT
5308       && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5309     {
5310       if ((code == EQ || code == NE)
5311 	  && (op1 == const0_rtx || op1 == constm1_rtx))
5312 	{
5313 	  rtx op00, op01;
5314 
5315 	  /* Do a logical OR or AND of the two words and compare the
5316 	     result.  */
5317 	  op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5318 	  op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5319 	  tem = expand_binop (word_mode,
5320 			      op1 == const0_rtx ? ior_optab : and_optab,
5321 			      op00, op01, NULL_RTX, unsignedp,
5322 			      OPTAB_DIRECT);
5323 
5324 	  if (tem != 0)
5325 	    tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5326 				   unsignedp, normalizep);
5327 	}
5328       else if ((code == LT || code == GE) && op1 == const0_rtx)
5329 	{
5330 	  rtx op0h;
5331 
5332 	  /* If testing the sign bit, can just test on high word.  */
5333 	  op0h = simplify_gen_subreg (word_mode, op0, mode,
5334 				      subreg_highpart_offset (word_mode,
5335 							      mode));
5336 	  tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5337 				 unsignedp, normalizep);
5338 	}
5339       else
5340 	tem = NULL_RTX;
5341 
5342       if (tem)
5343 	{
5344 	  if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5345 	    return tem;
5346 	  if (!target)
5347 	    target = gen_reg_rtx (target_mode);
5348 
5349 	  convert_move (target, tem,
5350 			0 == ((normalizep ? normalizep : STORE_FLAG_VALUE)
5351 			      & ((HOST_WIDE_INT) 1
5352 				 << (GET_MODE_BITSIZE (word_mode) -1))));
5353 	  return target;
5354 	}
5355     }
5356 
5357   /* If this is A < 0 or A >= 0, we can do this by taking the ones
5358      complement of A (for GE) and shifting the sign bit to the low bit.  */
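  /* When a 0/1 result is wanted, "A < 0" in a 32-bit mode becomes the
     logical shift "(unsigned) A >> 31", which is 1 exactly when the sign
     bit is set; "A >= 0" complements A first so the shifted-out bit is
     inverted.  */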
5359   if (op1 == const0_rtx && (code == LT || code == GE)
5360       && GET_MODE_CLASS (mode) == MODE_INT
5361       && (normalizep || STORE_FLAG_VALUE == 1
5362 	  || (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
5363 	      && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
5364 		  == ((unsigned HOST_WIDE_INT) 1
5365 		      << (GET_MODE_BITSIZE (mode) - 1))))))
5366     {
5367       subtarget = target;
5368 
5369       if (!target)
5370 	target_mode = mode;
5371 
5372       /* If the result is to be wider than OP0, it is best to convert it
5373 	 first.  If it is to be narrower, it is *incorrect* to convert it
5374 	 first.  */
5375       else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5376 	{
5377 	  op0 = convert_modes (target_mode, mode, op0, 0);
5378 	  mode = target_mode;
5379 	}
5380 
5381       if (target_mode != mode)
5382 	subtarget = 0;
5383 
5384       if (code == GE)
5385 	op0 = expand_unop (mode, one_cmpl_optab, op0,
5386 			   ((STORE_FLAG_VALUE == 1 || normalizep)
5387 			    ? 0 : subtarget), 0);
5388 
5389       if (STORE_FLAG_VALUE == 1 || normalizep)
5390 	/* If we are supposed to produce a 0/1 value, we want to do
5391 	   a logical shift from the sign bit to the low-order bit; for
5392 	   a -1/0 value, we do an arithmetic shift.  */
5393 	op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5394 			    size_int (GET_MODE_BITSIZE (mode) - 1),
5395 			    subtarget, normalizep != -1);
5396 
5397       if (mode != target_mode)
5398 	op0 = convert_modes (target_mode, mode, op0, 0);
5399 
5400       return op0;
5401     }
5402 
5403   mclass = GET_MODE_CLASS (mode);
5404   for (compare_mode = mode; compare_mode != VOIDmode;
5405        compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5406     {
5407      enum machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5408      icode = optab_handler (cstore_optab, optab_mode)->insn_code;
5409      if (icode != CODE_FOR_nothing)
5410 	{
5411 	  do_pending_stack_adjust ();
5412 	  tem = emit_cstore (target, icode, code, mode, compare_mode,
5413 			     unsignedp, op0, op1, normalizep, target_mode);
5414 	  if (tem)
5415 	    return tem;
5416 
5417 	  if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5418 	    {
5419 	      tem = emit_cstore (target, icode, scode, mode, compare_mode,
5420 				 unsignedp, op1, op0, normalizep, target_mode);
5421 	      if (tem)
5422 	        return tem;
5423 	    }
5424 	  break;
5425 	}
5426     }
5427 
5428   return 0;
5429 }
5430 
5431 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5432    and storing in TARGET.  Normally return TARGET.
5433    Return 0 if that cannot be done.
5434 
5435    MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
5436    it is VOIDmode, they cannot both be CONST_INT.
5437 
5438    UNSIGNEDP is for the case where we have to widen the operands
5439    to perform the operation.  It says to use zero-extension.
5440 
5441    NORMALIZEP is 1 if we should convert the result to be either zero
5442    or one.  NORMALIZEP is -1 if we should convert the result to be
5443    either zero or -1.  If NORMALIZEP is zero, the result will be left
5444    "raw" out of the scc insn.  */
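/* For example, with CODE == EQ and NORMALIZEP == 1 the value produced is 1
   when OP0 equals OP1 and 0 otherwise; with NORMALIZEP == 0 it is whatever
   raw value the machine's store-flag pattern leaves behind.  */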
5445 
5446 rtx
5447 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5448 		 enum machine_mode mode, int unsignedp, int normalizep)
5449 {
5450   enum machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5451   enum rtx_code rcode;
5452   rtx subtarget;
5453   rtx tem, last, trueval;
5454 
5455   tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5456 			   target_mode);
5457   if (tem)
5458     return tem;
5459 
5460   /* If we reached here, we can't do this with a scc insn; however, there
5461      are some comparisons that can be done in other ways.  Don't do any
5462      of these cases if branches are very cheap.  */
5463   if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5464     return 0;
5465 
5466   /* See what we need to return.  We can only return a 1, -1, or the
5467      sign bit.  */
5468 
5469   if (normalizep == 0)
5470     {
5471       if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5472 	normalizep = STORE_FLAG_VALUE;
5473 
5474       else if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
5475 	       && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
5476 		   == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))
5477 	;
5478       else
5479 	return 0;
5480     }
5481 
5482   last = get_last_insn ();
5483 
5484   /* If optimizing, use different pseudo registers for each insn, instead
5485      of reusing the same pseudo.  This leads to better CSE, but slows
5486      down the compiler, since there are more pseudos.  */
5487   subtarget = (!optimize
5488 	       && (target_mode == mode)) ? target : NULL_RTX;
5489   trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5490 
5491   /* For floating-point comparisons, try the reverse comparison or try
5492      changing the "orderedness" of the comparison.  */
5493   if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5494     {
5495       enum rtx_code first_code;
5496       bool and_them;
5497 
5498       rcode = reverse_condition_maybe_unordered (code);
5499       if (can_compare_p (rcode, mode, ccp_store_flag)
5500           && (code == ORDERED || code == UNORDERED
5501 	      || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5502 	      || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5503 	{
5504           int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5505 		          || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5506 
5507 	  /* For the reverse comparison, use either an addition or a XOR.  */
5508           if (want_add
5509 	      && rtx_cost (GEN_INT (normalizep), PLUS,
5510 			   optimize_insn_for_speed_p ()) == 0)
5511 	    {
5512 	      tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5513 				       STORE_FLAG_VALUE, target_mode);
5514 	      if (tem)
5515                 return expand_binop (target_mode, add_optab, tem,
5516 				     GEN_INT (normalizep),
5517 				     target, 0, OPTAB_WIDEN);
5518 	    }
5519           else if (!want_add
5520 	           && rtx_cost (trueval, XOR,
5521 			        optimize_insn_for_speed_p ()) == 0)
5522 	    {
5523 	      tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5524 				       normalizep, target_mode);
5525 	      if (tem)
5526                 return expand_binop (target_mode, xor_optab, tem, trueval,
5527 				     target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
5528 	    }
5529 	}
5530 
5531       delete_insns_since (last);
5532 
5533       /* Cannot split ORDERED and UNORDERED, only try the above trick.   */
5534       if (code == ORDERED || code == UNORDERED)
5535 	return 0;
5536 
5537       and_them = split_comparison (code, mode, &first_code, &code);
5538 
5539       /* If there are no NaNs, the first comparison should always fall through.
5540          Effectively change the comparison to the other one.  */
5541       if (!HONOR_NANS (mode))
5542 	{
5543           gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5544 	  return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5545 				    target_mode);
5546 	}
5547 
5548 #ifdef HAVE_conditional_move
5549       /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5550 	 conditional move.  */
5551       tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5552 			       normalizep, target_mode);
5553       if (tem == 0)
5554 	return 0;
5555 
5556       if (and_them)
5557         tem = emit_conditional_move (target, code, op0, op1, mode,
5558 				     tem, const0_rtx, GET_MODE (tem), 0);
5559       else
5560         tem = emit_conditional_move (target, code, op0, op1, mode,
5561 				     trueval, tem, GET_MODE (tem), 0);
5562 
5563       if (tem == 0)
5564         delete_insns_since (last);
5565       return tem;
5566 #else
5567       return 0;
5568 #endif
5569     }
5570 
5571   /* The remaining tricks only apply to integer comparisons.  */
5572 
5573   if (GET_MODE_CLASS (mode) != MODE_INT)
5574     return 0;
5575 
5576   /* If this is an equality comparison of integers, we can try to exclusive-or
5577      (or subtract) the two operands and use a recursive call to try the
5578      comparison with zero.  Don't do any of these cases if branches are
5579      very cheap.  */
5580 
5581   if ((code == EQ || code == NE) && op1 != const0_rtx)
5582     {
5583       tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5584 			  OPTAB_WIDEN);
5585 
5586       if (tem == 0)
5587 	tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5588 			    OPTAB_WIDEN);
5589       if (tem != 0)
5590 	tem = emit_store_flag (target, code, tem, const0_rtx,
5591 			       mode, unsignedp, normalizep);
5592       if (tem != 0)
5593 	return tem;
5594 
5595       delete_insns_since (last);
5596     }
5597 
5598   /* For integer comparisons, try the reverse comparison.  However, for
5599      small X, and if we'd have to extend anyway, implementing "X != 0"
5600      as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0".  */
5601   rcode = reverse_condition (code);
5602   if (can_compare_p (rcode, mode, ccp_store_flag)
5603       && ! (optab_handler (cstore_optab, mode)->insn_code == CODE_FOR_nothing
5604 	    && code == NE
5605 	    && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5606 	    && op1 == const0_rtx))
5607     {
5608       int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5609 		      || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5610 
5611       /* Again, for the reverse comparison, use either an addition or a XOR.  */
5612       if (want_add
5613 	  && rtx_cost (GEN_INT (normalizep), PLUS,
5614 		       optimize_insn_for_speed_p ()) == 0)
5615 	{
5616 	  tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5617 				   STORE_FLAG_VALUE, target_mode);
5618 	  if (tem != 0)
5619             tem = expand_binop (target_mode, add_optab, tem,
5620 				GEN_INT (normalizep), target, 0, OPTAB_WIDEN);
5621 	}
5622       else if (!want_add
5623 	       && rtx_cost (trueval, XOR,
5624 			    optimize_insn_for_speed_p ()) == 0)
5625 	{
5626 	  tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5627 				   normalizep, target_mode);
5628 	  if (tem != 0)
5629             tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5630 				INTVAL (trueval) >= 0, OPTAB_WIDEN);
5631 	}
5632 
5633       if (tem != 0)
5634 	return tem;
5635       delete_insns_since (last);
5636     }
5637 
5638   /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5639      the constant zero.  Reject all other comparisons at this point.  Only
5640      do LE and GT if branches are expensive since they are expensive on
5641      2-operand machines.  */
5642 
5643   if (op1 != const0_rtx
5644       || (code != EQ && code != NE
5645 	  && (BRANCH_COST (optimize_insn_for_speed_p (),
5646 			   false) <= 1 || (code != LE && code != GT))))
5647     return 0;
5648 
5649   /* Try to put the result of the comparison in the sign bit.  Assume we can't
5650      do the necessary operation below.  */
5651 
5652   tem = 0;
5653 
5654   /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
5655      the sign bit set.  */
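  /* If A is positive, neither A nor A - 1 has the sign bit set, so neither
     does their OR; if A is zero, A - 1 wraps around to -1 and sets it; if A
     is negative, A itself already sets it.  */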
5656 
5657   if (code == LE)
5658     {
5659       /* This is destructive, so SUBTARGET can't be OP0.  */
5660       if (rtx_equal_p (subtarget, op0))
5661 	subtarget = 0;
5662 
5663       tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5664 			  OPTAB_WIDEN);
5665       if (tem)
5666 	tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5667 			    OPTAB_WIDEN);
5668     }
5669 
5670   /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5671      number of bits in the mode of OP0, minus one.  */
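  /* The arithmetic shift yields 0 for nonnegative A and -1 for negative A,
     so the difference is -A or -1 - A respectively; its sign bit is set
     exactly when A is positive.  */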
5672 
5673   if (code == GT)
5674     {
5675       if (rtx_equal_p (subtarget, op0))
5676 	subtarget = 0;
5677 
5678       tem = expand_shift (RSHIFT_EXPR, mode, op0,
5679 			  size_int (GET_MODE_BITSIZE (mode) - 1),
5680 			  subtarget, 0);
5681       tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5682 			  OPTAB_WIDEN);
5683     }
5684 
5685   if (code == EQ || code == NE)
5686     {
5687       /* For EQ or NE, one way to do the comparison is to apply an operation
5688 	 that converts the operand into a positive number if it is nonzero
5689 	 or zero if it was originally zero.  Then, for EQ, we subtract 1 and
5690 	 for NE we negate.  This puts the result in the sign bit.  Then we
5691 	 normalize with a shift, if needed.
5692 
5693 	 Two operations that can do the above actions are ABS and FFS, so try
5694 	 them.  If that doesn't work, and MODE is smaller than a full word,
5695 	 we can use zero-extension to the wider mode (an unsigned conversion)
5696 	 as the operation.  */
5697 
5698       /* Note that ABS doesn't yield a positive number for INT_MIN, but
5699 	 that is compensated by the subsequent overflow when subtracting
5700 	 one / negating.  */
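      /* For instance, with ABS and nonzero A, negating |A| gives a negative
	 value, so the sign bit records A != 0; for EQ, subtracting 1 from
	 |A| instead leaves the sign bit recording A == 0.  */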
5701 
5702       if (optab_handler (abs_optab, mode)->insn_code != CODE_FOR_nothing)
5703 	tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5704       else if (optab_handler (ffs_optab, mode)->insn_code != CODE_FOR_nothing)
5705 	tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5706       else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5707 	{
5708 	  tem = convert_modes (word_mode, mode, op0, 1);
5709 	  mode = word_mode;
5710 	}
5711 
5712       if (tem != 0)
5713 	{
5714 	  if (code == EQ)
5715 	    tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5716 				0, OPTAB_WIDEN);
5717 	  else
5718 	    tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5719 	}
5720 
5721       /* If we couldn't do it that way, for NE we can "or" the two's complement
5722 	 of the value with itself.  For EQ, we take the one's complement of
5723 	 that "or", which is an extra insn, so we only handle EQ if branches
5724 	 are expensive.  */
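      /* (-A | A) has the sign bit set exactly when A is nonzero: for
	 nonzero A at least one of A and -A is negative, while for zero both
	 are zero.  */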
5725 
5726       if (tem == 0
5727 	  && (code == NE
5728 	      || BRANCH_COST (optimize_insn_for_speed_p (),
5729 		      	      false) > 1))
5730 	{
5731 	  if (rtx_equal_p (subtarget, op0))
5732 	    subtarget = 0;
5733 
5734 	  tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5735 	  tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5736 			      OPTAB_WIDEN);
5737 
5738 	  if (tem && code == EQ)
5739 	    tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5740 	}
5741     }
5742 
5743   if (tem && normalizep)
5744     tem = expand_shift (RSHIFT_EXPR, mode, tem,
5745 			size_int (GET_MODE_BITSIZE (mode) - 1),
5746 			subtarget, normalizep == 1);
5747 
5748   if (tem)
5749     {
5750       if (!target)
5751         ;
5752       else if (GET_MODE (tem) != target_mode)
5753 	{
5754 	  convert_move (target, tem, 0);
5755 	  tem = target;
5756 	}
5757       else if (!subtarget)
5758 	{
5759 	  emit_move_insn (target, tem);
5760 	  tem = target;
5761 	}
5762     }
5763   else
5764     delete_insns_since (last);
5765 
5766   return tem;
5767 }
5768 
5769 /* Like emit_store_flag, but always succeeds.  */
5770 
5771 rtx
5772 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5773 		       enum machine_mode mode, int unsignedp, int normalizep)
5774 {
5775   rtx tem, label;
5776   rtx trueval, falseval;
5777 
5778   /* First see if emit_store_flag can do the job.  */
5779   tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5780   if (tem != 0)
5781     return tem;
5782 
5783   if (!target)
5784     target = gen_reg_rtx (word_mode);
5785 
5786   /* If this failed, we have to do this with set/compare/jump/set code.
5787      For foo != 0, if foo is in OP0, just replace it with 1 if nonzero.  */
5788   trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
5789   if (code == NE
5790       && GET_MODE_CLASS (mode) == MODE_INT
5791       && REG_P (target)
5792       && op0 == target
5793       && op1 == const0_rtx)
5794     {
5795       label = gen_label_rtx ();
5796       do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp,
5797 			       mode, NULL_RTX, NULL_RTX, label, -1);
5798       emit_move_insn (target, trueval);
5799       emit_label (label);
5800       return target;
5801     }
5802 
5803   if (!REG_P (target)
5804       || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5805     target = gen_reg_rtx (GET_MODE (target));
5806 
5807   /* Jump in the right direction if the target cannot implement CODE
5808      but can jump on its reverse condition.  */
5809   falseval = const0_rtx;
5810   if (! can_compare_p (code, mode, ccp_jump)
5811       && (! FLOAT_MODE_P (mode)
5812           || code == ORDERED || code == UNORDERED
5813           || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5814           || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5815     {
5816       enum rtx_code rcode;
5817       if (FLOAT_MODE_P (mode))
5818         rcode = reverse_condition_maybe_unordered (code);
5819       else
5820         rcode = reverse_condition (code);
5821 
5822       /* Canonicalize to UNORDERED for the libcall.  */
5823       if (can_compare_p (rcode, mode, ccp_jump)
5824           || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
5825 	{
5826 	  falseval = trueval;
5827 	  trueval = const0_rtx;
5828 	  code = rcode;
5829 	}
5830     }
5831 
5832   emit_move_insn (target, trueval);
5833   label = gen_label_rtx ();
5834   do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5835 			   NULL_RTX, label, -1);
5836 
5837   emit_move_insn (target, falseval);
5838   emit_label (label);
5839 
5840   return target;
5841 }
5842 
5843 /* Perform possibly multi-word comparison and conditional jump to LABEL
5844    if ARG1 OP ARG2 is true, where ARG1 and ARG2 are of mode MODE.  This is
5845    now a thin wrapper around do_compare_rtx_and_jump.  */
5846 
5847 static void
5848 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
5849 		 rtx label)
5850 {
5851   int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
5852   do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode,
5853 			   NULL_RTX, NULL_RTX, label, -1);
5854 }
5855