1 /* Medium-level subroutines: convert bit-field store and extract
2    and shifts, multiplies and divides to rtl instructions.
3    Copyright (C) 1987-2013 Free Software Foundation, Inc.
4 
5 This file is part of GCC.
6 
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11 
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3.  If not see
19 <http://www.gnu.org/licenses/>.  */
20 
21 
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "diagnostic-core.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "tm_p.h"
30 #include "flags.h"
31 #include "insn-config.h"
32 #include "expr.h"
33 #include "optabs.h"
34 #include "recog.h"
35 #include "langhooks.h"
36 #include "df.h"
37 #include "target.h"
38 #include "expmed.h"
39 
40 struct target_expmed default_target_expmed;
41 #if SWITCHABLE_TARGET
42 struct target_expmed *this_target_expmed = &default_target_expmed;
43 #endif
44 
45 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
46 				   unsigned HOST_WIDE_INT,
47 				   unsigned HOST_WIDE_INT,
48 				   unsigned HOST_WIDE_INT,
49 				   rtx);
50 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
51 				   unsigned HOST_WIDE_INT,
52 				   unsigned HOST_WIDE_INT,
53 				   unsigned HOST_WIDE_INT,
54 				   rtx);
55 static rtx extract_fixed_bit_field (enum machine_mode, rtx,
56 				    unsigned HOST_WIDE_INT,
57 				    unsigned HOST_WIDE_INT, rtx, int, bool);
58 static rtx mask_rtx (enum machine_mode, int, int, int);
59 static rtx lshift_value (enum machine_mode, rtx, int, int);
60 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
61 				    unsigned HOST_WIDE_INT, int);
62 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
63 static rtx expand_smod_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
64 static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
65 
66 /* Test whether a value is zero or a power of two.  */
67 #define EXACT_POWER_OF_2_OR_ZERO_P(x) \
68   (((x) & ((x) - (unsigned HOST_WIDE_INT) 1)) == 0)
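/* For example, with x = 8 (binary 1000), x & (x - 1) is 1000 & 0111 == 0, so
   the macro is true; with x = 12 (1100), 1100 & 1011 == 1000, so it is false;
   and for x = 0 the subtraction wraps to all ones, giving 0 again, so zero is
   also accepted.  */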
69 
70 struct init_expmed_rtl
71 {
72   struct rtx_def reg;		rtunion reg_fld[2];
73   struct rtx_def plus;	rtunion plus_fld1;
74   struct rtx_def neg;
75   struct rtx_def mult;	rtunion mult_fld1;
76   struct rtx_def sdiv;	rtunion sdiv_fld1;
77   struct rtx_def udiv;	rtunion udiv_fld1;
78   struct rtx_def sdiv_32;	rtunion sdiv_32_fld1;
79   struct rtx_def smod_32;	rtunion smod_32_fld1;
80   struct rtx_def wide_mult;	rtunion wide_mult_fld1;
81   struct rtx_def wide_lshr;	rtunion wide_lshr_fld1;
82   struct rtx_def wide_trunc;
83   struct rtx_def shift;	rtunion shift_fld1;
84   struct rtx_def shift_mult;	rtunion shift_mult_fld1;
85   struct rtx_def shift_add;	rtunion shift_add_fld1;
86   struct rtx_def shift_sub0;	rtunion shift_sub0_fld1;
87   struct rtx_def shift_sub1;	rtunion shift_sub1_fld1;
88   struct rtx_def zext;
89   struct rtx_def trunc;
90 
91   rtx pow2[MAX_BITS_PER_WORD];
92   rtx cint[MAX_BITS_PER_WORD];
93 };
94 
95 static void
96 init_expmed_one_conv (struct init_expmed_rtl *all, enum machine_mode to_mode,
97 		      enum machine_mode from_mode, bool speed)
98 {
99   int to_size, from_size;
100   rtx which;
101 
102   /* We're given no information about the true size of a partial integer,
103      only the size of the "full" integer it requires for storage.  For
104      comparison purposes here, reduce the bit size by one in that case.  */
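  /* As an illustration of this adjustment: if PSImode occupies 32 bits of
     storage, its size is taken as 31 here, so a conversion from SImode to
     PSImode is costed like a truncation and a conversion from PSImode to
     SImode like an extension.  */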
105   to_size = (GET_MODE_BITSIZE (to_mode)
106 	     - (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT));
107   from_size = (GET_MODE_BITSIZE (from_mode)
108 	       - (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT));
109 
110   /* Assume cost of zero-extend and sign-extend is the same.  */
111   which = (to_size < from_size ? &all->trunc : &all->zext);
112 
113   PUT_MODE (&all->reg, from_mode);
114   set_convert_cost (to_mode, from_mode, speed, set_src_cost (which, speed));
115 }
116 
117 static void
118 init_expmed_one_mode (struct init_expmed_rtl *all,
119 		      enum machine_mode mode, int speed)
120 {
121   int m, n, mode_bitsize;
122   enum machine_mode mode_from;
123 
124   mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
125 
126   PUT_MODE (&all->reg, mode);
127   PUT_MODE (&all->plus, mode);
128   PUT_MODE (&all->neg, mode);
129   PUT_MODE (&all->mult, mode);
130   PUT_MODE (&all->sdiv, mode);
131   PUT_MODE (&all->udiv, mode);
132   PUT_MODE (&all->sdiv_32, mode);
133   PUT_MODE (&all->smod_32, mode);
134   PUT_MODE (&all->wide_trunc, mode);
135   PUT_MODE (&all->shift, mode);
136   PUT_MODE (&all->shift_mult, mode);
137   PUT_MODE (&all->shift_add, mode);
138   PUT_MODE (&all->shift_sub0, mode);
139   PUT_MODE (&all->shift_sub1, mode);
140   PUT_MODE (&all->zext, mode);
141   PUT_MODE (&all->trunc, mode);
142 
143   set_add_cost (speed, mode, set_src_cost (&all->plus, speed));
144   set_neg_cost (speed, mode, set_src_cost (&all->neg, speed));
145   set_mul_cost (speed, mode, set_src_cost (&all->mult, speed));
146   set_sdiv_cost (speed, mode, set_src_cost (&all->sdiv, speed));
147   set_udiv_cost (speed, mode, set_src_cost (&all->udiv, speed));
148 
149   set_sdiv_pow2_cheap (speed, mode, (set_src_cost (&all->sdiv_32, speed)
150 				     <= 2 * add_cost (speed, mode)));
151   set_smod_pow2_cheap (speed, mode, (set_src_cost (&all->smod_32, speed)
152 				     <= 4 * add_cost (speed, mode)));
153 
154   set_shift_cost (speed, mode, 0, 0);
155   {
156     int cost = add_cost (speed, mode);
157     set_shiftadd_cost (speed, mode, 0, cost);
158     set_shiftsub0_cost (speed, mode, 0, cost);
159     set_shiftsub1_cost (speed, mode, 0, cost);
160   }
161 
162   n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
163   for (m = 1; m < n; m++)
164     {
165       XEXP (&all->shift, 1) = all->cint[m];
166       XEXP (&all->shift_mult, 1) = all->pow2[m];
167 
168       set_shift_cost (speed, mode, m, set_src_cost (&all->shift, speed));
169       set_shiftadd_cost (speed, mode, m, set_src_cost (&all->shift_add, speed));
170       set_shiftsub0_cost (speed, mode, m, set_src_cost (&all->shift_sub0, speed));
171       set_shiftsub1_cost (speed, mode, m, set_src_cost (&all->shift_sub1, speed));
172     }
173 
174   if (SCALAR_INT_MODE_P (mode))
175     {
176       for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
177 	   mode_from = (enum machine_mode)(mode_from + 1))
178 	init_expmed_one_conv (all, mode, mode_from, speed);
179     }
180   if (GET_MODE_CLASS (mode) == MODE_INT)
181     {
182       enum machine_mode  wider_mode = GET_MODE_WIDER_MODE (mode);
183       if (wider_mode != VOIDmode)
184 	{
185 	  PUT_MODE (&all->zext, wider_mode);
186 	  PUT_MODE (&all->wide_mult, wider_mode);
187 	  PUT_MODE (&all->wide_lshr, wider_mode);
188 	  XEXP (&all->wide_lshr, 1) = GEN_INT (mode_bitsize);
189 
190 	  set_mul_widen_cost (speed, wider_mode,
191 			      set_src_cost (&all->wide_mult, speed));
192 	  set_mul_highpart_cost (speed, mode,
193 				 set_src_cost (&all->wide_trunc, speed));
194 	}
195     }
196 }
197 
198 void
199 init_expmed (void)
200 {
201   struct init_expmed_rtl all;
202   enum machine_mode mode;
203   int m, speed;
204 
205   memset (&all, 0, sizeof all);
206   for (m = 1; m < MAX_BITS_PER_WORD; m++)
207     {
208       all.pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
209       all.cint[m] = GEN_INT (m);
210     }
211 
212   PUT_CODE (&all.reg, REG);
213   /* Avoid using hard regs in ways which may be unsupported.  */
214   SET_REGNO (&all.reg, LAST_VIRTUAL_REGISTER + 1);
215 
216   PUT_CODE (&all.plus, PLUS);
217   XEXP (&all.plus, 0) = &all.reg;
218   XEXP (&all.plus, 1) = &all.reg;
219 
220   PUT_CODE (&all.neg, NEG);
221   XEXP (&all.neg, 0) = &all.reg;
222 
223   PUT_CODE (&all.mult, MULT);
224   XEXP (&all.mult, 0) = &all.reg;
225   XEXP (&all.mult, 1) = &all.reg;
226 
227   PUT_CODE (&all.sdiv, DIV);
228   XEXP (&all.sdiv, 0) = &all.reg;
229   XEXP (&all.sdiv, 1) = &all.reg;
230 
231   PUT_CODE (&all.udiv, UDIV);
232   XEXP (&all.udiv, 0) = &all.reg;
233   XEXP (&all.udiv, 1) = &all.reg;
234 
235   PUT_CODE (&all.sdiv_32, DIV);
236   XEXP (&all.sdiv_32, 0) = &all.reg;
237   XEXP (&all.sdiv_32, 1) = 32 < MAX_BITS_PER_WORD ? all.cint[32] : GEN_INT (32);
238 
239   PUT_CODE (&all.smod_32, MOD);
240   XEXP (&all.smod_32, 0) = &all.reg;
241   XEXP (&all.smod_32, 1) = XEXP (&all.sdiv_32, 1);
242 
243   PUT_CODE (&all.zext, ZERO_EXTEND);
244   XEXP (&all.zext, 0) = &all.reg;
245 
246   PUT_CODE (&all.wide_mult, MULT);
247   XEXP (&all.wide_mult, 0) = &all.zext;
248   XEXP (&all.wide_mult, 1) = &all.zext;
249 
250   PUT_CODE (&all.wide_lshr, LSHIFTRT);
251   XEXP (&all.wide_lshr, 0) = &all.wide_mult;
252 
253   PUT_CODE (&all.wide_trunc, TRUNCATE);
254   XEXP (&all.wide_trunc, 0) = &all.wide_lshr;
255 
256   PUT_CODE (&all.shift, ASHIFT);
257   XEXP (&all.shift, 0) = &all.reg;
258 
259   PUT_CODE (&all.shift_mult, MULT);
260   XEXP (&all.shift_mult, 0) = &all.reg;
261 
262   PUT_CODE (&all.shift_add, PLUS);
263   XEXP (&all.shift_add, 0) = &all.shift_mult;
264   XEXP (&all.shift_add, 1) = &all.reg;
265 
266   PUT_CODE (&all.shift_sub0, MINUS);
267   XEXP (&all.shift_sub0, 0) = &all.shift_mult;
268   XEXP (&all.shift_sub0, 1) = &all.reg;
269 
270   PUT_CODE (&all.shift_sub1, MINUS);
271   XEXP (&all.shift_sub1, 0) = &all.reg;
272   XEXP (&all.shift_sub1, 1) = &all.shift_mult;
273 
274   PUT_CODE (&all.trunc, TRUNCATE);
275   XEXP (&all.trunc, 0) = &all.reg;
276 
277   for (speed = 0; speed < 2; speed++)
278     {
279       crtl->maybe_hot_insn_p = speed;
280       set_zero_cost (speed, set_src_cost (const0_rtx, speed));
281 
282       for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
283 	   mode = (enum machine_mode)(mode + 1))
284 	init_expmed_one_mode (&all, mode, speed);
285 
286       if (MIN_MODE_PARTIAL_INT != VOIDmode)
287 	for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
288 	     mode = (enum machine_mode)(mode + 1))
289 	  init_expmed_one_mode (&all, mode, speed);
290 
291       if (MIN_MODE_VECTOR_INT != VOIDmode)
292 	for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
293 	     mode = (enum machine_mode)(mode + 1))
294 	  init_expmed_one_mode (&all, mode, speed);
295     }
296 
297   if (alg_hash_used_p ())
298     {
299       struct alg_hash_entry *p = alg_hash_entry_ptr (0);
300       memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
301     }
302   else
303     set_alg_hash_used_p (true);
304   default_rtl_profile ();
305 }
306 
307 /* Return an rtx representing minus the value of X.
308    MODE is the intended mode of the result,
309    useful if X is a CONST_INT.  */
310 
311 rtx
312 negate_rtx (enum machine_mode mode, rtx x)
313 {
314   rtx result = simplify_unary_operation (NEG, mode, x, mode);
315 
316   if (result == 0)
317     result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
318 
319   return result;
320 }
321 
322 /* Adjust bitfield memory MEM so that it points to the first unit of mode
323    MODE that contains a bitfield of size BITSIZE at bit position BITNUM.
324    If MODE is BLKmode, return a reference to every byte in the bitfield.
325    Set *NEW_BITNUM to the bit position of the field within the new memory.  */
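/* For example, with 8-bit units and MODE == HImode (16 bits), a 3-bit field
   at BITNUM 21 gives *NEW_BITNUM == 21 % 16 == 5 and a memory reference
   offset by (21 - 5) / 8 == 2 bytes from the original MEM.  */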
326 
327 static rtx
328 narrow_bit_field_mem (rtx mem, enum machine_mode mode,
329 		      unsigned HOST_WIDE_INT bitsize,
330 		      unsigned HOST_WIDE_INT bitnum,
331 		      unsigned HOST_WIDE_INT *new_bitnum)
332 {
333   if (mode == BLKmode)
334     {
335       *new_bitnum = bitnum % BITS_PER_UNIT;
336       HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
337       HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
338 			    / BITS_PER_UNIT);
339       return adjust_bitfield_address_size (mem, mode, offset, size);
340     }
341   else
342     {
343       unsigned int unit = GET_MODE_BITSIZE (mode);
344       *new_bitnum = bitnum % unit;
345       HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
346       return adjust_bitfield_address (mem, mode, offset);
347     }
348 }
349 
350 /* The caller wants to perform insertion or extraction PATTERN on a
351    bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
352    BITREGION_START and BITREGION_END are as for store_bit_field
353    and FIELDMODE is the natural mode of the field.
354 
355    Search for a mode that is compatible with the memory access
356    restrictions and (where applicable) with a register insertion or
357    extraction.  Return the new memory on success, storing the adjusted
358    bit position in *NEW_BITNUM.  Return null otherwise.  */
359 
360 static rtx
361 adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
362 			      rtx op0, HOST_WIDE_INT bitsize,
363 			      HOST_WIDE_INT bitnum,
364 			      unsigned HOST_WIDE_INT bitregion_start,
365 			      unsigned HOST_WIDE_INT bitregion_end,
366 			      enum machine_mode fieldmode,
367 			      unsigned HOST_WIDE_INT *new_bitnum)
368 {
369   bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
370 				bitregion_end, MEM_ALIGN (op0),
371 				MEM_VOLATILE_P (op0));
372   enum machine_mode best_mode;
373   if (iter.next_mode (&best_mode))
374     {
375       /* We can use a memory in BEST_MODE.  See whether this is true for
376 	 any wider modes.  All other things being equal, we prefer to
377 	 use the widest mode possible because it tends to expose more
378 	 CSE opportunities.  */
379       if (!iter.prefer_smaller_modes ())
380 	{
381 	  /* Limit the search to the mode required by the corresponding
382 	     register insertion or extraction instruction, if any.  */
383 	  enum machine_mode limit_mode = word_mode;
384 	  extraction_insn insn;
385 	  if (get_best_reg_extraction_insn (&insn, pattern,
386 					    GET_MODE_BITSIZE (best_mode),
387 					    fieldmode))
388 	    limit_mode = insn.field_mode;
389 
390 	  enum machine_mode wider_mode;
391 	  while (iter.next_mode (&wider_mode)
392 		 && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
393 	    best_mode = wider_mode;
394 	}
395       return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
396 				   new_bitnum);
397     }
398   return NULL_RTX;
399 }
400 
401 /* Return true if a bitfield of size BITSIZE at bit number BITNUM within
402    a structure of mode STRUCT_MODE represents a lowpart subreg.   The subreg
403    offset is then BITNUM / BITS_PER_UNIT.  */
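/* For example, on a little-endian target with 32-bit words and 8-bit units,
   a 16-bit field at BITNUM 32 within a DImode value can be accessed through
   a subreg at byte offset 32 / BITS_PER_UNIT == 4.  */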
404 
405 static bool
406 lowpart_bit_field_p (unsigned HOST_WIDE_INT bitnum,
407 		     unsigned HOST_WIDE_INT bitsize,
408 		     enum machine_mode struct_mode)
409 {
410   if (BYTES_BIG_ENDIAN)
411     return (bitnum % BITS_PER_UNIT == 0
412 	    && (bitnum + bitsize == GET_MODE_BITSIZE (struct_mode)
413 		|| (bitnum + bitsize) % BITS_PER_WORD == 0));
414   else
415     return bitnum % BITS_PER_WORD == 0;
416 }
417 
418 /* Return true if OP is a memory and if a bitfield of size BITSIZE at
419    bit number BITNUM can be treated as a simple value of mode MODE.  */
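/* For example, a 16-bit field starting at bit 32 of a MEM aligned to at
   least HImode's alignment can be treated as a single HImode value at byte
   offset 4, even when unaligned accesses are slow.  */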
420 
421 static bool
422 simple_mem_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
423 		       unsigned HOST_WIDE_INT bitnum, enum machine_mode mode)
424 {
425   return (MEM_P (op0)
426 	  && bitnum % BITS_PER_UNIT == 0
427 	  && bitsize == GET_MODE_BITSIZE (mode)
428 	  && (!SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
429 	      || (bitnum % GET_MODE_ALIGNMENT (mode) == 0
430 		  && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode))));
431 }
432 
433 /* Try to use instruction INSV to store VALUE into a field of OP0.
434    BITSIZE and BITNUM are as for store_bit_field.  */
435 
436 static bool
437 store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
438 			    unsigned HOST_WIDE_INT bitsize,
439 			    unsigned HOST_WIDE_INT bitnum, rtx value)
440 {
441   struct expand_operand ops[4];
442   rtx value1;
443   rtx xop0 = op0;
444   rtx last = get_last_insn ();
445   bool copy_back = false;
446 
447   enum machine_mode op_mode = insv->field_mode;
448   unsigned int unit = GET_MODE_BITSIZE (op_mode);
449   if (bitsize == 0 || bitsize > unit)
450     return false;
451 
452   if (MEM_P (xop0))
453     /* Get a reference to the first byte of the field.  */
454     xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
455 				 &bitnum);
456   else
457     {
458       /* Convert from counting within OP0 to counting in OP_MODE.  */
459       if (BYTES_BIG_ENDIAN)
460 	bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
461 
462       /* If xop0 is a register, we need it in OP_MODE
463 	 to make it acceptable to the format of insv.  */
464       if (GET_CODE (xop0) == SUBREG)
465 	/* We can't just change the mode, because this might clobber op0,
466 	   and we will need the original value of op0 if insv fails.  */
467 	xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
468       if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
469 	xop0 = gen_lowpart_SUBREG (op_mode, xop0);
470     }
471 
472   /* If the destination is a paradoxical subreg such that we need a
473      truncate to the inner mode, perform the insertion on a temporary and
474      truncate the result to the original destination.  Note that we can't
475      just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
476      X) 0)) is (reg:N X).  */
477   if (GET_CODE (xop0) == SUBREG
478       && REG_P (SUBREG_REG (xop0))
479       && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
480 					 op_mode))
481     {
482       rtx tem = gen_reg_rtx (op_mode);
483       emit_move_insn (tem, xop0);
484       xop0 = tem;
485       copy_back = true;
486     }
487 
488   /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
489      "backwards" from the size of the unit we are inserting into.
490      Otherwise, we count bits from the most significant on a
491      BYTES/BITS_BIG_ENDIAN machine.  */
492 
493   if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
494     bitnum = unit - bitsize - bitnum;
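  /* For instance, with a 32-bit OP_MODE, an 8-bit field starting at bit 4
     counted from the lsb is at bit 32 - 8 - 4 == 20 counted from the other
     end of the unit.  */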
495 
496   /* Convert VALUE to op_mode (which insv insn wants) in VALUE1.  */
497   value1 = value;
498   if (GET_MODE (value) != op_mode)
499     {
500       if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
501 	{
502 	  /* Optimization: Don't bother really extending VALUE
503 	     if it has all the bits we will actually use.  However,
504 	     if we must narrow it, be sure we do it correctly.  */
505 
506 	  if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
507 	    {
508 	      rtx tmp;
509 
510 	      tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
511 	      if (! tmp)
512 		tmp = simplify_gen_subreg (op_mode,
513 					   force_reg (GET_MODE (value),
514 						      value1),
515 					   GET_MODE (value), 0);
516 	      value1 = tmp;
517 	    }
518 	  else
519 	    value1 = gen_lowpart (op_mode, value1);
520 	}
521       else if (CONST_INT_P (value))
522 	value1 = gen_int_mode (INTVAL (value), op_mode);
523       else
524 	/* Parse phase is supposed to make VALUE's data type
525 	   match that of the component reference, which is a type
526 	   at least as wide as the field; so VALUE should have
527 	   a mode that corresponds to that type.  */
528 	gcc_assert (CONSTANT_P (value));
529     }
530 
531   create_fixed_operand (&ops[0], xop0);
532   create_integer_operand (&ops[1], bitsize);
533   create_integer_operand (&ops[2], bitnum);
534   create_input_operand (&ops[3], value1, op_mode);
535   if (maybe_expand_insn (insv->icode, 4, ops))
536     {
537       if (copy_back)
538 	convert_move (op0, xop0, true);
539       return true;
540     }
541   delete_insns_since (last);
542   return false;
543 }
544 
545 /* A subroutine of store_bit_field, with the same arguments.  Return true
546    if the operation could be implemented.
547 
548    If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
549    no other way of implementing the operation.  If FALLBACK_P is false,
550    return false instead.  */
551 
552 static bool
553 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
554 		   unsigned HOST_WIDE_INT bitnum,
555 		   unsigned HOST_WIDE_INT bitregion_start,
556 		   unsigned HOST_WIDE_INT bitregion_end,
557 		   enum machine_mode fieldmode,
558 		   rtx value, bool fallback_p)
559 {
560   rtx op0 = str_rtx;
561   rtx orig_value;
562 
563   while (GET_CODE (op0) == SUBREG)
564     {
565       /* The following line once was done only if WORDS_BIG_ENDIAN,
566 	 but I think that is a mistake.  WORDS_BIG_ENDIAN is
567 	 meaningful at a much higher level; when structures are copied
568 	 between memory and regs, the higher-numbered regs
569 	 always get higher addresses.  */
570       int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
571       int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
572       int byte_offset = 0;
573 
574       /* Paradoxical subregs need special handling on big endian machines.  */
575       if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
576 	{
577 	  int difference = inner_mode_size - outer_mode_size;
578 
579 	  if (WORDS_BIG_ENDIAN)
580 	    byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
581 	  if (BYTES_BIG_ENDIAN)
582 	    byte_offset += difference % UNITS_PER_WORD;
583 	}
584       else
585 	byte_offset = SUBREG_BYTE (op0);
586 
587       bitnum += byte_offset * BITS_PER_UNIT;
588       op0 = SUBREG_REG (op0);
589     }
590 
591   /* No action is needed if the target is a register and if the field
592      lies completely outside that register.  This can occur if the source
593      code contains an out-of-bounds access to a small array.  */
594   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
595     return true;
596 
597   /* Use vec_set patterns for inserting parts of vectors whenever
598      available.  */
599   if (VECTOR_MODE_P (GET_MODE (op0))
600       && !MEM_P (op0)
601       && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing
602       && fieldmode == GET_MODE_INNER (GET_MODE (op0))
603       && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
604       && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
605     {
606       struct expand_operand ops[3];
607       enum machine_mode outermode = GET_MODE (op0);
608       enum machine_mode innermode = GET_MODE_INNER (outermode);
609       enum insn_code icode = optab_handler (vec_set_optab, outermode);
610       int pos = bitnum / GET_MODE_BITSIZE (innermode);
611 
612       create_fixed_operand (&ops[0], op0);
613       create_input_operand (&ops[1], value, innermode);
614       create_integer_operand (&ops[2], pos);
615       if (maybe_expand_insn (icode, 3, ops))
616 	return true;
617     }
618 
619   /* If the target is a register, overwriting the entire object, or storing
620      a full-word or multi-word field can be done with just a SUBREG.  */
621   if (!MEM_P (op0)
622       && bitsize == GET_MODE_BITSIZE (fieldmode)
623       && ((bitsize == GET_MODE_BITSIZE (GET_MODE (op0)) && bitnum == 0)
624 	  || (bitsize % BITS_PER_WORD == 0 && bitnum % BITS_PER_WORD == 0)))
625     {
626       /* Use the subreg machinery either to narrow OP0 to the required
627 	 words or to cope with mode punning between equal-sized modes.  */
628       rtx sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
629 				     bitnum / BITS_PER_UNIT);
630       if (sub)
631 	{
632 	  emit_move_insn (sub, value);
633 	  return true;
634 	}
635     }
636 
637   /* If the target is memory, storing any naturally aligned field can be
638      done with a simple store.  For targets that support fast unaligned
639      memory, any naturally sized, unit aligned field can be done directly.  */
640   if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode))
641     {
642       op0 = adjust_bitfield_address (op0, fieldmode, bitnum / BITS_PER_UNIT);
643       emit_move_insn (op0, value);
644       return true;
645     }
646 
647   /* Make sure we are playing with integral modes.  Pun with subregs
648      if we aren't.  This must come after the entire register case above,
649      since that case is valid for any mode.  The following cases are only
650      valid for integral modes.  */
651   {
652     enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
653     if (imode != GET_MODE (op0))
654       {
655 	if (MEM_P (op0))
656 	  op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
657 	else
658 	  {
659 	    gcc_assert (imode != BLKmode);
660 	    op0 = gen_lowpart (imode, op0);
661 	  }
662       }
663   }
664 
665   /* Storing an lsb-aligned field in a register
666      can be done with a movstrict instruction.  */
667 
668   if (!MEM_P (op0)
669       && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
670       && bitsize == GET_MODE_BITSIZE (fieldmode)
671       && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
672     {
673       struct expand_operand ops[2];
674       enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
675       rtx arg0 = op0;
676       unsigned HOST_WIDE_INT subreg_off;
677 
678       if (GET_CODE (arg0) == SUBREG)
679 	{
680 	  /* Else we've got some float mode source being extracted into
681 	     a different float mode destination -- this combination of
682 	     subregs results in Severe Tire Damage.  */
683 	  gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
684 		      || GET_MODE_CLASS (fieldmode) == MODE_INT
685 		      || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
686 	  arg0 = SUBREG_REG (arg0);
687 	}
688 
689       subreg_off = bitnum / BITS_PER_UNIT;
690       if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
691 	{
692 	  arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
693 
694 	  create_fixed_operand (&ops[0], arg0);
695 	  /* Shrink the source operand to FIELDMODE.  */
696 	  create_convert_operand_to (&ops[1], value, fieldmode, false);
697 	  if (maybe_expand_insn (icode, 2, ops))
698 	    return true;
699 	}
700     }
701 
702   /* Handle fields bigger than a word.  */
703 
704   if (bitsize > BITS_PER_WORD)
705     {
706       /* Here we transfer the words of the field
707 	 in the order least significant first.
708 	 This is because the most significant word is the one which may
709 	 be less than full.
710 	 However, only do that if the value is not BLKmode.  */
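      /* For example, with 32-bit words and !WORDS_BIG_ENDIAN, a 40-bit
         field is stored as the full 32-bit low word first, followed by the
         remaining 8 bits at bit offset 32.  */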
711 
712       unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
713       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
714       unsigned int i;
715       rtx last;
716 
717       /* This is the mode we must force value to, so that there will be enough
718 	 subwords to extract.  Note that fieldmode will often (always?) be
719 	 VOIDmode, because that is what store_field uses to indicate that this
720 	 is a bit field, but passing VOIDmode to operand_subword_force
721 	 is not allowed.  */
722       fieldmode = GET_MODE (value);
723       if (fieldmode == VOIDmode)
724 	fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
725 
726       last = get_last_insn ();
727       for (i = 0; i < nwords; i++)
728 	{
729 	  /* If I is 0, use the low-order word in both field and target;
730 	     if I is 1, use the next to lowest word; and so on.  */
731 	  unsigned int wordnum = (backwards
732 				  ? GET_MODE_SIZE (fieldmode) / UNITS_PER_WORD
733 				  - i - 1
734 				  : i);
735 	  unsigned int bit_offset = (backwards
736 				     ? MAX ((int) bitsize - ((int) i + 1)
737 					    * BITS_PER_WORD,
738 					    0)
739 				     : (int) i * BITS_PER_WORD);
740 	  rtx value_word = operand_subword_force (value, wordnum, fieldmode);
741 	  unsigned HOST_WIDE_INT new_bitsize =
742 	    MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
743 
744 	  /* If the remaining chunk is narrower than a full word, we have
745 	     to make sure that on big-endian machines the higher-order
746 	     bits are used.  */
747 	  if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
748 	    value_word = simplify_expand_binop (word_mode, lshr_optab,
749 						value_word,
750 						GEN_INT (BITS_PER_WORD
751 							 - new_bitsize),
752 						NULL_RTX, true,
753 						OPTAB_LIB_WIDEN);
754 
755 	  if (!store_bit_field_1 (op0, new_bitsize,
756 				  bitnum + bit_offset,
757 				  bitregion_start, bitregion_end,
758 				  word_mode,
759 				  value_word, fallback_p))
760 	    {
761 	      delete_insns_since (last);
762 	      return false;
763 	    }
764 	}
765       return true;
766     }
767 
768   /* If VALUE has a floating-point or complex mode, access it as an
769      integer of the corresponding size.  This can occur on a machine
770      with 64 bit registers that uses SFmode for float.  It can also
771      occur for unaligned float or complex fields.  */
772   orig_value = value;
773   if (GET_MODE (value) != VOIDmode
774       && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
775       && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
776     {
777       value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
778       emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
779     }
780 
781   /* If OP0 is a multi-word register, narrow it to the affected word.
782      If the region spans two words, defer to store_split_bit_field.  */
783   if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
784     {
785       op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
786 				 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
787       gcc_assert (op0);
788       bitnum %= BITS_PER_WORD;
789       if (bitnum + bitsize > BITS_PER_WORD)
790 	{
791 	  if (!fallback_p)
792 	    return false;
793 
794 	  store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
795 				 bitregion_end, value);
796 	  return true;
797 	}
798     }
799 
800   /* From here on we can assume that the field to be stored into fits
801      within a word.  If the destination is a register, it too fits
802      in a word.  */
803 
804   extraction_insn insv;
805   if (!MEM_P (op0)
806       && get_best_reg_extraction_insn (&insv, EP_insv,
807 				       GET_MODE_BITSIZE (GET_MODE (op0)),
808 				       fieldmode)
809       && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
810     return true;
811 
812   /* If OP0 is a memory, try copying it to a register and seeing if a
813      cheap register alternative is available.  */
814   if (MEM_P (op0))
815     {
816       /* Do not use unaligned memory insvs for volatile bitfields when
817 	 -fstrict-volatile-bitfields is in effect.  */
818       if (!(MEM_VOLATILE_P (op0)
819 	    && flag_strict_volatile_bitfields > 0)
820 	  && get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
821 					   fieldmode)
822 	  && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
823 	return true;
824 
825       rtx last = get_last_insn ();
826 
827       /* Try loading part of OP0 into a register, inserting the bitfield
828 	 into that, and then copying the result back to OP0.  */
829       unsigned HOST_WIDE_INT bitpos;
830       rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
831 					       bitregion_start, bitregion_end,
832 					       fieldmode, &bitpos);
833       if (xop0)
834 	{
835 	  rtx tempreg = copy_to_reg (xop0);
836 	  if (store_bit_field_1 (tempreg, bitsize, bitpos,
837 				 bitregion_start, bitregion_end,
838 				 fieldmode, orig_value, false))
839 	    {
840 	      emit_move_insn (xop0, tempreg);
841 	      return true;
842 	    }
843 	  delete_insns_since (last);
844 	}
845     }
846 
847   if (!fallback_p)
848     return false;
849 
850   store_fixed_bit_field (op0, bitsize, bitnum, bitregion_start,
851 			 bitregion_end, value);
852   return true;
853 }
854 
855 /* Generate code to store value from rtx VALUE
856    into a bit-field within structure STR_RTX
857    containing BITSIZE bits starting at bit BITNUM.
858 
859    BITREGION_START is bitpos of the first bitfield in this region.
860    BITREGION_END is the bitpos of the ending bitfield in this region.
861    These two fields are 0 if the C++ memory model does not apply,
862    or we are not interested in keeping track of bitfield regions.
863 
864    FIELDMODE is the machine-mode of the FIELD_DECL node for this field.  */
865 
866 void
867 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
868 		 unsigned HOST_WIDE_INT bitnum,
869 		 unsigned HOST_WIDE_INT bitregion_start,
870 		 unsigned HOST_WIDE_INT bitregion_end,
871 		 enum machine_mode fieldmode,
872 		 rtx value)
873 {
874   /* Under the C++0x memory model, we must not touch bits outside the
875      bit region.  Adjust the address to start at the beginning of the
876      bit region.  */
877   if (MEM_P (str_rtx) && bitregion_start > 0)
878     {
879       enum machine_mode bestmode;
880       HOST_WIDE_INT offset, size;
881 
882       gcc_assert ((bitregion_start % BITS_PER_UNIT) == 0);
883 
884       offset = bitregion_start / BITS_PER_UNIT;
885       bitnum -= bitregion_start;
886       size = (bitnum + bitsize + BITS_PER_UNIT - 1) / BITS_PER_UNIT;
887       bitregion_end -= bitregion_start;
888       bitregion_start = 0;
889       bestmode = get_best_mode (bitsize, bitnum,
890 				bitregion_start, bitregion_end,
891 				MEM_ALIGN (str_rtx), VOIDmode,
892 				MEM_VOLATILE_P (str_rtx));
893       str_rtx = adjust_bitfield_address_size (str_rtx, bestmode, offset, size);
894     }
895 
896   if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
897 			  bitregion_start, bitregion_end,
898 			  fieldmode, value, true))
899     gcc_unreachable ();
900 }
901 
902 /* Use shifts and boolean operations to store VALUE into a bit field of
903    width BITSIZE in OP0, starting at bit BITNUM.  */
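/* Conceptually, once BITNUM has been converted to count from the lsb, the
   effect is op0 = (op0 & ~(MASK << bitnum)) | ((value & MASK) << bitnum),
   where MASK has the low BITSIZE bits set; the all-zero and all-one VALUE
   cases below skip the IOR or the AND step respectively.  */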
904 
905 static void
906 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
907 		       unsigned HOST_WIDE_INT bitnum,
908 		       unsigned HOST_WIDE_INT bitregion_start,
909 		       unsigned HOST_WIDE_INT bitregion_end,
910 		       rtx value)
911 {
912   enum machine_mode mode;
913   rtx temp;
914   int all_zero = 0;
915   int all_one = 0;
916 
917   /* There is a case not handled here:
918      a structure with a known alignment of just a halfword
919      and a field split across two aligned halfwords within the structure.
920      Or likewise a structure with a known alignment of just a byte
921      and a field split across two bytes.
922      Such cases are not supposed to be able to occur.  */
923 
924   if (MEM_P (op0))
925     {
926       unsigned HOST_WIDE_INT maxbits = MAX_FIXED_MODE_SIZE;
927 
928       if (bitregion_end)
929 	maxbits = bitregion_end - bitregion_start + 1;
930 
931       /* Get the proper mode to use for this field.  We want a mode that
932 	 includes the entire field.  If such a mode would be larger than
933 	 a word, we won't be doing the extraction the normal way.
934 	 We don't want a mode bigger than the destination.  */
935 
936       mode = GET_MODE (op0);
937       if (GET_MODE_BITSIZE (mode) == 0
938 	  || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
939 	mode = word_mode;
940 
941       if (MEM_VOLATILE_P (op0)
942           && GET_MODE_BITSIZE (GET_MODE (op0)) > 0
943 	  && GET_MODE_BITSIZE (GET_MODE (op0)) <= maxbits
944 	  && flag_strict_volatile_bitfields > 0)
945 	mode = GET_MODE (op0);
946       else
947 	mode = get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
948 			      MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
949 
950       if (mode == VOIDmode)
951 	{
952 	  /* The only way this should occur is if the field spans word
953 	     boundaries.  */
954 	  store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
955 				 bitregion_end, value);
956 	  return;
957 	}
958 
959       op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
960     }
961 
962   mode = GET_MODE (op0);
963   gcc_assert (SCALAR_INT_MODE_P (mode));
964 
965   /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
966      for invalid input, such as f5 from gcc.dg/pr48335-2.c.  */
967 
968   if (BYTES_BIG_ENDIAN)
969     /* BITNUM is the distance between our msb
970        and that of the containing datum.
971        Convert it to the distance from the lsb.  */
972     bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
973 
974   /* Now BITNUM is always the distance between our lsb
975      and that of OP0.  */
976 
977   /* Shift VALUE left by BITNUM bits.  If VALUE is not constant,
978      we must first convert its mode to MODE.  */
979 
980   if (CONST_INT_P (value))
981     {
982       HOST_WIDE_INT v = INTVAL (value);
983 
984       if (bitsize < HOST_BITS_PER_WIDE_INT)
985 	v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;
986 
987       if (v == 0)
988 	all_zero = 1;
989       else if ((bitsize < HOST_BITS_PER_WIDE_INT
990 		&& v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
991 	       || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
992 	all_one = 1;
993 
994       value = lshift_value (mode, value, bitnum, bitsize);
995     }
996   else
997     {
998       int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
999 		      && bitnum + bitsize != GET_MODE_BITSIZE (mode));
1000 
1001       if (GET_MODE (value) != mode)
1002 	value = convert_to_mode (mode, value, 1);
1003 
1004       if (must_and)
1005 	value = expand_binop (mode, and_optab, value,
1006 			      mask_rtx (mode, 0, bitsize, 0),
1007 			      NULL_RTX, 1, OPTAB_LIB_WIDEN);
1008       if (bitnum > 0)
1009 	value = expand_shift (LSHIFT_EXPR, mode, value,
1010 			      bitnum, NULL_RTX, 1);
1011     }
1012 
1013   /* Now clear the chosen bits in OP0,
1014      except that if VALUE is -1 we need not bother.  */
1015   /* We keep the intermediates in registers to allow CSE to combine
1016      consecutive bitfield assignments.  */
1017 
1018   temp = force_reg (mode, op0);
1019 
1020   if (! all_one)
1021     {
1022       temp = expand_binop (mode, and_optab, temp,
1023 			   mask_rtx (mode, bitnum, bitsize, 1),
1024 			   NULL_RTX, 1, OPTAB_LIB_WIDEN);
1025       temp = force_reg (mode, temp);
1026     }
1027 
1028   /* Now logical-or VALUE into OP0, unless it is zero.  */
1029 
1030   if (! all_zero)
1031     {
1032       temp = expand_binop (mode, ior_optab, temp, value,
1033 			   NULL_RTX, 1, OPTAB_LIB_WIDEN);
1034       temp = force_reg (mode, temp);
1035     }
1036 
1037   if (op0 != temp)
1038     {
1039       op0 = copy_rtx (op0);
1040       emit_move_insn (op0, temp);
1041     }
1042 }
1043 
1044 /* Store a bit field that is split across multiple accessible memory objects.
1045 
1046    OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1047    BITSIZE is the field width; BITPOS the position of its first bit
1048    (within the word).
1049    VALUE is the value to store.
1050 
1051    This does not yet handle fields wider than BITS_PER_WORD.  */
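/* For example, with 32-bit words, a 12-bit field at BITPOS 28 of a REG is
   stored as two pieces: 4 bits at position 28 of word 0, then the remaining
   8 bits at position 0 of word 1.  */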
1052 
1053 static void
1054 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1055 		       unsigned HOST_WIDE_INT bitpos,
1056 		       unsigned HOST_WIDE_INT bitregion_start,
1057 		       unsigned HOST_WIDE_INT bitregion_end,
1058 		       rtx value)
1059 {
1060   unsigned int unit;
1061   unsigned int bitsdone = 0;
1062 
1063   /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle that
1064      much at a time.  */
1065   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1066     unit = BITS_PER_WORD;
1067   else
1068     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1069 
1070   /* If VALUE is a constant other than a CONST_INT, get it into a register in
1071      WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
1072      that VALUE might be a floating-point constant.  */
1073   if (CONSTANT_P (value) && !CONST_INT_P (value))
1074     {
1075       rtx word = gen_lowpart_common (word_mode, value);
1076 
1077       if (word && (value != word))
1078 	value = word;
1079       else
1080 	value = gen_lowpart_common (word_mode,
1081 				    force_reg (GET_MODE (value) != VOIDmode
1082 					       ? GET_MODE (value)
1083 					       : word_mode, value));
1084     }
1085 
1086   while (bitsdone < bitsize)
1087     {
1088       unsigned HOST_WIDE_INT thissize;
1089       rtx part, word;
1090       unsigned HOST_WIDE_INT thispos;
1091       unsigned HOST_WIDE_INT offset;
1092 
1093       offset = (bitpos + bitsdone) / unit;
1094       thispos = (bitpos + bitsdone) % unit;
1095 
1096       /* When the region of bytes we can touch is restricted, decrease
1097 	 UNIT close to the end of the region as needed.  If op0 is a REG
1098 	 or SUBREG of REG, don't do this, as there can't be data races
1099 	 on a register and we can expand shorter code in some cases.  */
1100       if (bitregion_end
1101 	  && unit > BITS_PER_UNIT
1102 	  && bitpos + bitsdone - thispos + unit > bitregion_end + 1
1103 	  && !REG_P (op0)
1104 	  && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0))))
1105 	{
1106 	  unit = unit / 2;
1107 	  continue;
1108 	}
1109 
1110       /* THISSIZE must not overrun a word boundary.  Otherwise,
1111 	 store_fixed_bit_field will call us again, and we will mutually
1112 	 recurse forever.  */
1113       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1114       thissize = MIN (thissize, unit - thispos);
1115 
1116       if (BYTES_BIG_ENDIAN)
1117 	{
1118 	  /* Fetch successively less significant portions.  */
1119 	  if (CONST_INT_P (value))
1120 	    part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1121 			     >> (bitsize - bitsdone - thissize))
1122 			    & (((HOST_WIDE_INT) 1 << thissize) - 1));
1123 	  else
1124 	    {
1125 	      int total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1126 	      /* The args are chosen so that the last part includes the
1127 		 lsb.  Give extract_bit_field the value it needs (with
1128 		 endianness compensation) to fetch the piece we want.  */
1129 	      part = extract_fixed_bit_field (word_mode, value, thissize,
1130 					      total_bits - bitsize + bitsdone,
1131 					      NULL_RTX, 1, false);
1132 	    }
1133 	}
1134       else
1135 	{
1136 	  /* Fetch successively more significant portions.  */
1137 	  if (CONST_INT_P (value))
1138 	    part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1139 			     >> bitsdone)
1140 			    & (((HOST_WIDE_INT) 1 << thissize) - 1));
1141 	  else
1142 	    part = extract_fixed_bit_field (word_mode, value, thissize,
1143 					    bitsdone, NULL_RTX, 1, false);
1144 	}
1145 
1146       /* If OP0 is a register, then handle OFFSET here.
1147 
1148 	 When handling multiword bitfields, extract_bit_field may pass
1149 	 down a word_mode SUBREG of a larger REG for a bitfield that actually
1150 	 crosses a word boundary.  Thus, for a SUBREG, we must find
1151 	 the current word starting from the base register.  */
1152       if (GET_CODE (op0) == SUBREG)
1153 	{
1154 	  int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD)
1155 			    + (offset * unit / BITS_PER_WORD);
1156 	  enum machine_mode sub_mode = GET_MODE (SUBREG_REG (op0));
1157 	  if (sub_mode != BLKmode && GET_MODE_SIZE (sub_mode) < UNITS_PER_WORD)
1158 	    word = word_offset ? const0_rtx : op0;
1159 	  else
1160 	    word = operand_subword_force (SUBREG_REG (op0), word_offset,
1161 					  GET_MODE (SUBREG_REG (op0)));
1162 	  offset &= BITS_PER_WORD / unit - 1;
1163 	}
1164       else if (REG_P (op0))
1165 	{
1166 	  enum machine_mode op0_mode = GET_MODE (op0);
1167 	  if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
1168 	    word = offset ? const0_rtx : op0;
1169 	  else
1170 	    word = operand_subword_force (op0, offset * unit / BITS_PER_WORD,
1171 					  GET_MODE (op0));
1172 	  offset &= BITS_PER_WORD / unit - 1;
1173 	}
1174       else
1175 	word = op0;
1176 
1177       /* OFFSET is in UNITs, and UNIT is in bits.  If WORD is const0_rtx,
1178 	 it is just an out-of-bounds access.  Ignore it.  */
1179       if (word != const0_rtx)
1180 	store_fixed_bit_field (word, thissize, offset * unit + thispos,
1181 			       bitregion_start, bitregion_end, part);
1182       bitsdone += thissize;
1183     }
1184 }
1185 
1186 /* A subroutine of extract_bit_field_1 that converts return value X
1187    to either MODE or TMODE.  MODE, TMODE and UNSIGNEDP are arguments
1188    to extract_bit_field.  */
1189 
1190 static rtx
1191 convert_extracted_bit_field (rtx x, enum machine_mode mode,
1192 			     enum machine_mode tmode, bool unsignedp)
1193 {
1194   if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1195     return x;
1196 
1197   /* If TMODE is not a scalar integer mode, first convert X to the
1198      integer mode of that size and then access it as TMODE (e.g. a
1199      floating-point value) via a SUBREG.  */
1200   if (!SCALAR_INT_MODE_P (tmode))
1201     {
1202       enum machine_mode smode;
1203 
1204       smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1205       x = convert_to_mode (smode, x, unsignedp);
1206       x = force_reg (smode, x);
1207       return gen_lowpart (tmode, x);
1208     }
1209 
1210   return convert_to_mode (tmode, x, unsignedp);
1211 }
1212 
1213 /* Try to use an ext(z)v pattern to extract a field from OP0.
1214    Return the extracted value on success, otherwise return null.
1215    EXT_MODE is the mode of the extraction and the other arguments
1216    are as for extract_bit_field.  */
1217 
1218 static rtx
1219 extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
1220 			      unsigned HOST_WIDE_INT bitsize,
1221 			      unsigned HOST_WIDE_INT bitnum,
1222 			      int unsignedp, rtx target,
1223 			      enum machine_mode mode, enum machine_mode tmode)
1224 {
1225   struct expand_operand ops[4];
1226   rtx spec_target = target;
1227   rtx spec_target_subreg = 0;
1228   enum machine_mode ext_mode = extv->field_mode;
1229   unsigned unit = GET_MODE_BITSIZE (ext_mode);
1230 
1231   if (bitsize == 0 || unit < bitsize)
1232     return NULL_RTX;
1233 
1234   if (MEM_P (op0))
1235     /* Get a reference to the first byte of the field.  */
1236     op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
1237 				&bitnum);
1238   else
1239     {
1240       /* Convert from counting within OP0 to counting in EXT_MODE.  */
1241       if (BYTES_BIG_ENDIAN)
1242 	bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1243 
1244       /* If op0 is a register, we need it in EXT_MODE to make it
1245 	 acceptable to the format of ext(z)v.  */
1246       if (GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
1247 	return NULL_RTX;
1248       if (REG_P (op0) && GET_MODE (op0) != ext_mode)
1249 	op0 = gen_lowpart_SUBREG (ext_mode, op0);
1250     }
1251 
1252   /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1253      "backwards" from the size of the unit we are extracting from.
1254      Otherwise, we count bits from the most significant on a
1255      BYTES/BITS_BIG_ENDIAN machine.  */
1256 
1257   if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1258     bitnum = unit - bitsize - bitnum;
1259 
1260   if (target == 0)
1261     target = spec_target = gen_reg_rtx (tmode);
1262 
1263   if (GET_MODE (target) != ext_mode)
1264     {
1265       /* Don't use LHS paradoxical subreg if explicit truncation is needed
1266 	 between the mode of the extraction (word_mode) and the target
1267 	 mode.  Instead, create a temporary and use convert_move to set
1268 	 the target.  */
1269       if (REG_P (target)
1270 	  && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode))
1271 	{
1272 	  target = gen_lowpart (ext_mode, target);
1273 	  if (GET_MODE_PRECISION (ext_mode)
1274 	      > GET_MODE_PRECISION (GET_MODE (spec_target)))
1275 	    spec_target_subreg = target;
1276 	}
1277       else
1278 	target = gen_reg_rtx (ext_mode);
1279     }
1280 
1281   create_output_operand (&ops[0], target, ext_mode);
1282   create_fixed_operand (&ops[1], op0);
1283   create_integer_operand (&ops[2], bitsize);
1284   create_integer_operand (&ops[3], bitnum);
1285   if (maybe_expand_insn (extv->icode, 4, ops))
1286     {
1287       target = ops[0].value;
1288       if (target == spec_target)
1289 	return target;
1290       if (target == spec_target_subreg)
1291 	return spec_target;
1292       return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1293     }
1294   return NULL_RTX;
1295 }
1296 
1297 /* A subroutine of extract_bit_field, with the same arguments.
1298    If FALLBACK_P is true, fall back to extract_fixed_bit_field
1299    if we can find no other means of implementing the operation.
1300    If FALLBACK_P is false, return NULL instead.  */
1301 
1302 static rtx
1303 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1304 		     unsigned HOST_WIDE_INT bitnum,
1305 		     int unsignedp, bool packedp, rtx target,
1306 		     enum machine_mode mode, enum machine_mode tmode,
1307 		     bool fallback_p)
1308 {
1309   rtx op0 = str_rtx;
1310   enum machine_mode int_mode;
1311   enum machine_mode mode1;
1312 
1313   if (tmode == VOIDmode)
1314     tmode = mode;
1315 
1316   while (GET_CODE (op0) == SUBREG)
1317     {
1318       bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1319       op0 = SUBREG_REG (op0);
1320     }
1321 
1322   /* If we have an out-of-bounds access to a register, just return an
1323      uninitialized register of the required mode.  This can occur if the
1324      source code contains an out-of-bounds access to a small array.  */
1325   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1326     return gen_reg_rtx (tmode);
1327 
1328   if (REG_P (op0)
1329       && mode == GET_MODE (op0)
1330       && bitnum == 0
1331       && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1332     {
1333       /* We're trying to extract a full register from itself.  */
1334       return op0;
1335     }
1336 
1337   /* See if we can get a better vector mode before extracting.  */
1338   if (VECTOR_MODE_P (GET_MODE (op0))
1339       && !MEM_P (op0)
1340       && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1341     {
1342       enum machine_mode new_mode;
1343 
1344       if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1345 	new_mode = MIN_MODE_VECTOR_FLOAT;
1346       else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1347 	new_mode = MIN_MODE_VECTOR_FRACT;
1348       else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1349 	new_mode = MIN_MODE_VECTOR_UFRACT;
1350       else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1351 	new_mode = MIN_MODE_VECTOR_ACCUM;
1352       else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1353 	new_mode = MIN_MODE_VECTOR_UACCUM;
1354       else
1355 	new_mode = MIN_MODE_VECTOR_INT;
1356 
1357       for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
1358 	if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1359 	    && targetm.vector_mode_supported_p (new_mode))
1360 	  break;
1361       if (new_mode != VOIDmode)
1362 	op0 = gen_lowpart (new_mode, op0);
1363     }
1364 
1365   /* Use vec_extract patterns for extracting parts of vectors whenever
1366      available.  */
1367   if (VECTOR_MODE_P (GET_MODE (op0))
1368       && !MEM_P (op0)
1369       && optab_handler (vec_extract_optab, GET_MODE (op0)) != CODE_FOR_nothing
1370       && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
1371 	  == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
1372     {
1373       struct expand_operand ops[3];
1374       enum machine_mode outermode = GET_MODE (op0);
1375       enum machine_mode innermode = GET_MODE_INNER (outermode);
1376       enum insn_code icode = optab_handler (vec_extract_optab, outermode);
1377       unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1378 
1379       create_output_operand (&ops[0], target, innermode);
1380       create_input_operand (&ops[1], op0, outermode);
1381       create_integer_operand (&ops[2], pos);
1382       if (maybe_expand_insn (icode, 3, ops))
1383 	{
1384 	  target = ops[0].value;
1385       	  if (GET_MODE (target) != mode)
1386 	    return gen_lowpart (tmode, target);
1387 	  return target;
1388 	}
1389     }
1390 
1391   /* Make sure we are playing with integral modes.  Pun with subregs
1392      if we aren't.  */
1393   {
1394     enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1395     if (imode != GET_MODE (op0))
1396       {
1397 	if (MEM_P (op0))
1398 	  op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
1399 	else if (imode != BLKmode)
1400 	  {
1401 	    op0 = gen_lowpart (imode, op0);
1402 
1403 	    /* If we got a SUBREG, force it into a register since we
1404 	       aren't going to be able to do another SUBREG on it.  */
1405 	    if (GET_CODE (op0) == SUBREG)
1406 	      op0 = force_reg (imode, op0);
1407 	  }
1408 	else if (REG_P (op0))
1409 	  {
1410 	    rtx reg, subreg;
1411 	    imode = smallest_mode_for_size (GET_MODE_BITSIZE (GET_MODE (op0)),
1412 					    MODE_INT);
1413 	    reg = gen_reg_rtx (imode);
1414 	    subreg = gen_lowpart_SUBREG (GET_MODE (op0), reg);
1415 	    emit_move_insn (subreg, op0);
1416 	    op0 = reg;
1417 	    bitnum += SUBREG_BYTE (subreg) * BITS_PER_UNIT;
1418 	  }
1419 	else
1420 	  {
1421 	    HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (op0));
1422 	    rtx mem = assign_stack_temp (GET_MODE (op0), size);
1423 	    emit_move_insn (mem, op0);
1424 	    op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size);
1425 	  }
1426       }
1427   }
1428 
1429   /* ??? We currently assume TARGET is at least as big as BITSIZE.
1430      If that's wrong, the solution is to test for it and set TARGET to 0
1431      if needed.  */
1432 
1433   /* If the bitfield is volatile, we need to make sure the access
1434      remains on a type-aligned boundary.  */
1435   if (GET_CODE (op0) == MEM
1436       && MEM_VOLATILE_P (op0)
1437       && GET_MODE_BITSIZE (GET_MODE (op0)) > 0
1438       && flag_strict_volatile_bitfields > 0)
1439     goto no_subreg_mode_swap;
1440 
1441   /* Only scalar integer modes can be converted via subregs.  There is an
1442      additional problem for FP modes here in that they can have a precision
1443      which is different from the size.  mode_for_size uses precision, but
1444      we want a mode based on the size, so we must avoid calling it for FP
1445      modes.  */
1446   mode1 = mode;
1447   if (SCALAR_INT_MODE_P (tmode))
1448     {
1449       enum machine_mode try_mode = mode_for_size (bitsize,
1450 						  GET_MODE_CLASS (tmode), 0);
1451       if (try_mode != BLKmode)
1452 	mode1 = try_mode;
1453     }
1454   gcc_assert (mode1 != BLKmode);
1455 
1456   /* Extraction of a full MODE1 value can be done with a subreg as long
1457      as the least significant bit of the value is the least significant
1458      bit of either OP0 or a word of OP0.  */
1459   if (!MEM_P (op0)
1460       && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
1461       && bitsize == GET_MODE_BITSIZE (mode1)
1462       && TRULY_NOOP_TRUNCATION_MODES_P (mode1, GET_MODE (op0)))
1463     {
1464       rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1465 				     bitnum / BITS_PER_UNIT);
1466       if (sub)
1467 	return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1468     }
1469 
1470   /* Extraction of a full MODE1 value can be done with a load as long as
1471      the field is on a byte boundary and is sufficiently aligned.  */
1472   if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1))
1473     {
1474       op0 = adjust_bitfield_address (op0, mode1, bitnum / BITS_PER_UNIT);
1475       return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1476     }
1477 
1478  no_subreg_mode_swap:
1479 
1480   /* Handle fields bigger than a word.  */
1481 
1482   if (bitsize > BITS_PER_WORD)
1483     {
1484       /* Here we transfer the words of the field
1485 	 in the order least significant first.
1486 	 This is because the most significant word is the one which may
1487 	 be less than full.  */
1488 
1489       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1490       unsigned int i;
1491       rtx last;
1492 
1493       if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1494 	target = gen_reg_rtx (mode);
1495 
1496       /* Indicate for flow that the entire target reg is being set.  */
1497       emit_clobber (target);
1498 
1499       last = get_last_insn ();
1500       for (i = 0; i < nwords; i++)
1501 	{
1502 	  /* If I is 0, use the low-order word in both field and target;
1503 	     if I is 1, use the next to lowest word; and so on.  */
1504 	  /* Word number in TARGET to use.  */
1505 	  unsigned int wordnum
1506 	    = (WORDS_BIG_ENDIAN
1507 	       ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1508 	       : i);
1509 	  /* Offset from start of field in OP0.  */
1510 	  unsigned int bit_offset = (WORDS_BIG_ENDIAN
1511 				     ? MAX (0, ((int) bitsize - ((int) i + 1)
1512 						* (int) BITS_PER_WORD))
1513 				     : (int) i * BITS_PER_WORD);
1514 	  rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1515 	  rtx result_part
1516 	    = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1517 					     bitsize - i * BITS_PER_WORD),
1518 				   bitnum + bit_offset, 1, false, target_part,
1519 				   mode, word_mode, fallback_p);
1520 
1521 	  gcc_assert (target_part);
1522 	  if (!result_part)
1523 	    {
1524 	      delete_insns_since (last);
1525 	      return NULL;
1526 	    }
1527 
1528 	  if (result_part != target_part)
1529 	    emit_move_insn (target_part, result_part);
1530 	}
1531 
1532       if (unsignedp)
1533 	{
1534 	  /* Unless we've filled TARGET, the upper regs in a multi-reg value
1535 	     need to be zero'd out.  */
1536 	  if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1537 	    {
1538 	      unsigned int i, total_words;
1539 
1540 	      total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1541 	      for (i = nwords; i < total_words; i++)
1542 		emit_move_insn
1543 		  (operand_subword (target,
1544 				    WORDS_BIG_ENDIAN ? total_words - i - 1 : i,
1545 				    1, VOIDmode),
1546 		   const0_rtx);
1547 	    }
1548 	  return target;
1549 	}
1550 
1551       /* Signed bit field: sign-extend with two arithmetic shifts.  */
1552       target = expand_shift (LSHIFT_EXPR, mode, target,
1553 			     GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1554       return expand_shift (RSHIFT_EXPR, mode, target,
1555 			   GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1556     }
1557 
1558   /* If OP0 is a multi-word register, narrow it to the affected word.
1559      If the region spans two words, defer to extract_split_bit_field.  */
1560   if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1561     {
1562       op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
1563 				 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1564       bitnum %= BITS_PER_WORD;
1565       if (bitnum + bitsize > BITS_PER_WORD)
1566 	{
1567 	  if (!fallback_p)
1568 	    return NULL_RTX;
1569 	  target = extract_split_bit_field (op0, bitsize, bitnum, unsignedp);
1570 	  return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1571 	}
1572     }
1573 
1574   /* From here on we know the desired field is smaller than a word.
1575      If OP0 is a register, it too fits within a word.  */
1576   enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv;
1577   extraction_insn extv;
1578   if (!MEM_P (op0)
1579       /* ??? We could limit the structure size to the part of OP0 that
1580 	 contains the field, with appropriate checks for endianness
1581 	 and TRULY_NOOP_TRUNCATION.  */
1582       && get_best_reg_extraction_insn (&extv, pattern,
1583 				       GET_MODE_BITSIZE (GET_MODE (op0)),
1584 				       tmode))
1585     {
1586       rtx result = extract_bit_field_using_extv (&extv, op0, bitsize, bitnum,
1587 						 unsignedp, target, mode,
1588 						 tmode);
1589       if (result)
1590 	return result;
1591     }
1592 
1593   /* If OP0 is a memory, try copying it to a register and seeing if a
1594      cheap register alternative is available.  */
1595   if (MEM_P (op0))
1596     {
1597       /* Do not use extv/extzv for volatile bitfields when
1598          -fstrict-volatile-bitfields is in effect.  */
1599       if (!(MEM_VOLATILE_P (op0) && flag_strict_volatile_bitfields > 0)
1600 	  && get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum,
1601 					   tmode))
1602 	{
1603 	  rtx result = extract_bit_field_using_extv (&extv, op0, bitsize,
1604 						     bitnum, unsignedp,
1605 						     target, mode,
1606 						     tmode);
1607 	  if (result)
1608 	    return result;
1609 	}
1610 
1611       rtx last = get_last_insn ();
1612 
1613       /* Try loading part of OP0 into a register and extracting the
1614 	 bitfield from that.  */
1615       unsigned HOST_WIDE_INT bitpos;
1616       rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum,
1617 					       0, 0, tmode, &bitpos);
1618       if (xop0)
1619 	{
1620 	  xop0 = copy_to_reg (xop0);
1621 	  rtx result = extract_bit_field_1 (xop0, bitsize, bitpos,
1622 					    unsignedp, packedp, target,
1623 					    mode, tmode, false);
1624 	  if (result)
1625 	    return result;
1626 	  delete_insns_since (last);
1627 	}
1628     }
1629 
1630   if (!fallback_p)
1631     return NULL;
1632 
1633   /* Find a correspondingly-sized integer field, so we can apply
1634      shifts and masks to it.  */
1635   int_mode = int_mode_for_mode (tmode);
1636   if (int_mode == BLKmode)
1637     int_mode = int_mode_for_mode (mode);
1638   /* Should probably push op0 out to memory and then do a load.  */
1639   gcc_assert (int_mode != BLKmode);
1640 
1641   target = extract_fixed_bit_field (int_mode, op0, bitsize, bitnum,
1642 				    target, unsignedp, packedp);
1643   return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1644 }
1645 
1646 /* Generate code to extract a bit-field from STR_RTX
1647    containing BITSIZE bits, starting at BITNUM,
1648    and put it in TARGET if possible (if TARGET is nonzero).
1649    Regardless of TARGET, we return the rtx for where the value is placed.
1650 
1651    STR_RTX is the structure containing the field (a REG or MEM).
1652    UNSIGNEDP is nonzero if this is an unsigned bit field.
1653    PACKEDP is nonzero if the field has the packed attribute.
1654    MODE is the natural mode of the field value once extracted.
1655    TMODE is the mode the caller would like the value to have;
1656    but the value may be returned with type MODE instead.
1657 
1658    If a TARGET is specified and we can store in it at no extra cost,
1659    we do so, and return TARGET.
1660    Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1661    if they are equally easy.  */
1662 
1663 rtx
1664 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1665 		   unsigned HOST_WIDE_INT bitnum, int unsignedp, bool packedp,
1666 		   rtx target, enum machine_mode mode, enum machine_mode tmode)
1667 {
1668   return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp, packedp,
1669 			      target, mode, tmode, true);
1670 }
1671 
1672 /* Use shifts and boolean operations to extract a field of BITSIZE bits
1673    from bit BITNUM of OP0.
1674 
1675    UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1676    PACKEDP is true if the field has the packed attribute.
1677 
1678    If TARGET is nonzero, attempts to store the value there
1679    and return TARGET, but this is not guaranteed.
1680    If TARGET is not used, create a pseudo-reg of mode TMODE for the value.  */
1681 
1682 static rtx
1683 extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
1684 			 unsigned HOST_WIDE_INT bitsize,
1685 			 unsigned HOST_WIDE_INT bitnum, rtx target,
1686 			 int unsignedp, bool packedp)
1687 {
1688   enum machine_mode mode;
1689 
1690   if (MEM_P (op0))
1691     {
1692       /* Get the proper mode to use for this field.  We want a mode that
1693 	 includes the entire field.  If such a mode would be larger than
1694 	 a word, we won't be doing the extraction the normal way.  */
1695 
1696       if (MEM_VOLATILE_P (op0)
1697 	  && flag_strict_volatile_bitfields > 0)
1698 	{
1699 	  if (GET_MODE_BITSIZE (GET_MODE (op0)) > 0)
1700 	    mode = GET_MODE (op0);
1701 	  else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
1702 	    mode = GET_MODE (target);
1703 	  else
1704 	    mode = tmode;
1705 	}
1706       else
1707 	mode = get_best_mode (bitsize, bitnum, 0, 0,
1708 			      MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0));
1709 
1710       if (mode == VOIDmode)
1711 	/* The only way this should occur is if the field spans word
1712 	   boundaries.  */
1713 	return extract_split_bit_field (op0, bitsize, bitnum, unsignedp);
1714 
1715       unsigned int total_bits = GET_MODE_BITSIZE (mode);
1716       HOST_WIDE_INT bit_offset = bitnum - bitnum % total_bits;
1717 
1718       /* If we're accessing a volatile MEM, we can't apply BIT_OFFSET
1719 	 if it results in a multi-word access where we otherwise wouldn't
1720 	 have one.  So, check for that case here.  */
1721       if (MEM_P (op0)
1722 	  && MEM_VOLATILE_P (op0)
1723 	  && flag_strict_volatile_bitfields > 0
1724 	  && bitnum % BITS_PER_UNIT + bitsize <= total_bits
1725 	  && bitnum % GET_MODE_BITSIZE (mode) + bitsize > total_bits)
1726 	{
1727 	  if (STRICT_ALIGNMENT)
1728 	    {
1729 	      static bool informed_about_misalignment = false;
1730 
1731 	      if (packedp)
1732 		{
1733 		  if (bitsize == total_bits)
1734 		    warning_at (input_location, OPT_fstrict_volatile_bitfields,
1735 				"multiple accesses to volatile structure"
1736 				" member because of packed attribute");
1737 		  else
1738 		    warning_at (input_location, OPT_fstrict_volatile_bitfields,
1739 				"multiple accesses to volatile structure"
1740 				" bitfield because of packed attribute");
1741 
1742 		  return extract_split_bit_field (op0, bitsize, bitnum,
1743 						  unsignedp);
1744 		}
1745 
1746 	      if (bitsize == total_bits)
1747 		warning_at (input_location, OPT_fstrict_volatile_bitfields,
1748 			    "mis-aligned access used for structure member");
1749 	      else
1750 		warning_at (input_location, OPT_fstrict_volatile_bitfields,
1751 			    "mis-aligned access used for structure bitfield");
1752 
1753 	      if (! informed_about_misalignment)
1754 		{
1755 		  informed_about_misalignment = true;
1756 		  inform (input_location,
1757 			  "when a volatile object spans multiple type-sized"
1758 			  " locations, the compiler must choose between using"
1759 			  " a single mis-aligned access to preserve the"
1760 			  " volatility, or using multiple aligned accesses"
1761 			  " to avoid runtime faults; this code may fail at"
1762 			  " runtime if the hardware does not allow this"
1763 			  " access");
1764 		}
1765 	    }
1766 	  bit_offset = bitnum - bitnum % BITS_PER_UNIT;
1767 	}
1768       op0 = adjust_bitfield_address (op0, mode, bit_offset / BITS_PER_UNIT);
1769       bitnum -= bit_offset;
1770     }
1771 
1772   mode = GET_MODE (op0);
1773   gcc_assert (SCALAR_INT_MODE_P (mode));
1774 
1775   /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1776      for invalid input, such as extract equivalent of f5 from
1777      gcc.dg/pr48335-2.c.  */
1778 
1779   if (BYTES_BIG_ENDIAN)
1780     /* BITNUM is the distance between our msb and that of OP0.
1781        Convert it to the distance from the lsb.  */
1782     bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1783 
1784   /* Now BITNUM is always the distance between the field's lsb and that of OP0.
1785      We have reduced the big-endian case to the little-endian case.  */
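  /* Worked example (illustrative): with a 32-bit OP0 on a big-endian
     target, a 4-bit field whose msb lies 8 bits below OP0's msb gets
     bitnum = 32 - 4 - 8 = 20, i.e. the same lsb-relative position a
     little-endian target would use for those bits.  */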
1786 
1787   if (unsignedp)
1788     {
1789       if (bitnum)
1790 	{
1791 	  /* If the field does not already start at the lsb,
1792 	     shift it so it does.  */
1793 	  /* Maybe propagate the target for the shift.  */
1794 	  rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1795 	  if (tmode != mode)
1796 	    subtarget = 0;
1797 	  op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
1798 	}
1799       /* Convert the value to the desired mode.  */
1800       if (mode != tmode)
1801 	op0 = convert_to_mode (tmode, op0, 1);
1802 
1803       /* Unless the msb of the field used to be the msb when we shifted,
1804 	 mask out the upper bits.  */
1805 
1806       if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
1807 	return expand_binop (GET_MODE (op0), and_optab, op0,
1808 			     mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1809 			     target, 1, OPTAB_LIB_WIDEN);
1810       return op0;
1811     }
1812 
1813   /* To extract a signed bit-field, first shift its msb to the msb of the word,
1814      then arithmetic-shift its lsb to the lsb of the word.  */
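  /* For example (illustrative, assuming SImode is the narrowest integer
     mode that covers the field): a 5-bit signed field at bit 20 is
     extracted by shifting left by 32 - (5 + 20) = 7, which brings the
     field's sign bit to bit 31, then arithmetic-shifting right by
     32 - 5 = 27, which places the field at bit 0 and replicates the
     sign bit through the upper bits.  */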
1815   op0 = force_reg (mode, op0);
1816 
1817   /* Find the narrowest integer mode that contains the field.  */
1818 
1819   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1820        mode = GET_MODE_WIDER_MODE (mode))
1821     if (GET_MODE_BITSIZE (mode) >= bitsize + bitnum)
1822       {
1823 	op0 = convert_to_mode (mode, op0, 0);
1824 	break;
1825       }
1826 
1827   if (mode != tmode)
1828     target = 0;
1829 
1830   if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
1831     {
1832       int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
1833       /* Maybe propagate the target for the shift.  */
1834       rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1835       op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1836     }
1837 
1838   return expand_shift (RSHIFT_EXPR, mode, op0,
1839 		       GET_MODE_BITSIZE (mode) - bitsize, target, 0);
1840 }
1841 
1842 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1843    of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1844    complement of that if COMPLEMENT.  The mask is truncated if
1845    necessary to the width of mode MODE.  The mask is zero-extended if
1846    BITSIZE+BITPOS is too small for MODE.  */
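/* Illustrative example: mask_rtx (SImode, 4, 8, 0) yields the SImode
   constant 0x00000ff0 (eight ones starting at bit 4); a nonzero
   COMPLEMENT instead yields 0xfffff00f.  */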
1847 
1848 static rtx
1849 mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
1850 {
1851   double_int mask;
1852 
1853   mask = double_int::mask (bitsize);
1854   mask = mask.llshift (bitpos, HOST_BITS_PER_DOUBLE_INT);
1855 
1856   if (complement)
1857     mask = ~mask;
1858 
1859   return immed_double_int_const (mask, mode);
1860 }
1861 
1862 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1863    VALUE truncated to BITSIZE bits and then shifted left BITPOS bits.  */
1864 
1865 static rtx
1866 lshift_value (enum machine_mode mode, rtx value, int bitpos, int bitsize)
1867 {
1868   double_int val;
1869 
1870   val = double_int::from_uhwi (INTVAL (value)).zext (bitsize);
1871   val = val.llshift (bitpos, HOST_BITS_PER_DOUBLE_INT);
1872 
1873   return immed_double_int_const (val, mode);
1874 }
1875 
1876 /* Extract a bit field that is split across two words
1877    and return an RTX for the result.
1878 
1879    OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1880    BITSIZE is the field width; BITPOS, position of its first bit, in the word.
1881    UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.  */
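/* Illustrative example: on a little-endian 32-bit target, a 12-bit field
   starting at bit 26 is assembled from two pieces: bits 26..31 of the
   first word supply the field's low 6 bits, bits 0..5 of the next word
   supply its high 6 bits (shifted left by 6), and the pieces are
   combined with IOR before any final sign extension.  */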
1882 
1883 static rtx
1884 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1885 			 unsigned HOST_WIDE_INT bitpos, int unsignedp)
1886 {
1887   unsigned int unit;
1888   unsigned int bitsdone = 0;
1889   rtx result = NULL_RTX;
1890   int first = 1;
1891 
1892   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1893      much at a time.  */
1894   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1895     unit = BITS_PER_WORD;
1896   else
1897     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1898 
1899   while (bitsdone < bitsize)
1900     {
1901       unsigned HOST_WIDE_INT thissize;
1902       rtx part, word;
1903       unsigned HOST_WIDE_INT thispos;
1904       unsigned HOST_WIDE_INT offset;
1905 
1906       offset = (bitpos + bitsdone) / unit;
1907       thispos = (bitpos + bitsdone) % unit;
1908 
1909       /* THISSIZE must not overrun a word boundary.  Otherwise,
1910 	 extract_fixed_bit_field will call us again, and we will mutually
1911 	 recurse forever.  */
1912       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1913       thissize = MIN (thissize, unit - thispos);
1914 
1915       /* If OP0 is a register, then handle OFFSET here.
1916 
1917 	 When handling multiword bitfields, extract_bit_field may pass
1918 	 down a word_mode SUBREG of a larger REG for a bitfield that actually
1919 	 crosses a word boundary.  Thus, for a SUBREG, we must find
1920 	 the current word starting from the base register.  */
1921       if (GET_CODE (op0) == SUBREG)
1922 	{
1923 	  int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1924 	  word = operand_subword_force (SUBREG_REG (op0), word_offset,
1925 					GET_MODE (SUBREG_REG (op0)));
1926 	  offset = 0;
1927 	}
1928       else if (REG_P (op0))
1929 	{
1930 	  word = operand_subword_force (op0, offset, GET_MODE (op0));
1931 	  offset = 0;
1932 	}
1933       else
1934 	word = op0;
1935 
1936       /* Extract the parts in bit-counting order,
1937 	 whose meaning is determined by BYTES_PER_UNIT.
1938 	 OFFSET is in UNITs, and UNIT is in bits.  */
1939       part = extract_fixed_bit_field (word_mode, word, thissize,
1940 				      offset * unit + thispos, 0, 1, false);
1941       bitsdone += thissize;
1942 
1943       /* Shift this part into place for the result.  */
1944       if (BYTES_BIG_ENDIAN)
1945 	{
1946 	  if (bitsize != bitsdone)
1947 	    part = expand_shift (LSHIFT_EXPR, word_mode, part,
1948 				 bitsize - bitsdone, 0, 1);
1949 	}
1950       else
1951 	{
1952 	  if (bitsdone != thissize)
1953 	    part = expand_shift (LSHIFT_EXPR, word_mode, part,
1954 				 bitsdone - thissize, 0, 1);
1955 	}
1956 
1957       if (first)
1958 	result = part;
1959       else
1960 	/* Combine the parts with bitwise or.  This works
1961 	   because we extracted each part as an unsigned bit field.  */
1962 	result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
1963 			       OPTAB_LIB_WIDEN);
1964 
1965       first = 0;
1966     }
1967 
1968   /* Unsigned bit field: we are done.  */
1969   if (unsignedp)
1970     return result;
1971   /* Signed bit field: sign-extend with two arithmetic shifts.  */
1972   result = expand_shift (LSHIFT_EXPR, word_mode, result,
1973 			 BITS_PER_WORD - bitsize, NULL_RTX, 0);
1974   return expand_shift (RSHIFT_EXPR, word_mode, result,
1975 		       BITS_PER_WORD - bitsize, NULL_RTX, 0);
1976 }
1977 
1978 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
1979    the bit pattern.  SRC_MODE is the mode of SRC; if this is smaller than
1980    MODE, fill the upper bits with zeros.  Fail if the layout of either
1981    mode is unknown (as for CC modes) or if the extraction would involve
1982    unprofitable mode punning.  Return the value on success, otherwise
1983    return null.
1984 
1985    This is different from gen_lowpart* in these respects:
1986 
1987      - the returned value must always be considered an rvalue
1988 
1989      - when MODE is wider than SRC_MODE, the extraction involves
1990        a zero extension
1991 
1992      - when MODE is smaller than SRC_MODE, the extraction involves
1993        a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
1994 
1995    In other words, this routine performs a computation, whereas the
1996    gen_lowpart* routines are conceptually lvalue or rvalue subreg
1997    operations.  */
1998 
1999 rtx
2000 extract_low_bits (enum machine_mode mode, enum machine_mode src_mode, rtx src)
2001 {
2002   enum machine_mode int_mode, src_int_mode;
2003 
2004   if (mode == src_mode)
2005     return src;
2006 
2007   if (CONSTANT_P (src))
2008     {
2009       /* simplify_gen_subreg can't be used here, as if simplify_subreg
2010 	 fails, it will happily create (subreg (symbol_ref)) or similar
2011 	 invalid SUBREGs.  */
2012       unsigned int byte = subreg_lowpart_offset (mode, src_mode);
2013       rtx ret = simplify_subreg (mode, src, src_mode, byte);
2014       if (ret)
2015 	return ret;
2016 
2017       if (GET_MODE (src) == VOIDmode
2018 	  || !validate_subreg (mode, src_mode, src, byte))
2019 	return NULL_RTX;
2020 
2021       src = force_reg (GET_MODE (src), src);
2022       return gen_rtx_SUBREG (mode, src, byte);
2023     }
2024 
2025   if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2026     return NULL_RTX;
2027 
2028   if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2029       && MODES_TIEABLE_P (mode, src_mode))
2030     {
2031       rtx x = gen_lowpart_common (mode, src);
2032       if (x)
2033         return x;
2034     }
2035 
2036   src_int_mode = int_mode_for_mode (src_mode);
2037   int_mode = int_mode_for_mode (mode);
2038   if (src_int_mode == BLKmode || int_mode == BLKmode)
2039     return NULL_RTX;
2040 
2041   if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2042     return NULL_RTX;
2043   if (!MODES_TIEABLE_P (int_mode, mode))
2044     return NULL_RTX;
2045 
2046   src = gen_lowpart (src_int_mode, src);
2047   src = convert_modes (int_mode, src_int_mode, src, true);
2048   src = gen_lowpart (mode, src);
2049   return src;
2050 }
2051 
2052 /* Add INC into TARGET.  */
2053 
2054 void
2055 expand_inc (rtx target, rtx inc)
2056 {
2057   rtx value = expand_binop (GET_MODE (target), add_optab,
2058 			    target, inc,
2059 			    target, 0, OPTAB_LIB_WIDEN);
2060   if (value != target)
2061     emit_move_insn (target, value);
2062 }
2063 
2064 /* Subtract DEC from TARGET.  */
2065 
2066 void
2067 expand_dec (rtx target, rtx dec)
2068 {
2069   rtx value = expand_binop (GET_MODE (target), sub_optab,
2070 			    target, dec,
2071 			    target, 0, OPTAB_LIB_WIDEN);
2072   if (value != target)
2073     emit_move_insn (target, value);
2074 }
2075 
2076 /* Output a shift instruction for expression code CODE,
2077    with SHIFTED being the rtx for the value to shift,
2078    and AMOUNT the rtx for the amount to shift by.
2079    Store the result in the rtx TARGET, if that is convenient.
2080    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2081    Return the rtx for where the value is.  */
2082 
2083 static rtx
2084 expand_shift_1 (enum tree_code code, enum machine_mode mode, rtx shifted,
2085 		rtx amount, rtx target, int unsignedp)
2086 {
2087   rtx op1, temp = 0;
2088   int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2089   int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2090   optab lshift_optab = ashl_optab;
2091   optab rshift_arith_optab = ashr_optab;
2092   optab rshift_uns_optab = lshr_optab;
2093   optab lrotate_optab = rotl_optab;
2094   optab rrotate_optab = rotr_optab;
2095   enum machine_mode op1_mode;
2096   int attempt;
2097   bool speed = optimize_insn_for_speed_p ();
2098 
2099   op1 = amount;
2100   op1_mode = GET_MODE (op1);
2101 
2102   /* Determine whether the shift/rotate amount is a vector or a scalar.  If the
2103      shift amount is a vector, use the vector/vector shift patterns.  */
2104   if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2105     {
2106       lshift_optab = vashl_optab;
2107       rshift_arith_optab = vashr_optab;
2108       rshift_uns_optab = vlshr_optab;
2109       lrotate_optab = vrotl_optab;
2110       rrotate_optab = vrotr_optab;
2111     }
2112 
2113   /* Previously we detected shift counts computed by NEGATE_EXPR
2114      and shifted in the other direction, but that does not work
2115      on all machines.  */
2116 
2117   if (SHIFT_COUNT_TRUNCATED)
2118     {
2119       if (CONST_INT_P (op1)
2120 	  && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2121 	      (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
2122 	op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2123 		       % GET_MODE_BITSIZE (mode));
2124       else if (GET_CODE (op1) == SUBREG
2125 	       && subreg_lowpart_p (op1)
2126 	       && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1)))
2127 	       && SCALAR_INT_MODE_P (GET_MODE (op1)))
2128 	op1 = SUBREG_REG (op1);
2129     }
2130 
2131   if (op1 == const0_rtx)
2132     return shifted;
2133 
2134   /* Check whether it's cheaper to implement a left shift by a constant
2135      bit count by a sequence of additions.  */
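  /* Illustrative sketch: a left shift by 2 becomes two self-additions,
     t1 = x + x; t2 = t1 + t1;  the loop below emits this form whenever
     the target's shift cost exceeds the cost of INTVAL (op1) adds.  */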
2136   if (code == LSHIFT_EXPR
2137       && CONST_INT_P (op1)
2138       && INTVAL (op1) > 0
2139       && INTVAL (op1) < GET_MODE_PRECISION (mode)
2140       && INTVAL (op1) < MAX_BITS_PER_WORD
2141       && (shift_cost (speed, mode, INTVAL (op1))
2142 	  > INTVAL (op1) * add_cost (speed, mode))
2143       && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2144     {
2145       int i;
2146       for (i = 0; i < INTVAL (op1); i++)
2147 	{
2148 	  temp = force_reg (mode, shifted);
2149 	  shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2150 				  unsignedp, OPTAB_LIB_WIDEN);
2151 	}
2152       return shifted;
2153     }
2154 
2155   for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2156     {
2157       enum optab_methods methods;
2158 
2159       if (attempt == 0)
2160 	methods = OPTAB_DIRECT;
2161       else if (attempt == 1)
2162 	methods = OPTAB_WIDEN;
2163       else
2164 	methods = OPTAB_LIB_WIDEN;
2165 
2166       if (rotate)
2167 	{
2168 	  /* Widening does not work for rotation.  */
2169 	  if (methods == OPTAB_WIDEN)
2170 	    continue;
2171 	  else if (methods == OPTAB_LIB_WIDEN)
2172 	    {
2173 	      /* If we have been unable to open-code this by a rotation,
2174 		 do it as the IOR of two shifts.  I.e., to rotate A
2175 		 by N bits, compute (A << N) | ((unsigned) A >> (C - N))
2176 		 where C is the bitsize of A.
2177 
2178 		 It is theoretically possible that the target machine might
2179 		 not be able to perform either shift and hence we would
2180 		 be making two libcalls rather than just the one for the
2181 		 shift (similarly if IOR could not be done).  We will allow
2182 		 this extremely unlikely lossage to avoid complicating the
2183 		 code below.  */
2184 
2185 	      rtx subtarget = target == shifted ? 0 : target;
2186 	      rtx new_amount, other_amount;
2187 	      rtx temp1;
2188 
2189 	      new_amount = op1;
2190 	      if (CONST_INT_P (op1))
2191 		other_amount = GEN_INT (GET_MODE_BITSIZE (mode)
2192 					- INTVAL (op1));
2193 	      else
2194 		other_amount
2195 		  = simplify_gen_binary (MINUS, GET_MODE (op1),
2196 					 GEN_INT (GET_MODE_PRECISION (mode)),
2197 					 op1);
2198 
2199 	      shifted = force_reg (mode, shifted);
2200 
2201 	      temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2202 				     mode, shifted, new_amount, 0, 1);
2203 	      temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2204 				      mode, shifted, other_amount,
2205 				      subtarget, 1);
2206 	      return expand_binop (mode, ior_optab, temp, temp1, target,
2207 				   unsignedp, methods);
2208 	    }
2209 
2210 	  temp = expand_binop (mode,
2211 			       left ? lrotate_optab : rrotate_optab,
2212 			       shifted, op1, target, unsignedp, methods);
2213 	}
2214       else if (unsignedp)
2215 	temp = expand_binop (mode,
2216 			     left ? lshift_optab : rshift_uns_optab,
2217 			     shifted, op1, target, unsignedp, methods);
2218 
2219       /* Do arithmetic shifts.
2220 	 Also, if we are going to widen the operand, we can just as well
2221 	 use an arithmetic right-shift instead of a logical one.  */
2222       if (temp == 0 && ! rotate
2223 	  && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2224 	{
2225 	  enum optab_methods methods1 = methods;
2226 
2227 	  /* If trying to widen a log shift to an arithmetic shift,
2228 	     don't accept an arithmetic shift of the same size.  */
2229 	  if (unsignedp)
2230 	    methods1 = OPTAB_MUST_WIDEN;
2231 
2232 	  /* Arithmetic shift */
2233 
2234 	  temp = expand_binop (mode,
2235 			       left ? lshift_optab : rshift_arith_optab,
2236 			       shifted, op1, target, unsignedp, methods1);
2237 	}
2238 
2239       /* We used to try extzv here for logical right shifts, but that was
2240 	 only useful for one machine, the VAX, and caused poor code
2241 	 generation there for lshrdi3, so the code was deleted and a
2242 	 define_expand for lshrsi3 was added to vax.md.  */
2243     }
2244 
2245   gcc_assert (temp);
2246   return temp;
2247 }
2248 
2249 /* Output a shift instruction for expression code CODE,
2250    with SHIFTED being the rtx for the value to shift,
2251    and AMOUNT the amount to shift by.
2252    Store the result in the rtx TARGET, if that is convenient.
2253    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2254    Return the rtx for where the value is.  */
2255 
2256 rtx
2257 expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2258 	      int amount, rtx target, int unsignedp)
2259 {
2260   return expand_shift_1 (code, mode,
2261 			 shifted, GEN_INT (amount), target, unsignedp);
2262 }
2263 
2264 /* Output a shift instruction for expression code CODE,
2265    with SHIFTED being the rtx for the value to shift,
2266    and AMOUNT the tree for the amount to shift by.
2267    Store the result in the rtx TARGET, if that is convenient.
2268    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2269    Return the rtx for where the value is.  */
2270 
2271 rtx
2272 expand_variable_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2273 		       tree amount, rtx target, int unsignedp)
2274 {
2275   return expand_shift_1 (code, mode,
2276 			 shifted, expand_normal (amount), target, unsignedp);
2277 }
2278 
2279 
2280 /* Indicates the type of fixup needed after a constant multiplication.
2281    BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2282    the result should be negated, and ADD_VARIANT means that the
2283    multiplicand should be added to the result.  */
2284 enum mult_variant {basic_variant, negate_variant, add_variant};
2285 
2286 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2287 			const struct mult_cost *, enum machine_mode mode);
2288 static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
2289 				 struct algorithm *, enum mult_variant *, int);
2290 static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
2291 			      const struct algorithm *, enum mult_variant);
2292 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2293 static rtx extract_high_half (enum machine_mode, rtx);
2294 static rtx expmed_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int);
2295 static rtx expmed_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
2296 				       int, int);
2297 /* Compute and return the best algorithm for multiplying by T.
2298    The algorithm must cost less than COST_LIMIT.
2299    If retval.cost >= COST_LIMIT, no algorithm was found and all
2300    other fields of the returned struct are undefined.
2301    MODE is the machine mode of the multiplication.  */
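/* Illustrative example: on a target where shifts and adds are cheap,
   multiplication by 10 can be synthesized as ((x << 2) + x) << 1, an
   alg_add_t2_m step followed by an alg_shift step; synth_mult explores
   such decompositions recursively under the given cost limit.  */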
2302 
2303 static void
2304 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2305 	    const struct mult_cost *cost_limit, enum machine_mode mode)
2306 {
2307   int m;
2308   struct algorithm *alg_in, *best_alg;
2309   struct mult_cost best_cost;
2310   struct mult_cost new_limit;
2311   int op_cost, op_latency;
2312   unsigned HOST_WIDE_INT orig_t = t;
2313   unsigned HOST_WIDE_INT q;
2314   int maxm, hash_index;
2315   bool cache_hit = false;
2316   enum alg_code cache_alg = alg_zero;
2317   bool speed = optimize_insn_for_speed_p ();
2318   enum machine_mode imode;
2319   struct alg_hash_entry *entry_ptr;
2320 
2321   /* Indicate that no algorithm is yet found.  If no algorithm
2322      is found, this value will be returned to indicate failure.  */
2323   alg_out->cost.cost = cost_limit->cost + 1;
2324   alg_out->cost.latency = cost_limit->latency + 1;
2325 
2326   if (cost_limit->cost < 0
2327       || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2328     return;
2329 
2330   /* Be prepared for vector modes.  */
2331   imode = GET_MODE_INNER (mode);
2332   if (imode == VOIDmode)
2333     imode = mode;
2334 
2335   maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2336 
2337   /* Restrict the bits of "t" to the multiplication's mode.  */
2338   t &= GET_MODE_MASK (imode);
2339 
2340   /* t == 1 can be done in zero cost.  */
2341   if (t == 1)
2342     {
2343       alg_out->ops = 1;
2344       alg_out->cost.cost = 0;
2345       alg_out->cost.latency = 0;
2346       alg_out->op[0] = alg_m;
2347       return;
2348     }
2349 
2350   /* t == 0 sometimes has a cost.  If it does and it exceeds our limit,
2351      fail now.  */
2352   if (t == 0)
2353     {
2354       if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2355 	return;
2356       else
2357 	{
2358 	  alg_out->ops = 1;
2359 	  alg_out->cost.cost = zero_cost (speed);
2360 	  alg_out->cost.latency = zero_cost (speed);
2361 	  alg_out->op[0] = alg_zero;
2362 	  return;
2363 	}
2364     }
2365 
2366   /* We'll be needing a couple extra algorithm structures now.  */
2367 
2368   alg_in = XALLOCA (struct algorithm);
2369   best_alg = XALLOCA (struct algorithm);
2370   best_cost = *cost_limit;
2371 
2372   /* Compute the hash index.  */
2373   hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2374 
2375   /* See if we already know what to do for T.  */
2376   entry_ptr = alg_hash_entry_ptr (hash_index);
2377   if (entry_ptr->t == t
2378       && entry_ptr->mode == mode
2380       && entry_ptr->speed == speed
2381       && entry_ptr->alg != alg_unknown)
2382     {
2383       cache_alg = entry_ptr->alg;
2384 
2385       if (cache_alg == alg_impossible)
2386 	{
2387 	  /* The cache tells us that it's impossible to synthesize
2388 	     multiplication by T within entry_ptr->cost.  */
2389 	  if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2390 	    /* COST_LIMIT is at least as restrictive as the one
2391 	       recorded in the hash table, in which case we have no
2392 	       hope of synthesizing a multiplication.  Just
2393 	       return.  */
2394 	    return;
2395 
2396 	  /* If we get here, COST_LIMIT is less restrictive than the
2397 	     one recorded in the hash table, so we may be able to
2398 	     synthesize a multiplication.  Proceed as if we didn't
2399 	     have the cache entry.  */
2400 	}
2401       else
2402 	{
2403 	  if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2404 	    /* The cached algorithm shows that this multiplication
2405 	       requires more cost than COST_LIMIT.  Just return.  This
2406 	       way, we don't clobber this cache entry with
2407 	       alg_impossible but retain useful information.  */
2408 	    return;
2409 
2410 	  cache_hit = true;
2411 
2412 	  switch (cache_alg)
2413 	    {
2414 	    case alg_shift:
2415 	      goto do_alg_shift;
2416 
2417 	    case alg_add_t_m2:
2418 	    case alg_sub_t_m2:
2419 	      goto do_alg_addsub_t_m2;
2420 
2421 	    case alg_add_factor:
2422 	    case alg_sub_factor:
2423 	      goto do_alg_addsub_factor;
2424 
2425 	    case alg_add_t2_m:
2426 	      goto do_alg_add_t2_m;
2427 
2428 	    case alg_sub_t2_m:
2429 	      goto do_alg_sub_t2_m;
2430 
2431 	    default:
2432 	      gcc_unreachable ();
2433 	    }
2434 	}
2435     }
2436 
2437   /* If we have a group of zero bits at the low-order part of T, try
2438      multiplying by the remaining bits and then doing a shift.  */
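  /* E.g. (illustrative) t = 24 = 3 << 3: synthesize the multiplication
     by 3 recursively, then append a final left shift by 3.  */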
2439 
2440   if ((t & 1) == 0)
2441     {
2442     do_alg_shift:
2443       m = floor_log2 (t & -t);	/* m = number of low zero bits */
2444       if (m < maxm)
2445 	{
2446 	  q = t >> m;
2447 	  /* The function expand_shift will choose between a shift and
2448 	     a sequence of additions, so the observed cost is given as
2449 	     MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)).  */
2450 	  op_cost = m * add_cost (speed, mode);
2451 	  if (shift_cost (speed, mode, m) < op_cost)
2452 	    op_cost = shift_cost (speed, mode, m);
2453 	  new_limit.cost = best_cost.cost - op_cost;
2454 	  new_limit.latency = best_cost.latency - op_cost;
2455 	  synth_mult (alg_in, q, &new_limit, mode);
2456 
2457 	  alg_in->cost.cost += op_cost;
2458 	  alg_in->cost.latency += op_cost;
2459 	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2460 	    {
2461 	      struct algorithm *x;
2462 	      best_cost = alg_in->cost;
2463 	      x = alg_in, alg_in = best_alg, best_alg = x;
2464 	      best_alg->log[best_alg->ops] = m;
2465 	      best_alg->op[best_alg->ops] = alg_shift;
2466 	    }
2467 
2468 	  /* See if treating ORIG_T as a signed number yields a better
2469 	     sequence.  Try this sequence only for a negative ORIG_T
2470 	     as it would be useless for a non-negative ORIG_T.  */
2471 	  if ((HOST_WIDE_INT) orig_t < 0)
2472 	    {
2473 	      /* Shift ORIG_T as follows because a right shift of a
2474 		 negative-valued signed type is implementation
2475 		 defined.  */
2476 	      q = ~(~orig_t >> m);
2477 	      /* The function expand_shift will choose between a shift
2478 		 and a sequence of additions, so the observed cost is
2479 		 given as MIN (m * add_cost(speed, mode),
2480 		 shift_cost(speed, mode, m)).  */
2481 	      op_cost = m * add_cost (speed, mode);
2482 	      if (shift_cost (speed, mode, m) < op_cost)
2483 		op_cost = shift_cost (speed, mode, m);
2484 	      new_limit.cost = best_cost.cost - op_cost;
2485 	      new_limit.latency = best_cost.latency - op_cost;
2486 	      synth_mult (alg_in, q, &new_limit, mode);
2487 
2488 	      alg_in->cost.cost += op_cost;
2489 	      alg_in->cost.latency += op_cost;
2490 	      if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2491 		{
2492 		  struct algorithm *x;
2493 		  best_cost = alg_in->cost;
2494 		  x = alg_in, alg_in = best_alg, best_alg = x;
2495 		  best_alg->log[best_alg->ops] = m;
2496 		  best_alg->op[best_alg->ops] = alg_shift;
2497 		}
2498 	    }
2499 	}
2500       if (cache_hit)
2501 	goto done;
2502     }
2503 
2504   /* If we have an odd number, add or subtract one.  */
2505   if ((t & 1) != 0)
2506     {
2507       unsigned HOST_WIDE_INT w;
2508 
2509     do_alg_addsub_t_m2:
2510       for (w = 1; (w & t) != 0; w <<= 1)
2511 	;
2512       /* If T was -1, then W will be zero after the loop.  This is another
2513 	 case where T ends with ...111.  Handling this with (T + 1) and
2514 	 subtracting 1 produces slightly better code and results in much
2515 	 faster algorithm selection than treating it like the ...0111 case
2516 	 below.  */
2517       if (w == 0
2518 	  || (w > 2
2519 	      /* Reject the case where t is 3.
2520 		 Thus we prefer addition in that case.  */
2521 	      && t != 3))
2522 	{
2523 	  /* T ends with ...111.  Multiply by (T + 1) and subtract 1.  */
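	  /* E.g. (illustrative) T = 7: synthesize x * 8 (a single shift)
	     and subtract x once, giving x * 7.  */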
2524 
2525 	  op_cost = add_cost (speed, mode);
2526 	  new_limit.cost = best_cost.cost - op_cost;
2527 	  new_limit.latency = best_cost.latency - op_cost;
2528 	  synth_mult (alg_in, t + 1, &new_limit, mode);
2529 
2530 	  alg_in->cost.cost += op_cost;
2531 	  alg_in->cost.latency += op_cost;
2532 	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2533 	    {
2534 	      struct algorithm *x;
2535 	      best_cost = alg_in->cost;
2536 	      x = alg_in, alg_in = best_alg, best_alg = x;
2537 	      best_alg->log[best_alg->ops] = 0;
2538 	      best_alg->op[best_alg->ops] = alg_sub_t_m2;
2539 	    }
2540 	}
2541       else
2542 	{
2543 	  /* T ends with ...01 or ...011.  Multiply by (T - 1) and add 1.  */
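	  /* E.g. (illustrative) T = 9: synthesize x * 8 (a single shift)
	     and add x back, giving x * 9.  */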
2544 
2545 	  op_cost = add_cost (speed, mode);
2546 	  new_limit.cost = best_cost.cost - op_cost;
2547 	  new_limit.latency = best_cost.latency - op_cost;
2548 	  synth_mult (alg_in, t - 1, &new_limit, mode);
2549 
2550 	  alg_in->cost.cost += op_cost;
2551 	  alg_in->cost.latency += op_cost;
2552 	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2553 	    {
2554 	      struct algorithm *x;
2555 	      best_cost = alg_in->cost;
2556 	      x = alg_in, alg_in = best_alg, best_alg = x;
2557 	      best_alg->log[best_alg->ops] = 0;
2558 	      best_alg->op[best_alg->ops] = alg_add_t_m2;
2559 	    }
2560 	}
2561 
2562       /* We may be able to calculate a * -7, a * -15, a * -31, etc.
2563 	 quickly with a - a * n for some appropriate constant n.  */
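      /* E.g. (illustrative) ORIG_T = -7: -ORIG_T + 1 = 8, so m = 3 and
	 the product is formed as a - (a << 3), a single
	 shift-and-subtract.  */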
2564       m = exact_log2 (-orig_t + 1);
2565       if (m >= 0 && m < maxm)
2566 	{
2567 	  op_cost = shiftsub1_cost (speed, mode, m);
2568 	  new_limit.cost = best_cost.cost - op_cost;
2569 	  new_limit.latency = best_cost.latency - op_cost;
2570 	  synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
2571 		      &new_limit, mode);
2572 
2573 	  alg_in->cost.cost += op_cost;
2574 	  alg_in->cost.latency += op_cost;
2575 	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2576 	    {
2577 	      struct algorithm *x;
2578 	      best_cost = alg_in->cost;
2579 	      x = alg_in, alg_in = best_alg, best_alg = x;
2580 	      best_alg->log[best_alg->ops] = m;
2581 	      best_alg->op[best_alg->ops] = alg_sub_t_m2;
2582 	    }
2583 	}
2584 
2585       if (cache_hit)
2586 	goto done;
2587     }
2588 
2589   /* Look for factors of t of the form
2590      t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2591      If we find such a factor, we can multiply by t using an algorithm that
2592      multiplies by q, shifts the result by m, and adds/subtracts it to/from itself.
2593 
2594      We search for large factors first and loop down, even if large factors
2595      are less probable than small; if we find a large factor we will find a
2596      good sequence quickly, and therefore be able to prune (by decreasing
2597      COST_LIMIT) the search.  */
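  /* E.g. (illustrative) t = 45 = 9 * (2**2 + 1): synthesize x * 9
     recursively, then finish with one shift-and-add,
     accum = (accum << 2) + accum.  */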
2598 
2599  do_alg_addsub_factor:
2600   for (m = floor_log2 (t - 1); m >= 2; m--)
2601     {
2602       unsigned HOST_WIDE_INT d;
2603 
2604       d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2605       if (t % d == 0 && t > d && m < maxm
2606 	  && (!cache_hit || cache_alg == alg_add_factor))
2607 	{
2608 	  /* If the target has a cheap shift-and-add instruction use
2609 	     that in preference to a shift insn followed by an add insn.
2610 	     Assume that the shift-and-add is "atomic" with a latency
2611 	     equal to its cost, otherwise assume that on superscalar
2612 	     hardware the shift may be executed concurrently with the
2613 	     earlier steps in the algorithm.  */
2614 	  op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2615 	  if (shiftadd_cost (speed, mode, m) < op_cost)
2616 	    {
2617 	      op_cost = shiftadd_cost (speed, mode, m);
2618 	      op_latency = op_cost;
2619 	    }
2620 	  else
2621 	    op_latency = add_cost (speed, mode);
2622 
2623 	  new_limit.cost = best_cost.cost - op_cost;
2624 	  new_limit.latency = best_cost.latency - op_latency;
2625 	  synth_mult (alg_in, t / d, &new_limit, mode);
2626 
2627 	  alg_in->cost.cost += op_cost;
2628 	  alg_in->cost.latency += op_latency;
2629 	  if (alg_in->cost.latency < op_cost)
2630 	    alg_in->cost.latency = op_cost;
2631 	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2632 	    {
2633 	      struct algorithm *x;
2634 	      best_cost = alg_in->cost;
2635 	      x = alg_in, alg_in = best_alg, best_alg = x;
2636 	      best_alg->log[best_alg->ops] = m;
2637 	      best_alg->op[best_alg->ops] = alg_add_factor;
2638 	    }
2639 	  /* Other factors will have been taken care of in the recursion.  */
2640 	  break;
2641 	}
2642 
2643       d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2644       if (t % d == 0 && t > d && m < maxm
2645 	  && (!cache_hit || cache_alg == alg_sub_factor))
2646 	{
2647 	  /* If the target has a cheap shift-and-subtract insn use
2648 	     that in preference to a shift insn followed by a sub insn.
2649 	     Assume that the shift-and-sub is "atomic" with a latency
2650 	     equal to its cost, otherwise assume that on superscalar
2651 	     hardware the shift may be executed concurrently with the
2652 	     earlier steps in the algorithm.  */
2653 	  op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2654 	  if (shiftsub0_cost (speed, mode, m) < op_cost)
2655 	    {
2656 	      op_cost = shiftsub0_cost (speed, mode, m);
2657 	      op_latency = op_cost;
2658 	    }
2659 	  else
2660 	    op_latency = add_cost (speed, mode);
2661 
2662 	  new_limit.cost = best_cost.cost - op_cost;
2663 	  new_limit.latency = best_cost.latency - op_latency;
2664 	  synth_mult (alg_in, t / d, &new_limit, mode);
2665 
2666 	  alg_in->cost.cost += op_cost;
2667 	  alg_in->cost.latency += op_latency;
2668 	  if (alg_in->cost.latency < op_cost)
2669 	    alg_in->cost.latency = op_cost;
2670 	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2671 	    {
2672 	      struct algorithm *x;
2673 	      best_cost = alg_in->cost;
2674 	      x = alg_in, alg_in = best_alg, best_alg = x;
2675 	      best_alg->log[best_alg->ops] = m;
2676 	      best_alg->op[best_alg->ops] = alg_sub_factor;
2677 	    }
2678 	  break;
2679 	}
2680     }
2681   if (cache_hit)
2682     goto done;
2683 
2684   /* Try shift-and-add (load effective address) instructions,
2685      i.e. do a*3, a*5, a*9.  */
2686   if ((t & 1) != 0)
2687     {
2688     do_alg_add_t2_m:
2689       q = t - 1;
2690       q = q & -q;
2691       m = exact_log2 (q);
2692       if (m >= 0 && m < maxm)
2693 	{
2694 	  op_cost = shiftadd_cost (speed, mode, m);
2695 	  new_limit.cost = best_cost.cost - op_cost;
2696 	  new_limit.latency = best_cost.latency - op_cost;
2697 	  synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2698 
2699 	  alg_in->cost.cost += op_cost;
2700 	  alg_in->cost.latency += op_cost;
2701 	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2702 	    {
2703 	      struct algorithm *x;
2704 	      best_cost = alg_in->cost;
2705 	      x = alg_in, alg_in = best_alg, best_alg = x;
2706 	      best_alg->log[best_alg->ops] = m;
2707 	      best_alg->op[best_alg->ops] = alg_add_t2_m;
2708 	    }
2709 	}
2710       if (cache_hit)
2711 	goto done;
2712 
2713     do_alg_sub_t2_m:
2714       q = t + 1;
2715       q = q & -q;
2716       m = exact_log2 (q);
2717       if (m >= 0 && m < maxm)
2718 	{
2719 	  op_cost = shiftsub0_cost (speed, mode, m);
2720 	  new_limit.cost = best_cost.cost - op_cost;
2721 	  new_limit.latency = best_cost.latency - op_cost;
2722 	  synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2723 
2724 	  alg_in->cost.cost += op_cost;
2725 	  alg_in->cost.latency += op_cost;
2726 	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2727 	    {
2728 	      struct algorithm *x;
2729 	      best_cost = alg_in->cost;
2730 	      x = alg_in, alg_in = best_alg, best_alg = x;
2731 	      best_alg->log[best_alg->ops] = m;
2732 	      best_alg->op[best_alg->ops] = alg_sub_t2_m;
2733 	    }
2734 	}
2735       if (cache_hit)
2736 	goto done;
2737     }
2738 
2739  done:
2740   /* If best_cost has not decreased, we have not found any algorithm.  */
2741   if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2742     {
2743       /* We failed to find an algorithm.  Record alg_impossible for
2744 	 this case (that is, <T, MODE, COST_LIMIT>) so that next time
2745 	 we are asked to find an algorithm for T within the same or
2746 	 lower COST_LIMIT, we can immediately return to the
2747 	 caller.  */
2748       entry_ptr->t = t;
2749       entry_ptr->mode = mode;
2750       entry_ptr->speed = speed;
2751       entry_ptr->alg = alg_impossible;
2752       entry_ptr->cost = *cost_limit;
2753       return;
2754     }
2755 
2756   /* Cache the result.  */
2757   if (!cache_hit)
2758     {
2759       entry_ptr->t = t;
2760       entry_ptr->mode = mode;
2761       entry_ptr->speed = speed;
2762       entry_ptr->alg = best_alg->op[best_alg->ops];
2763       entry_ptr->cost.cost = best_cost.cost;
2764       entry_ptr->cost.latency = best_cost.latency;
2765     }
2766 
2767   /* If the sequence is getting too long for `struct algorithm'
2768      to record, make this search fail.  */
2769   if (best_alg->ops == MAX_BITS_PER_WORD)
2770     return;
2771 
2772   /* Copy the algorithm from temporary space to the space at alg_out.
2773      We avoid using structure assignment because the majority of
2774      best_alg is normally undefined, and this is a critical function.  */
2775   alg_out->ops = best_alg->ops + 1;
2776   alg_out->cost = best_cost;
2777   memcpy (alg_out->op, best_alg->op,
2778 	  alg_out->ops * sizeof *alg_out->op);
2779   memcpy (alg_out->log, best_alg->log,
2780 	  alg_out->ops * sizeof *alg_out->log);
2781 }
2782 
2783 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2784    Try three variations:
2785 
2786        - a shift/add sequence based on VAL itself
2787        - a shift/add sequence based on -VAL, followed by a negation
2788        - a shift/add sequence based on VAL - 1, followed by an addition.
2789 
2790    Return true if the cheapest of these costs less than MULT_COST,
2791    describing the algorithm in *ALG and final fixup in *VARIANT.  */
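/* Illustrative example: for VAL = -9 the negate variant may win,
   computing x * 9 as (x << 3) + x and then negating; for VAL = 6 the
   add variant may win, computing x * 5 as (x << 2) + x and then adding
   x.  Which variant is chosen depends on the target's add, shift and
   negation costs.  */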
2792 
2793 static bool
2794 choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
2795 		     struct algorithm *alg, enum mult_variant *variant,
2796 		     int mult_cost)
2797 {
2798   struct algorithm alg2;
2799   struct mult_cost limit;
2800   int op_cost;
2801   bool speed = optimize_insn_for_speed_p ();
2802 
2803   /* Fail quickly for impossible bounds.  */
2804   if (mult_cost < 0)
2805     return false;
2806 
2807   /* Ensure that mult_cost provides a reasonable upper bound.
2808      Any constant multiplication can be performed with less
2809      than 2 * bits additions.  */
2810   op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
2811   if (mult_cost > op_cost)
2812     mult_cost = op_cost;
2813 
2814   *variant = basic_variant;
2815   limit.cost = mult_cost;
2816   limit.latency = mult_cost;
2817   synth_mult (alg, val, &limit, mode);
2818 
2819   /* This works only if the inverted value actually fits in an
2820      `unsigned int'.  */
2821   if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
2822     {
2823       op_cost = neg_cost(speed, mode);
2824       if (MULT_COST_LESS (&alg->cost, mult_cost))
2825 	{
2826 	  limit.cost = alg->cost.cost - op_cost;
2827 	  limit.latency = alg->cost.latency - op_cost;
2828 	}
2829       else
2830 	{
2831 	  limit.cost = mult_cost - op_cost;
2832 	  limit.latency = mult_cost - op_cost;
2833 	}
2834 
2835       synth_mult (&alg2, -val, &limit, mode);
2836       alg2.cost.cost += op_cost;
2837       alg2.cost.latency += op_cost;
2838       if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2839 	*alg = alg2, *variant = negate_variant;
2840     }
2841 
2842   /* This proves very useful for division-by-constant.  */
2843   op_cost = add_cost (speed, mode);
2844   if (MULT_COST_LESS (&alg->cost, mult_cost))
2845     {
2846       limit.cost = alg->cost.cost - op_cost;
2847       limit.latency = alg->cost.latency - op_cost;
2848     }
2849   else
2850     {
2851       limit.cost = mult_cost - op_cost;
2852       limit.latency = mult_cost - op_cost;
2853     }
2854 
2855   synth_mult (&alg2, val - 1, &limit, mode);
2856   alg2.cost.cost += op_cost;
2857   alg2.cost.latency += op_cost;
2858   if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2859     *alg = alg2, *variant = add_variant;
2860 
2861   return MULT_COST_LESS (&alg->cost, mult_cost);
2862 }
2863 
2864 /* A subroutine of expand_mult, used for constant multiplications.
2865    Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2866    convenient.  Use the shift/add sequence described by ALG and apply
2867    the final fixup specified by VARIANT.  */
2868 
2869 static rtx
2870 expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
2871 		   rtx target, const struct algorithm *alg,
2872 		   enum mult_variant variant)
2873 {
2874   HOST_WIDE_INT val_so_far;
2875   rtx insn, accum, tem;
2876   int opno;
2877   enum machine_mode nmode;
2878 
2879   /* Avoid referencing memory over and over and invalid sharing
2880      on SUBREGs.  */
2881   op0 = force_reg (mode, op0);
2882 
2883   /* ACCUM starts out either as OP0 or as a zero, depending on
2884      the first operation.  */
2885 
2886   if (alg->op[0] == alg_zero)
2887     {
2888       accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
2889       val_so_far = 0;
2890     }
2891   else if (alg->op[0] == alg_m)
2892     {
2893       accum = copy_to_mode_reg (mode, op0);
2894       val_so_far = 1;
2895     }
2896   else
2897     gcc_unreachable ();
2898 
2899   for (opno = 1; opno < alg->ops; opno++)
2900     {
2901       int log = alg->log[opno];
2902       rtx shift_subtarget = optimize ? 0 : accum;
2903       rtx add_target
2904 	= (opno == alg->ops - 1 && target != 0 && variant != add_variant
2905 	   && !optimize)
2906 	  ? target : 0;
2907       rtx accum_target = optimize ? 0 : accum;
2908       rtx accum_inner;
2909 
2910       switch (alg->op[opno])
2911 	{
2912 	case alg_shift:
2913 	  tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
2914 	  /* REG_EQUAL note will be attached to the following insn.  */
2915 	  emit_move_insn (accum, tem);
2916 	  val_so_far <<= log;
2917 	  break;
2918 
2919 	case alg_add_t_m2:
2920 	  tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
2921 	  accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2922 				 add_target ? add_target : accum_target);
2923 	  val_so_far += (HOST_WIDE_INT) 1 << log;
2924 	  break;
2925 
2926 	case alg_sub_t_m2:
2927 	  tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
2928 	  accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
2929 				 add_target ? add_target : accum_target);
2930 	  val_so_far -= (HOST_WIDE_INT) 1 << log;
2931 	  break;
2932 
2933 	case alg_add_t2_m:
2934 	  accum = expand_shift (LSHIFT_EXPR, mode, accum,
2935 				log, shift_subtarget, 0);
2936 	  accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
2937 				 add_target ? add_target : accum_target);
2938 	  val_so_far = (val_so_far << log) + 1;
2939 	  break;
2940 
2941 	case alg_sub_t2_m:
2942 	  accum = expand_shift (LSHIFT_EXPR, mode, accum,
2943 				log, shift_subtarget, 0);
2944 	  accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
2945 				 add_target ? add_target : accum_target);
2946 	  val_so_far = (val_so_far << log) - 1;
2947 	  break;
2948 
2949 	case alg_add_factor:
2950 	  tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
2951 	  accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2952 				 add_target ? add_target : accum_target);
2953 	  val_so_far += val_so_far << log;
2954 	  break;
2955 
2956 	case alg_sub_factor:
2957 	  tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
2958 	  accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
2959 				 (add_target
2960 				  ? add_target : (optimize ? 0 : tem)));
2961 	  val_so_far = (val_so_far << log) - val_so_far;
2962 	  break;
2963 
2964 	default:
2965 	  gcc_unreachable ();
2966 	}
2967 
2968       if (SCALAR_INT_MODE_P (mode))
2969 	{
2970 	  /* Write a REG_EQUAL note on the last insn so that we can cse
2971 	     multiplication sequences.  Note that if ACCUM is a SUBREG,
2972 	     we've set the inner register and must properly indicate that.  */
2973           tem = op0, nmode = mode;
2974           accum_inner = accum;
2975           if (GET_CODE (accum) == SUBREG)
2976 	    {
2977 	      accum_inner = SUBREG_REG (accum);
2978 	      nmode = GET_MODE (accum_inner);
2979 	      tem = gen_lowpart (nmode, op0);
2980 	    }
2981 
2982           insn = get_last_insn ();
2983           set_dst_reg_note (insn, REG_EQUAL,
2984 			    gen_rtx_MULT (nmode, tem, GEN_INT (val_so_far)),
2985 			    accum_inner);
2986 	}
2987     }
2988 
2989   if (variant == negate_variant)
2990     {
2991       val_so_far = -val_so_far;
2992       accum = expand_unop (mode, neg_optab, accum, target, 0);
2993     }
2994   else if (variant == add_variant)
2995     {
2996       val_so_far = val_so_far + 1;
2997       accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
2998     }
2999 
3000   /* Compare only the bits of val and val_so_far that are significant
3001      in the result mode, to avoid sign-/zero-extension confusion.  */
3002   nmode = GET_MODE_INNER (mode);
3003   if (nmode == VOIDmode)
3004     nmode = mode;
3005   val &= GET_MODE_MASK (nmode);
3006   val_so_far &= GET_MODE_MASK (nmode);
3007   gcc_assert (val == val_so_far);
3008 
3009   return accum;
3010 }
3011 
3012 /* Perform a multiplication and return an rtx for the result.
3013    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3014    TARGET is a suggestion for where to store the result (an rtx).
3015 
3016    We check specially for a constant integer as OP1.
3017    If you want this check for OP0 as well, then before calling
3018    you should swap the two operands if OP0 would be constant.  */
3019 
3020 rtx
3021 expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3022 	     int unsignedp)
3023 {
3024   enum mult_variant variant;
3025   struct algorithm algorithm;
3026   rtx scalar_op1;
3027   int max_cost;
3028   bool speed = optimize_insn_for_speed_p ();
3029   bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
3030 
3031   if (CONSTANT_P (op0))
3032     {
3033       rtx temp = op0;
3034       op0 = op1;
3035       op1 = temp;
3036     }
3037 
3038   /* For vectors, there are several simplifications that can be made if
3039      all elements of the vector constant are identical.  */
3040   scalar_op1 = op1;
3041   if (GET_CODE (op1) == CONST_VECTOR)
3042     {
3043       int i, n = CONST_VECTOR_NUNITS (op1);
3044       scalar_op1 = CONST_VECTOR_ELT (op1, 0);
3045       for (i = 1; i < n; ++i)
3046 	if (!rtx_equal_p (scalar_op1, CONST_VECTOR_ELT (op1, i)))
3047 	  goto skip_scalar;
3048     }
3049 
3050   if (INTEGRAL_MODE_P (mode))
3051     {
3052       rtx fake_reg;
3053       HOST_WIDE_INT coeff;
3054       bool is_neg;
3055       int mode_bitsize;
3056 
3057       if (op1 == CONST0_RTX (mode))
3058 	return op1;
3059       if (op1 == CONST1_RTX (mode))
3060 	return op0;
3061       if (op1 == CONSTM1_RTX (mode))
3062 	return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3063 			    op0, target, 0);
3064 
3065       if (do_trapv)
3066 	goto skip_synth;
3067 
3068       /* These are the operations that are potentially turned into
3069 	 a sequence of shifts and additions.  */
3070       mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
3071 
3072       /* synth_mult does an `unsigned int' multiply.  As long as the mode is
3073 	 less than or equal in size to `unsigned int' this doesn't matter.
3074 	 If the mode is larger than `unsigned int', then synth_mult works
3075 	 only if the constant value exactly fits in an `unsigned int' without
3076 	 any truncation.  This means that multiplying by negative values does
3077 	 not work; results are off by 2^32 on a 32 bit machine.  */
3078 
3079       if (CONST_INT_P (scalar_op1))
3080 	{
3081 	  coeff = INTVAL (scalar_op1);
3082 	  is_neg = coeff < 0;
3083 	}
3084       else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
3085 	{
3086 	  /* If we are multiplying in DImode, it may still be a win
3087 	     to try to work with shifts and adds.  */
3088 	  if (CONST_DOUBLE_HIGH (scalar_op1) == 0
3089 	      && (CONST_DOUBLE_LOW (scalar_op1) > 0
3090 		  || (CONST_DOUBLE_LOW (scalar_op1) < 0
3091 		      && EXACT_POWER_OF_2_OR_ZERO_P
3092 			   (CONST_DOUBLE_LOW (scalar_op1)))))
3093 	    {
3094 	      coeff = CONST_DOUBLE_LOW (scalar_op1);
3095 	      is_neg = false;
3096 	    }
3097 	  else if (CONST_DOUBLE_LOW (scalar_op1) == 0)
3098 	    {
3099 	      coeff = CONST_DOUBLE_HIGH (scalar_op1);
3100 	      if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3101 		{
3102 		  int shift = floor_log2 (coeff) + HOST_BITS_PER_WIDE_INT;
3103 		  if (shift < HOST_BITS_PER_DOUBLE_INT - 1
3104 		      || mode_bitsize <= HOST_BITS_PER_DOUBLE_INT)
3105 		    return expand_shift (LSHIFT_EXPR, mode, op0,
3106 					 shift, target, unsignedp);
3107 		}
3108 	      goto skip_synth;
3109 	    }
3110 	  else
3111 	    goto skip_synth;
3112 	}
3113       else
3114 	goto skip_synth;
3115 
3116       /* We used to test optimize here, on the grounds that it's better to
3117 	 produce a smaller program when -O is not used.  But this causes
3118 	 such a terrible slowdown sometimes that it seems better to always
3119 	 use synth_mult.  */
3120 
3121       /* Special case powers of two.  */
3122       if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)
3123 	  && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT))
3124 	return expand_shift (LSHIFT_EXPR, mode, op0,
3125 			     floor_log2 (coeff), target, unsignedp);
3126 
3127       fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3128 
3129       /* Attempt to handle multiplication of DImode values by negative
3130 	 coefficients, by performing the multiplication by a positive
3131 	 multiplier and then negating the result.  */
3132       if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3133 	{
3134 	  /* It's safe to use -coeff even for INT_MIN, as the
3135 	     result is interpreted as an unsigned coefficient.
3136 	     Exclude cost of op0 from max_cost to match the cost
3137 	     calculation of the synth_mult.  */
3138 	  coeff = -(unsigned HOST_WIDE_INT) coeff;
3139 	  max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed)
3140 		      - neg_cost(speed, mode));
3141 	  if (max_cost <= 0)
3142 	    goto skip_synth;
3143 
3144 	  /* Special case powers of two.  */
3145 	  if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3146 	    {
3147 	      rtx temp = expand_shift (LSHIFT_EXPR, mode, op0,
3148 				       floor_log2 (coeff), target, unsignedp);
3149 	      return expand_unop (mode, neg_optab, temp, target, 0);
3150 	    }
3151 
3152 	  if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3153 				   max_cost))
3154 	    {
3155 	      rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX,
3156 					    &algorithm, variant);
3157 	      return expand_unop (mode, neg_optab, temp, target, 0);
3158 	    }
3159 	  goto skip_synth;
3160 	}
3161 
3162       /* Exclude cost of op0 from max_cost to match the cost
3163 	 calculation of the synth_mult.  */
3164       max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed);
3165       if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3166 	return expand_mult_const (mode, op0, coeff, target,
3167 				  &algorithm, variant);
3168     }
3169  skip_synth:
3170 
3171   /* Expand x*2.0 as x+x.  */
3172   if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1))
3173     {
3174       REAL_VALUE_TYPE d;
3175       REAL_VALUE_FROM_CONST_DOUBLE (d, scalar_op1);
3176 
3177       if (REAL_VALUES_EQUAL (d, dconst2))
3178 	{
3179 	  op0 = force_reg (GET_MODE (op0), op0);
3180 	  return expand_binop (mode, add_optab, op0, op0,
3181 			       target, unsignedp, OPTAB_LIB_WIDEN);
3182 	}
3183     }
3184  skip_scalar:
3185 
3186   /* This used to use umul_optab if unsigned, but for non-widening multiply
3187      there is no difference between signed and unsigned.  */
3188   op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
3189 		      op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3190   gcc_assert (op0);
3191   return op0;
3192 }
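
/* Note on the negative-coefficient path above: when the mode is wider than
   HOST_WIDE_INT and the coefficient is negative (e.g. a DImode multiplication
   by -5 on a host with a 32-bit HOST_WIDE_INT), the multiplication is
   attempted as a synthesized multiplication by 5 followed by a negation, and
   that route is taken only if the shift/add sequence plus neg_cost still
   beats a real multiply.  */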
3193 
3194 /* Return a cost estimate for multiplying a register by the given
3195    COEFFicient in the given MODE and SPEED.  */
3196 
3197 int
3198 mult_by_coeff_cost (HOST_WIDE_INT coeff, enum machine_mode mode, bool speed)
3199 {
3200   int max_cost;
3201   struct algorithm algorithm;
3202   enum mult_variant variant;
3203 
3204   rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3205   max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg), speed);
3206   if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3207     return algorithm.cost.cost;
3208   else
3209     return max_cost;
3210 }
3211 
3212 /* Perform a widening multiplication and return an rtx for the result.
3213    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3214    TARGET is a suggestion for where to store the result (an rtx).
3215    THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3216    or smul_widen_optab.
3217 
3218    We check specially for a constant integer as OP1, comparing the
3219    cost of a widening multiply against the cost of a sequence of shifts
3220    and adds.  */
3221 
3222 rtx
3223 expand_widening_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3224 		      int unsignedp, optab this_optab)
3225 {
3226   bool speed = optimize_insn_for_speed_p ();
3227   rtx cop1;
3228 
3229   if (CONST_INT_P (op1)
3230       && GET_MODE (op0) != VOIDmode
3231       && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3232 				this_optab == umul_widen_optab))
3233       && CONST_INT_P (cop1)
3234       && (INTVAL (cop1) >= 0
3235 	  || HWI_COMPUTABLE_MODE_P (mode)))
3236     {
3237       HOST_WIDE_INT coeff = INTVAL (cop1);
3238       int max_cost;
3239       enum mult_variant variant;
3240       struct algorithm algorithm;
3241 
3242       /* Special case powers of two.  */
3243       if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3244 	{
3245 	  op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3246 	  return expand_shift (LSHIFT_EXPR, mode, op0,
3247 			       floor_log2 (coeff), target, unsignedp);
3248 	}
3249 
3250       /* Exclude cost of op0 from max_cost to match the cost
3251 	 calculation of the synth_mult.  */
3252       max_cost = mul_widen_cost (speed, mode);
3253       if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3254 			       max_cost))
3255 	{
3256 	  op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3257 	  return expand_mult_const (mode, op0, coeff, target,
3258 				    &algorithm, variant);
3259 	}
3260     }
3261   return expand_binop (mode, this_optab, op0, op1, target,
3262 		       unsignedp, OPTAB_LIB_WIDEN);
3263 }
3264 
3265 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3266    replace division by D, and put the least significant N bits of the result
3267    in *MULTIPLIER_PTR and return the most significant bit.
3268 
3269    The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3270    needed precision is in PRECISION (should be <= N).
3271 
3272    PRECISION should be as small as possible so this function can choose
3273    a multiplier more freely.
3274 
3275    The rounded-up logarithm of D is placed in *LGUP_PTR.  A shift count that
3276    is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3277 
3278    Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3279    where m is the full N + 1 bit multiplier.  */
3280 
3281 unsigned HOST_WIDE_INT
3282 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3283 		   unsigned HOST_WIDE_INT *multiplier_ptr,
3284 		   int *post_shift_ptr, int *lgup_ptr)
3285 {
3286   double_int mhigh, mlow;
3287   int lgup, post_shift;
3288   int pow, pow2;
3289 
3290   /* lgup = ceil(log2(divisor)); */
3291   lgup = ceil_log2 (d);
3292 
3293   gcc_assert (lgup <= n);
3294 
3295   pow = n + lgup;
3296   pow2 = n + lgup - precision;
3297 
3298   /* We could handle this with some effort, but this case is much
3299      better handled directly with a scc insn, so rely on caller using
3300      that.  */
3301   gcc_assert (pow != HOST_BITS_PER_DOUBLE_INT);
3302 
3303   /* mlow = 2^(N + lgup)/d */
3304   double_int val = double_int_zero.set_bit (pow);
3305   mlow = val.div (double_int::from_uhwi (d), true, TRUNC_DIV_EXPR);
3306 
3307   /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3308   val |= double_int_zero.set_bit (pow2);
3309   mhigh = val.div (double_int::from_uhwi (d), true, TRUNC_DIV_EXPR);
3310 
3311   gcc_assert (!mhigh.high || val.high - d < d);
3312   gcc_assert (mhigh.high <= 1 && mlow.high <= 1);
3313   /* Assert that mlow < mhigh.  */
3314   gcc_assert (mlow.ult (mhigh));
3315 
3316   /* If precision == N, then mlow, mhigh exceed 2^N
3317      (but they do not exceed 2^(N+1)).  */
3318 
3319   /* Reduce to lowest terms.  */
3320   for (post_shift = lgup; post_shift > 0; post_shift--)
3321     {
3322       int shft = HOST_BITS_PER_WIDE_INT - 1;
3323       unsigned HOST_WIDE_INT ml_lo = (mlow.high << shft) | (mlow.low >> 1);
3324       unsigned HOST_WIDE_INT mh_lo = (mhigh.high << shft) | (mhigh.low >> 1);
3325       if (ml_lo >= mh_lo)
3326 	break;
3327 
3328       mlow = double_int::from_uhwi (ml_lo);
3329       mhigh = double_int::from_uhwi (mh_lo);
3330     }
3331 
3332   *post_shift_ptr = post_shift;
3333   *lgup_ptr = lgup;
3334   if (n < HOST_BITS_PER_WIDE_INT)
3335     {
3336       unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3337       *multiplier_ptr = mhigh.low & mask;
3338       return mhigh.low >= mask;
3339     }
3340   else
3341     {
3342       *multiplier_ptr = mhigh.low;
3343       return mhigh.high;
3344     }
3345 }
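
/* A concrete instance of the computation above: choose_multiplier (7, 32, 32,
   ...) finds lgup == 3 and

	mlow  = 2^35 / 7	 = 0x124924924
	mhigh = (2^35 + 2^3) / 7 = 0x124924925

   the reduce-to-lowest-terms loop exits on its first iteration, so
   *POST_SHIFT_PTR == 3, the low 32 bits 0x24924925 go into *MULTIPLIER_PTR,
   and the function returns 1 because mhigh needs 33 bits.  Callers then
   compensate for that extra bit with a subtract/shift/add step (see the
   mh != 0 path in the unsigned TRUNC_DIV_EXPR case of expand_divmod).  */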
3346 
3347 /* Compute the inverse of X mod 2**N, i.e., find Y such that X * Y is
3348    congruent to 1 (mod 2**N).  */
3349 
3350 static unsigned HOST_WIDE_INT
3351 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3352 {
3353   /* Solve x*y == 1 (mod 2^n), where x is odd.  Return y.  */
3354 
3355   /* The algorithm notes that the choice y = x satisfies
3356      x*y == 1 mod 2^3, since x is assumed odd.
3357      Each iteration doubles the number of bits of significance in y.  */
3358 
3359   unsigned HOST_WIDE_INT mask;
3360   unsigned HOST_WIDE_INT y = x;
3361   int nbit = 3;
3362 
3363   mask = (n == HOST_BITS_PER_WIDE_INT
3364 	  ? ~(unsigned HOST_WIDE_INT) 0
3365 	  : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3366 
3367   while (nbit < n)
3368     {
3369       y = y * (2 - x*y) & mask;		/* Modulo 2^N */
3370       nbit *= 2;
3371     }
3372   return y;
3373 }
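
/* Example of the iteration above: for x == 7 and n == 8 the starting value
   y == 7 already satisfies 7*7 == 49 == 1 (mod 2^3).  The first step gives
   y = 7 * (2 - 49) & 0xff == 183, and 7 * 183 == 1281 == 5*256 + 1, so 183
   is the inverse of 7 mod 2^8; the second step leaves y unchanged.  */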
3374 
3375 /* Emit code to adjust ADJ_OPERAND after multiplying OP0 and OP1 with the
3376    wrong signedness flavor.  ADJ_OPERAND is already the high half of the
3377    product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the signed product
3378    to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3379    become signed.
3380 
3381    The result is put in TARGET if that is convenient.
3382 
3383    MODE is the mode of operation.  */
3384 
3385 rtx
3386 expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
3387 			     rtx op1, rtx target, int unsignedp)
3388 {
3389   rtx tem;
3390   enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3391 
3392   tem = expand_shift (RSHIFT_EXPR, mode, op0,
3393 		      GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3394   tem = expand_and (mode, tem, op1, NULL_RTX);
3395   adj_operand
3396     = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3397 		     adj_operand);
3398 
3399   tem = expand_shift (RSHIFT_EXPR, mode, op1,
3400 		      GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3401   tem = expand_and (mode, tem, op0, NULL_RTX);
3402   target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3403 			  target);
3404 
3405   return target;
3406 }
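
/* The adjustment above uses the identity (everything taken mod 2^N for an
   N-bit mode)

	highpart_unsigned (a, b)
	  = highpart_signed (a, b) + (a < 0 ? b : 0) + (b < 0 ? a : 0)

   read left to right when UNSIGNEDP is nonzero and right to left otherwise.
   For example with N == 8, a == 0xff and b == 2: the signed high part of
   -1 * 2 is 0xff, the unsigned high part of 255 * 2 is 1, and
   0xff + 2 == 0x101 == 1 (mod 2^8).  */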
3407 
3408 /* Subroutine of expmed_mult_highpart.  Return the MODE high part of OP.  */
3409 
3410 static rtx
3411 extract_high_half (enum machine_mode mode, rtx op)
3412 {
3413   enum machine_mode wider_mode;
3414 
3415   if (mode == word_mode)
3416     return gen_highpart (mode, op);
3417 
3418   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3419 
3420   wider_mode = GET_MODE_WIDER_MODE (mode);
3421   op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3422 		     GET_MODE_BITSIZE (mode), 0, 1);
3423   return convert_modes (mode, wider_mode, op, 0);
3424 }
3425 
3426 /* Like expmed_mult_highpart, but only consider using a multiplication
3427    optab.  OP1 is an rtx for the constant operand.  */
3428 
3429 static rtx
3430 expmed_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
3431 			    rtx target, int unsignedp, int max_cost)
3432 {
3433   rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3434   enum machine_mode wider_mode;
3435   optab moptab;
3436   rtx tem;
3437   int size;
3438   bool speed = optimize_insn_for_speed_p ();
3439 
3440   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3441 
3442   wider_mode = GET_MODE_WIDER_MODE (mode);
3443   size = GET_MODE_BITSIZE (mode);
3444 
3445   /* Firstly, try using a multiplication insn that only generates the needed
3446      high part of the product, and in the sign flavor of unsignedp.  */
3447   if (mul_highpart_cost (speed, mode) < max_cost)
3448     {
3449       moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3450       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3451 			  unsignedp, OPTAB_DIRECT);
3452       if (tem)
3453 	return tem;
3454     }
3455 
3456   /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3457      Need to adjust the result after the multiplication.  */
3458   if (size - 1 < BITS_PER_WORD
3459       && (mul_highpart_cost (speed, mode)
3460 	  + 2 * shift_cost (speed, mode, size-1)
3461 	  + 4 * add_cost (speed, mode) < max_cost))
3462     {
3463       moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3464       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3465 			  unsignedp, OPTAB_DIRECT);
3466       if (tem)
3467 	/* We used the wrong signedness.  Adjust the result.  */
3468 	return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3469 					    tem, unsignedp);
3470     }
3471 
3472   /* Try widening multiplication.  */
3473   moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3474   if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3475       && mul_widen_cost (speed, wider_mode) < max_cost)
3476     {
3477       tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3478 			  unsignedp, OPTAB_WIDEN);
3479       if (tem)
3480 	return extract_high_half (mode, tem);
3481     }
3482 
3483   /* Try widening the mode and performing a non-widening multiplication.  */
3484   if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3485       && size - 1 < BITS_PER_WORD
3486       && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3487 	  < max_cost))
3488     {
3489       rtx insns, wop0, wop1;
3490 
3491       /* We need to widen the operands, for example to ensure the
3492 	 constant multiplier is correctly sign or zero extended.
3493 	 Use a sequence to clean up any instructions emitted by
3494 	 the conversions if things don't work out.  */
3495       start_sequence ();
3496       wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3497       wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3498       tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3499 			  unsignedp, OPTAB_WIDEN);
3500       insns = get_insns ();
3501       end_sequence ();
3502 
3503       if (tem)
3504 	{
3505 	  emit_insn (insns);
3506 	  return extract_high_half (mode, tem);
3507 	}
3508     }
3509 
3510   /* Try widening multiplication of opposite signedness, and adjust.  */
3511   moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3512   if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3513       && size - 1 < BITS_PER_WORD
3514       && (mul_widen_cost (speed, wider_mode)
3515 	  + 2 * shift_cost (speed, mode, size-1)
3516 	  + 4 * add_cost (speed, mode) < max_cost))
3517     {
3518       tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3519 			  NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3520       if (tem != 0)
3521 	{
3522 	  tem = extract_high_half (mode, tem);
3523 	  /* We used the wrong signedness.  Adjust the result.  */
3524 	  return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3525 					      target, unsignedp);
3526 	}
3527     }
3528 
3529   return 0;
3530 }
3531 
3532 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3533    putting the high half of the result in TARGET if that is convenient,
3534    and return where the result is.  If the operation cannot be performed,
3535    0 is returned.
3536 
3537    MODE is the mode of operation and result.
3538 
3539    UNSIGNEDP nonzero means unsigned multiply.
3540 
3541    MAX_COST is the total allowed cost for the expanded RTL.  */
3542 
3543 static rtx
3544 expmed_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
3545 		      rtx target, int unsignedp, int max_cost)
3546 {
3547   enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3548   unsigned HOST_WIDE_INT cnst1;
3549   int extra_cost;
3550   bool sign_adjust = false;
3551   enum mult_variant variant;
3552   struct algorithm alg;
3553   rtx tem;
3554   bool speed = optimize_insn_for_speed_p ();
3555 
3556   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3557   /* We can't support modes wider than HOST_BITS_PER_WIDE_INT.  */
3558   gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3559 
3560   cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3561 
3562   /* We can't optimize modes wider than BITS_PER_WORD.
3563      ??? We might be able to perform double-word arithmetic if
3564      mode == word_mode, however all the cost calculations in
3565      synth_mult etc. assume single-word operations.  */
3566   if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3567     return expmed_mult_highpart_optab (mode, op0, op1, target,
3568 				       unsignedp, max_cost);
3569 
3570   extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3571 
3572   /* Check whether we try to multiply by a negative constant.  */
3573   if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3574     {
3575       sign_adjust = true;
3576       extra_cost += add_cost (speed, mode);
3577     }
3578 
3579   /* See whether shift/add multiplication is cheap enough.  */
3580   if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3581 			   max_cost - extra_cost))
3582     {
3583       /* See whether the specialized multiplication optabs are
3584 	 cheaper than the shift/add version.  */
3585       tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3586 					alg.cost.cost + extra_cost);
3587       if (tem)
3588 	return tem;
3589 
3590       tem = convert_to_mode (wider_mode, op0, unsignedp);
3591       tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3592       tem = extract_high_half (mode, tem);
3593 
3594       /* Adjust result for signedness.  */
3595       if (sign_adjust)
3596 	tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3597 
3598       return tem;
3599     }
3600   return expmed_mult_highpart_optab (mode, op0, op1, target,
3601 				     unsignedp, max_cost);
3602 }
3603 
3604 
3605 /* Expand signed modulus of OP0 by a power of two D in mode MODE.  */
3606 
3607 static rtx
3608 expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3609 {
3610   unsigned HOST_WIDE_INT masklow, maskhigh;
3611   rtx result, temp, shift, label;
3612   int logd;
3613 
3614   logd = floor_log2 (d);
3615   result = gen_reg_rtx (mode);
3616 
3617   /* Avoid conditional branches when they're expensive.  */
3618   if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3619       && optimize_insn_for_speed_p ())
3620     {
3621       rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3622 				      mode, 0, -1);
3623       if (signmask)
3624 	{
3625 	  signmask = force_reg (mode, signmask);
3626 	  masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3627 	  shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3628 
3629 	  /* Use the rtx_cost of a LSHIFTRT instruction to determine
3630 	     which instruction sequence to use.  If logical right shifts
3631 	     are expensive then use 2 XORs, 2 SUBs and an AND, otherwise
3632 	     use a LSHIFTRT, 1 ADD, 1 SUB and an AND.  */
3633 
3634 	  temp = gen_rtx_LSHIFTRT (mode, result, shift);
3635 	  if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
3636 	      || (set_src_cost (temp, optimize_insn_for_speed_p ())
3637 		  > COSTS_N_INSNS (2)))
3638 	    {
3639 	      temp = expand_binop (mode, xor_optab, op0, signmask,
3640 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
3641 	      temp = expand_binop (mode, sub_optab, temp, signmask,
3642 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
3643 	      temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3644 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
3645 	      temp = expand_binop (mode, xor_optab, temp, signmask,
3646 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
3647 	      temp = expand_binop (mode, sub_optab, temp, signmask,
3648 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
3649 	    }
3650 	  else
3651 	    {
3652 	      signmask = expand_binop (mode, lshr_optab, signmask, shift,
3653 				       NULL_RTX, 1, OPTAB_LIB_WIDEN);
3654 	      signmask = force_reg (mode, signmask);
3655 
3656 	      temp = expand_binop (mode, add_optab, op0, signmask,
3657 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
3658 	      temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3659 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
3660 	      temp = expand_binop (mode, sub_optab, temp, signmask,
3661 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
3662 	    }
3663 	  return temp;
3664 	}
3665     }
3666 
3667   /* Mask contains the mode's signbit and the significant bits of the
3668      modulus.  By including the signbit in the operation, many targets
3669      can avoid an explicit compare operation in the following comparison
3670      against zero.  */
3671 
3672   masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3673   if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
3674     {
3675       masklow |= (HOST_WIDE_INT) -1 << (GET_MODE_BITSIZE (mode) - 1);
3676       maskhigh = -1;
3677     }
3678   else
3679     maskhigh = (HOST_WIDE_INT) -1
3680 		 << (GET_MODE_BITSIZE (mode) - HOST_BITS_PER_WIDE_INT - 1);
3681 
3682   temp = expand_binop (mode, and_optab, op0,
3683 		       immed_double_const (masklow, maskhigh, mode),
3684 		       result, 1, OPTAB_LIB_WIDEN);
3685   if (temp != result)
3686     emit_move_insn (result, temp);
3687 
3688   label = gen_label_rtx ();
3689   do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3690 
3691   temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3692 		       0, OPTAB_LIB_WIDEN);
3693   masklow = (HOST_WIDE_INT) -1 << logd;
3694   maskhigh = -1;
3695   temp = expand_binop (mode, ior_optab, temp,
3696 		       immed_double_const (masklow, maskhigh, mode),
3697 		       result, 1, OPTAB_LIB_WIDEN);
3698   temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3699 		       0, OPTAB_LIB_WIDEN);
3700   if (temp != result)
3701     emit_move_insn (result, temp);
3702   emit_label (label);
3703   return result;
3704 }
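
/* A sketch of the branchless path above when logical shifts are expensive:
   with d == 8 (logd == 3) and signmask == (op0 < 0 ? -1 : 0), the emitted
   sequence computes

	((((op0 ^ signmask) - signmask) & 7) ^ signmask) - signmask

   which is (op0 & 7) for nonnegative op0 and -(-op0 & 7) for negative op0;
   e.g. op0 == -11 gives -(11 & 7) == -3, the truncated remainder.  */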
3705 
3706 /* Expand signed division of OP0 by a power of two D in mode MODE.
3707    This routine is only called for positive values of D.  */
3708 
3709 static rtx
3710 expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3711 {
3712   rtx temp, label;
3713   int logd;
3714 
3715   logd = floor_log2 (d);
3716 
3717   if (d == 2
3718       && BRANCH_COST (optimize_insn_for_speed_p (),
3719 		      false) >= 1)
3720     {
3721       temp = gen_reg_rtx (mode);
3722       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3723       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3724 			   0, OPTAB_LIB_WIDEN);
3725       return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3726     }
3727 
3728 #ifdef HAVE_conditional_move
3729   if (BRANCH_COST (optimize_insn_for_speed_p (), false)
3730       >= 2)
3731     {
3732       rtx temp2;
3733 
3734       /* ??? emit_conditional_move forces a stack adjustment via
3735 	 compare_from_rtx so, if the sequence is discarded, it will
3736 	 be lost.  Do it now instead.  */
3737       do_pending_stack_adjust ();
3738 
3739       start_sequence ();
3740       temp2 = copy_to_mode_reg (mode, op0);
3741       temp = expand_binop (mode, add_optab, temp2, GEN_INT (d-1),
3742 			   NULL_RTX, 0, OPTAB_LIB_WIDEN);
3743       temp = force_reg (mode, temp);
3744 
3745       /* Construct "temp2 = (temp2 < 0) ? temp : temp2".  */
3746       temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3747 				     mode, temp, temp2, mode, 0);
3748       if (temp2)
3749 	{
3750 	  rtx seq = get_insns ();
3751 	  end_sequence ();
3752 	  emit_insn (seq);
3753 	  return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
3754 	}
3755       end_sequence ();
3756     }
3757 #endif
3758 
3759   if (BRANCH_COST (optimize_insn_for_speed_p (),
3760 		   false) >= 2)
3761     {
3762       int ushift = GET_MODE_BITSIZE (mode) - logd;
3763 
3764       temp = gen_reg_rtx (mode);
3765       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3766       if (shift_cost (optimize_insn_for_speed_p (), mode, ushift)
3767 	  > COSTS_N_INSNS (1))
3768 	temp = expand_binop (mode, and_optab, temp, GEN_INT (d - 1),
3769 			     NULL_RTX, 0, OPTAB_LIB_WIDEN);
3770       else
3771 	temp = expand_shift (RSHIFT_EXPR, mode, temp,
3772 			     ushift, NULL_RTX, 1);
3773       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3774 			   0, OPTAB_LIB_WIDEN);
3775       return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3776     }
3777 
3778   label = gen_label_rtx ();
3779   temp = copy_to_mode_reg (mode, op0);
3780   do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3781   expand_inc (temp, GEN_INT (d - 1));
3782   emit_label (label);
3783   return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3784 }
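
/* All of the variants above compute the same thing in different ways: add
   d - 1 to the operand when it is negative (via a store-flag value, a
   conditional move or an explicit branch) and then shift right
   arithmetically by logd.  For op0 == -7 and d == 4 this yields
   (-7 + 3) >> 2 == -1, the truncated quotient, whereas a plain arithmetic
   shift of -7 would give -2.  */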
3785 
3786 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3787    if that is convenient, and returning where the result is.
3788    You may request either the quotient or the remainder as the result;
3789    specify REM_FLAG nonzero to get the remainder.
3790 
3791    CODE is the expression code for which kind of division this is;
3792    it controls how rounding is done.  MODE is the machine mode to use.
3793    UNSIGNEDP nonzero means do unsigned division.  */
3794 
3795 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3796    and then correct it by or'ing in missing high bits
3797    if result of ANDI is nonzero.
3798    For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3799    This could optimize to a bfexts instruction.
3800    But C doesn't use these operations, so their optimizations are
3801    left for later.  */
3802 /* ??? For modulo, we don't actually need the highpart of the first product,
3803    the low part will do nicely.  And for small divisors, the second multiply
3804    can also be a low-part only multiply or even be completely left out.
3805    E.g. to calculate the remainder of a division by 3 with a 32 bit
3806    multiply, multiply with 0x55555556 and extract the upper two bits;
3807    the result is exact for inputs up to 0x1fffffff.
3808    The input range can be reduced by using cross-sum rules.
3809    For odd divisors >= 3, the following table gives right shift counts
3810    so that if a number is shifted by an integer multiple of the given
3811    amount, the remainder stays the same:
3812    2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3813    14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3814    0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3815    20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3816    0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3817 
3818    Cross-sum rules for even numbers can be derived by leaving as many bits
3819    to the right alone as the divisor has zeros to the right.
3820    E.g. if x is an unsigned 32 bit number:
3821    (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3822    */
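
/* To make the 0x55555556 example above concrete: 0x55555556 == (2^32 + 2) / 3,
   and for x == 7, 8 and 9 the low 32 bits of x * 0x55555556 are 0x5555555a,
   0xaaaaaab0 and 0x00000006, whose top two bits are 1, 2 and 0 -- exactly
   x mod 3.  This is only an illustration; the code below still computes
   remainders as op0 - quotient * op1 as described earlier.  */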
3823 
3824 rtx
3825 expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
3826 	       rtx op0, rtx op1, rtx target, int unsignedp)
3827 {
3828   enum machine_mode compute_mode;
3829   rtx tquotient;
3830   rtx quotient = 0, remainder = 0;
3831   rtx last;
3832   int size;
3833   rtx insn;
3834   optab optab1, optab2;
3835   int op1_is_constant, op1_is_pow2 = 0;
3836   int max_cost, extra_cost;
3837   static HOST_WIDE_INT last_div_const = 0;
3838   bool speed = optimize_insn_for_speed_p ();
3839 
3840   op1_is_constant = CONST_INT_P (op1);
3841   if (op1_is_constant)
3842     {
3843       unsigned HOST_WIDE_INT ext_op1 = UINTVAL (op1);
3844       if (unsignedp)
3845 	ext_op1 &= GET_MODE_MASK (mode);
3846       op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3847 		     || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3848     }
3849 
3850   /*
3851      This is the structure of expand_divmod:
3852 
3853      First comes code to fix up the operands so we can perform the operations
3854      correctly and efficiently.
3855 
3856      Second comes a switch statement with code specific for each rounding mode.
3857      For some special operands this code emits all RTL for the desired
3858      operation, for other cases, it generates only a quotient and stores it in
3859      QUOTIENT.  The case for trunc division/remainder might leave quotient = 0,
3860      to indicate that it has not done anything.
3861 
3862      Last comes code that finishes the operation.  If QUOTIENT is set and
3863      REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1.  If
3864      QUOTIENT is not set, it is computed using trunc rounding.
3865 
3866      We try to generate special code for division and remainder when OP1 is a
3867      constant.  If |OP1| = 2**n we can use shifts and some other fast
3868      operations.  For other values of OP1, we compute a carefully selected
3869      fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3870      by m.
3871 
3872      In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3873      half of the product.  Different strategies for generating the product are
3874      implemented in expmed_mult_highpart.
3875 
3876      If what we actually want is the remainder, we generate that by another
3877      by-constant multiplication and a subtraction.  */
3878 
3879   /* We shouldn't be called with OP1 == const1_rtx, but some of the
3880      code below will malfunction if we are, so check here and handle
3881      the special case if so.  */
3882   if (op1 == const1_rtx)
3883     return rem_flag ? const0_rtx : op0;
3884 
3885   /* When dividing by -1, we could get an overflow.
3886      negv_optab can handle overflows.  */
3887   if (! unsignedp && op1 == constm1_rtx)
3888     {
3889       if (rem_flag)
3890 	return const0_rtx;
3891       return expand_unop (mode, flag_trapv && GET_MODE_CLASS(mode) == MODE_INT
3892 			  ? negv_optab : neg_optab, op0, target, 0);
3893     }
3894 
3895   if (target
3896       /* Don't use the function value register as a target
3897 	 since we have to read it as well as write it,
3898 	 and function-inlining gets confused by this.  */
3899       && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3900 	  /* Don't clobber an operand while doing a multi-step calculation.  */
3901 	  || ((rem_flag || op1_is_constant)
3902 	      && (reg_mentioned_p (target, op0)
3903 		  || (MEM_P (op0) && MEM_P (target))))
3904 	  || reg_mentioned_p (target, op1)
3905 	  || (MEM_P (op1) && MEM_P (target))))
3906     target = 0;
3907 
3908   /* Get the mode in which to perform this computation.  Normally it will
3909      be MODE, but sometimes we can't do the desired operation in MODE.
3910      If so, pick a wider mode in which we can do the operation.  Convert
3911      to that mode at the start to avoid repeated conversions.
3912 
3913      First see what operations we need.  These depend on the expression
3914      we are evaluating.  (We assume that divxx3 insns exist under the
3915      same conditions as modxx3 insns, and that these insns don't normally
3916      fail.  If these assumptions are not correct, we may generate less
3917      efficient code in some cases.)
3918 
3919      Then see if we find a mode in which we can open-code that operation
3920      (either a division, modulus, or shift).  Finally, check for the smallest
3921      mode for which we can do the operation with a library call.  */
3922 
3923   /* We might want to refine this now that we have division-by-constant
3924      optimization.  Since expmed_mult_highpart tries so many variants, it is
3925      not straightforward to generalize this.  Maybe we should make an array
3926      of possible modes in init_expmed?  Save this for GCC 2.7.  */
3927 
3928   optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3929 	    ? (unsignedp ? lshr_optab : ashr_optab)
3930 	    : (unsignedp ? udiv_optab : sdiv_optab));
3931   optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3932 	    ? optab1
3933 	    : (unsignedp ? udivmod_optab : sdivmod_optab));
3934 
3935   for (compute_mode = mode; compute_mode != VOIDmode;
3936        compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3937     if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
3938 	|| optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
3939       break;
3940 
3941   if (compute_mode == VOIDmode)
3942     for (compute_mode = mode; compute_mode != VOIDmode;
3943 	 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3944       if (optab_libfunc (optab1, compute_mode)
3945 	  || optab_libfunc (optab2, compute_mode))
3946 	break;
3947 
3948   /* If we still couldn't find a mode, use MODE, but expand_binop will
3949      probably die.  */
3950   if (compute_mode == VOIDmode)
3951     compute_mode = mode;
3952 
3953   if (target && GET_MODE (target) == compute_mode)
3954     tquotient = target;
3955   else
3956     tquotient = gen_reg_rtx (compute_mode);
3957 
3958   size = GET_MODE_BITSIZE (compute_mode);
3959 #if 0
3960   /* It should be possible to restrict the precision to GET_MODE_BITSIZE
3961      (mode), and thereby get better code when OP1 is a constant.  Do that
3962      later.  It will require going over all usages of SIZE below.  */
3963   size = GET_MODE_BITSIZE (mode);
3964 #endif
3965 
3966   /* Only deduct something for a REM if the last divide done was
3967      for a different constant.   Then set the constant of the last
3968      divide.  */
3969   max_cost = (unsignedp
3970 	      ? udiv_cost (speed, compute_mode)
3971 	      : sdiv_cost (speed, compute_mode));
3972   if (rem_flag && ! (last_div_const != 0 && op1_is_constant
3973 		     && INTVAL (op1) == last_div_const))
3974     max_cost -= (mul_cost (speed, compute_mode)
3975 		 + add_cost (speed, compute_mode));
3976 
3977   last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
3978 
3979   /* Now convert to the best mode to use.  */
3980   if (compute_mode != mode)
3981     {
3982       op0 = convert_modes (compute_mode, mode, op0, unsignedp);
3983       op1 = convert_modes (compute_mode, mode, op1, unsignedp);
3984 
3985       /* convert_modes may have placed op1 into a register, so we
3986 	 must recompute the following.  */
3987       op1_is_constant = CONST_INT_P (op1);
3988       op1_is_pow2 = (op1_is_constant
3989 		     && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
3990 			  || (! unsignedp
3991 			      && EXACT_POWER_OF_2_OR_ZERO_P (-UINTVAL (op1))))));
3992     }
3993 
3994   /* If one of the operands is a volatile MEM, copy it into a register.  */
3995 
3996   if (MEM_P (op0) && MEM_VOLATILE_P (op0))
3997     op0 = force_reg (compute_mode, op0);
3998   if (MEM_P (op1) && MEM_VOLATILE_P (op1))
3999     op1 = force_reg (compute_mode, op1);
4000 
4001   /* If we need the remainder or if OP1 is constant, we need to
4002      put OP0 in a register in case it has any queued subexpressions.  */
4003   if (rem_flag || op1_is_constant)
4004     op0 = force_reg (compute_mode, op0);
4005 
4006   last = get_last_insn ();
4007 
4008   /* Promote floor rounding to trunc rounding for unsigned operations.  */
4009   if (unsignedp)
4010     {
4011       if (code == FLOOR_DIV_EXPR)
4012 	code = TRUNC_DIV_EXPR;
4013       if (code == FLOOR_MOD_EXPR)
4014 	code = TRUNC_MOD_EXPR;
4015       if (code == EXACT_DIV_EXPR && op1_is_pow2)
4016 	code = TRUNC_DIV_EXPR;
4017     }
4018 
4019   if (op1 != const0_rtx)
4020     switch (code)
4021       {
4022       case TRUNC_MOD_EXPR:
4023       case TRUNC_DIV_EXPR:
4024 	if (op1_is_constant)
4025 	  {
4026 	    if (unsignedp)
4027 	      {
4028 		unsigned HOST_WIDE_INT mh, ml;
4029 		int pre_shift, post_shift;
4030 		int dummy;
4031 		unsigned HOST_WIDE_INT d = (INTVAL (op1)
4032 					    & GET_MODE_MASK (compute_mode));
4033 
4034 		if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4035 		  {
4036 		    pre_shift = floor_log2 (d);
4037 		    if (rem_flag)
4038 		      {
4039 			remainder
4040 			  = expand_binop (compute_mode, and_optab, op0,
4041 					  GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4042 					  remainder, 1,
4043 					  OPTAB_LIB_WIDEN);
4044 			if (remainder)
4045 			  return gen_lowpart (mode, remainder);
4046 		      }
4047 		    quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4048 					     pre_shift, tquotient, 1);
4049 		  }
4050 		else if (size <= HOST_BITS_PER_WIDE_INT)
4051 		  {
4052 		    if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
4053 		      {
4054 			/* Most significant bit of divisor is set; emit an scc
4055 			   insn.  */
4056 			quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4057 							  compute_mode, 1, 1);
4058 		      }
4059 		    else
4060 		      {
4061 			/* Find a suitable multiplier and right shift count
4062 			   instead of multiplying with D.  */
4063 
4064 			mh = choose_multiplier (d, size, size,
4065 						&ml, &post_shift, &dummy);
4066 
4067 			/* If the suggested multiplier is more than SIZE bits,
4068 			   we can do better for even divisors, using an
4069 			   initial right shift.  */
4070 			if (mh != 0 && (d & 1) == 0)
4071 			  {
4072 			    pre_shift = floor_log2 (d & -d);
4073 			    mh = choose_multiplier (d >> pre_shift, size,
4074 						    size - pre_shift,
4075 						    &ml, &post_shift, &dummy);
4076 			    gcc_assert (!mh);
4077 			  }
4078 			else
4079 			  pre_shift = 0;
4080 
4081 			if (mh != 0)
4082 			  {
4083 			    rtx t1, t2, t3, t4;
4084 
4085 			    if (post_shift - 1 >= BITS_PER_WORD)
4086 			      goto fail1;
4087 
4088 			    extra_cost
4089 			      = (shift_cost (speed, compute_mode, post_shift - 1)
4090 				 + shift_cost (speed, compute_mode, 1)
4091 				 + 2 * add_cost (speed, compute_mode));
4092 			    t1 = expmed_mult_highpart (compute_mode, op0,
4093 						       GEN_INT (ml),
4094 						       NULL_RTX, 1,
4095 						       max_cost - extra_cost);
4096 			    if (t1 == 0)
4097 			      goto fail1;
4098 			    t2 = force_operand (gen_rtx_MINUS (compute_mode,
4099 							       op0, t1),
4100 						NULL_RTX);
4101 			    t3 = expand_shift (RSHIFT_EXPR, compute_mode,
4102 					       t2, 1, NULL_RTX, 1);
4103 			    t4 = force_operand (gen_rtx_PLUS (compute_mode,
4104 							      t1, t3),
4105 						NULL_RTX);
4106 			    quotient = expand_shift
4107 			      (RSHIFT_EXPR, compute_mode, t4,
4108 			       post_shift - 1, tquotient, 1);
4109 			  }
4110 			else
4111 			  {
4112 			    rtx t1, t2;
4113 
4114 			    if (pre_shift >= BITS_PER_WORD
4115 				|| post_shift >= BITS_PER_WORD)
4116 			      goto fail1;
4117 
4118 			    t1 = expand_shift
4119 			      (RSHIFT_EXPR, compute_mode, op0,
4120 			       pre_shift, NULL_RTX, 1);
4121 			    extra_cost
4122 			      = (shift_cost (speed, compute_mode, pre_shift)
4123 				 + shift_cost (speed, compute_mode, post_shift));
4124 			    t2 = expmed_mult_highpart (compute_mode, t1,
4125 						       GEN_INT (ml),
4126 						       NULL_RTX, 1,
4127 						       max_cost - extra_cost);
4128 			    if (t2 == 0)
4129 			      goto fail1;
4130 			    quotient = expand_shift
4131 			      (RSHIFT_EXPR, compute_mode, t2,
4132 			       post_shift, tquotient, 1);
4133 			  }
4134 		      }
4135 		  }
4136 		else		/* Too wide mode to use tricky code */
4137 		  break;
4138 
4139 		insn = get_last_insn ();
4140 		if (insn != last)
4141 		  set_dst_reg_note (insn, REG_EQUAL,
4142 				    gen_rtx_UDIV (compute_mode, op0, op1),
4143 				    quotient);
4144 	      }
4145 	    else		/* TRUNC_DIV, signed */
4146 	      {
4147 		unsigned HOST_WIDE_INT ml;
4148 		int lgup, post_shift;
4149 		rtx mlr;
4150 		HOST_WIDE_INT d = INTVAL (op1);
4151 		unsigned HOST_WIDE_INT abs_d;
4152 
4153 		/* Since d might be INT_MIN, we have to cast to
4154 		   unsigned HOST_WIDE_INT before negating to avoid
4155 		   undefined signed overflow.  */
4156 		abs_d = (d >= 0
4157 			 ? (unsigned HOST_WIDE_INT) d
4158 			 : - (unsigned HOST_WIDE_INT) d);
4159 
4160 		/* n rem d = n rem -d */
4161 		if (rem_flag && d < 0)
4162 		  {
4163 		    d = abs_d;
4164 		    op1 = gen_int_mode (abs_d, compute_mode);
4165 		  }
4166 
4167 		if (d == 1)
4168 		  quotient = op0;
4169 		else if (d == -1)
4170 		  quotient = expand_unop (compute_mode, neg_optab, op0,
4171 					  tquotient, 0);
4172 		else if (HOST_BITS_PER_WIDE_INT >= size
4173 			 && abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4174 		  {
4175 		    /* This case is not handled correctly below.  */
4176 		    quotient = emit_store_flag (tquotient, EQ, op0, op1,
4177 						compute_mode, 1, 1);
4178 		    if (quotient == 0)
4179 		      goto fail1;
4180 		  }
4181 		else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4182 			 && (rem_flag
4183 			     ? smod_pow2_cheap (speed, compute_mode)
4184 			     : sdiv_pow2_cheap (speed, compute_mode))
4185 			 /* We assume that the cheap metric is true if the
4186 			    optab has an expander for this mode.  */
4187 			 && ((optab_handler ((rem_flag ? smod_optab
4188 					      : sdiv_optab),
4189 					     compute_mode)
4190 			      != CODE_FOR_nothing)
4191 			     || (optab_handler (sdivmod_optab,
4192 						compute_mode)
4193 				 != CODE_FOR_nothing)))
4194 		  ;
4195 		else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4196 		  {
4197 		    if (rem_flag)
4198 		      {
4199 			remainder = expand_smod_pow2 (compute_mode, op0, d);
4200 			if (remainder)
4201 			  return gen_lowpart (mode, remainder);
4202 		      }
4203 
4204 		    if (sdiv_pow2_cheap (speed, compute_mode)
4205 			&& ((optab_handler (sdiv_optab, compute_mode)
4206 			     != CODE_FOR_nothing)
4207 			    || (optab_handler (sdivmod_optab, compute_mode)
4208 				!= CODE_FOR_nothing)))
4209 		      quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4210 						compute_mode, op0,
4211 						gen_int_mode (abs_d,
4212 							      compute_mode),
4213 						NULL_RTX, 0);
4214 		    else
4215 		      quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4216 
4217 		    /* We have computed OP0 / abs(OP1).  If OP1 is negative,
4218 		       negate the quotient.  */
4219 		    if (d < 0)
4220 		      {
4221 			insn = get_last_insn ();
4222 			if (insn != last
4223 			    && abs_d < ((unsigned HOST_WIDE_INT) 1
4224 					<< (HOST_BITS_PER_WIDE_INT - 1)))
4225 			  set_dst_reg_note (insn, REG_EQUAL,
4226 					    gen_rtx_DIV (compute_mode, op0,
4227 							 gen_int_mode
4228 							   (abs_d,
4229 							    compute_mode)),
4230 					    quotient);
4231 
4232 			quotient = expand_unop (compute_mode, neg_optab,
4233 						quotient, quotient, 0);
4234 		      }
4235 		  }
4236 		else if (size <= HOST_BITS_PER_WIDE_INT)
4237 		  {
4238 		    choose_multiplier (abs_d, size, size - 1,
4239 				       &ml, &post_shift, &lgup);
4240 		    if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4241 		      {
4242 			rtx t1, t2, t3;
4243 
4244 			if (post_shift >= BITS_PER_WORD
4245 			    || size - 1 >= BITS_PER_WORD)
4246 			  goto fail1;
4247 
4248 			extra_cost = (shift_cost (speed, compute_mode, post_shift)
4249 				      + shift_cost (speed, compute_mode, size - 1)
4250 				      + add_cost (speed, compute_mode));
4251 			t1 = expmed_mult_highpart (compute_mode, op0,
4252 						   GEN_INT (ml), NULL_RTX, 0,
4253 						   max_cost - extra_cost);
4254 			if (t1 == 0)
4255 			  goto fail1;
4256 			t2 = expand_shift
4257 			  (RSHIFT_EXPR, compute_mode, t1,
4258 			   post_shift, NULL_RTX, 0);
4259 			t3 = expand_shift
4260 			  (RSHIFT_EXPR, compute_mode, op0,
4261 			   size - 1, NULL_RTX, 0);
4262 			if (d < 0)
4263 			  quotient
4264 			    = force_operand (gen_rtx_MINUS (compute_mode,
4265 							    t3, t2),
4266 					     tquotient);
4267 			else
4268 			  quotient
4269 			    = force_operand (gen_rtx_MINUS (compute_mode,
4270 							    t2, t3),
4271 					     tquotient);
4272 		      }
4273 		    else
4274 		      {
4275 			rtx t1, t2, t3, t4;
4276 
4277 			if (post_shift >= BITS_PER_WORD
4278 			    || size - 1 >= BITS_PER_WORD)
4279 			  goto fail1;
4280 
4281 			ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4282 			mlr = gen_int_mode (ml, compute_mode);
4283 			extra_cost = (shift_cost (speed, compute_mode, post_shift)
4284 				      + shift_cost (speed, compute_mode, size - 1)
4285 				      + 2 * add_cost (speed, compute_mode));
4286 			t1 = expmed_mult_highpart (compute_mode, op0, mlr,
4287 						   NULL_RTX, 0,
4288 						   max_cost - extra_cost);
4289 			if (t1 == 0)
4290 			  goto fail1;
4291 			t2 = force_operand (gen_rtx_PLUS (compute_mode,
4292 							  t1, op0),
4293 					    NULL_RTX);
4294 			t3 = expand_shift
4295 			  (RSHIFT_EXPR, compute_mode, t2,
4296 			   post_shift, NULL_RTX, 0);
4297 			t4 = expand_shift
4298 			  (RSHIFT_EXPR, compute_mode, op0,
4299 			   size - 1, NULL_RTX, 0);
4300 			if (d < 0)
4301 			  quotient
4302 			    = force_operand (gen_rtx_MINUS (compute_mode,
4303 							    t4, t3),
4304 					     tquotient);
4305 			else
4306 			  quotient
4307 			    = force_operand (gen_rtx_MINUS (compute_mode,
4308 							    t3, t4),
4309 					     tquotient);
4310 		      }
4311 		  }
4312 		else		/* Too wide mode to use tricky code */
4313 		  break;
4314 
4315 		insn = get_last_insn ();
4316 		if (insn != last)
4317 		  set_dst_reg_note (insn, REG_EQUAL,
4318 				    gen_rtx_DIV (compute_mode, op0, op1),
4319 				    quotient);
4320 	      }
4321 	    break;
4322 	  }
4323       fail1:
4324 	delete_insns_since (last);
4325 	break;
4326 
4327       case FLOOR_DIV_EXPR:
4328       case FLOOR_MOD_EXPR:
4329       /* We will come here only for signed operations.  */
4330 	/* We will come here only for signed operations.  */
4331 	  {
4332 	    unsigned HOST_WIDE_INT mh, ml;
4333 	    int pre_shift, lgup, post_shift;
4334 	    HOST_WIDE_INT d = INTVAL (op1);
4335 
4336 	    if (d > 0)
4337 	      {
4338 		/* We could just as easily deal with negative constants here,
4339 		   but it does not seem worth the trouble for GCC 2.6.  */
4340 		if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4341 		  {
4342 		    pre_shift = floor_log2 (d);
4343 		    if (rem_flag)
4344 		      {
4345 			remainder = expand_binop (compute_mode, and_optab, op0,
4346 						  GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4347 						  remainder, 0, OPTAB_LIB_WIDEN);
4348 			if (remainder)
4349 			  return gen_lowpart (mode, remainder);
4350 		      }
4351 		    quotient = expand_shift
4352 		      (RSHIFT_EXPR, compute_mode, op0,
4353 		       pre_shift, tquotient, 0);
4354 		  }
4355 		else
4356 		  {
4357 		    rtx t1, t2, t3, t4;
4358 
4359 		    mh = choose_multiplier (d, size, size - 1,
4360 					    &ml, &post_shift, &lgup);
4361 		    gcc_assert (!mh);
4362 
4363 		    if (post_shift < BITS_PER_WORD
4364 			&& size - 1 < BITS_PER_WORD)
4365 		      {
4366 			t1 = expand_shift
4367 			  (RSHIFT_EXPR, compute_mode, op0,
4368 			   size - 1, NULL_RTX, 0);
4369 			t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4370 					   NULL_RTX, 0, OPTAB_WIDEN);
4371 			extra_cost = (shift_cost (speed, compute_mode, post_shift)
4372 				      + shift_cost (speed, compute_mode, size - 1)
4373 				      + 2 * add_cost (speed, compute_mode));
4374 			t3 = expmed_mult_highpart (compute_mode, t2,
4375 						   GEN_INT (ml), NULL_RTX, 1,
4376 						   max_cost - extra_cost);
4377 			if (t3 != 0)
4378 			  {
4379 			    t4 = expand_shift
4380 			      (RSHIFT_EXPR, compute_mode, t3,
4381 			       post_shift, NULL_RTX, 1);
4382 			    quotient = expand_binop (compute_mode, xor_optab,
4383 						     t4, t1, tquotient, 0,
4384 						     OPTAB_WIDEN);
4385 			  }
4386 		      }
4387 		  }
4388 	      }
4389 	    else
4390 	      {
4391 		rtx nsign, t1, t2, t3, t4;
4392 		t1 = force_operand (gen_rtx_PLUS (compute_mode,
4393 						  op0, constm1_rtx), NULL_RTX);
4394 		t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4395 				   0, OPTAB_WIDEN);
4396 		nsign = expand_shift
4397 		  (RSHIFT_EXPR, compute_mode, t2,
4398 		   size - 1, NULL_RTX, 0);
4399 		t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4400 				    NULL_RTX);
4401 		t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4402 				    NULL_RTX, 0);
4403 		if (t4)
4404 		  {
4405 		    rtx t5;
4406 		    t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4407 				      NULL_RTX, 0);
4408 		    quotient = force_operand (gen_rtx_PLUS (compute_mode,
4409 							    t4, t5),
4410 					      tquotient);
4411 		  }
4412 	      }
4413 	  }
4414 
4415 	if (quotient != 0)
4416 	  break;
4417 	delete_insns_since (last);
4418 
4419 	/* Try using an instruction that produces both the quotient and
4420 	   remainder, using truncation.  We can easily compensate the quotient
4421 	   or remainder to get floor rounding, once we have the remainder.
4422 	   Notice that we also compute the final remainder value here,
4423 	   and return the result right away.  */
4424 	if (target == 0 || GET_MODE (target) != compute_mode)
4425 	  target = gen_reg_rtx (compute_mode);
4426 
4427 	if (rem_flag)
4428 	  {
4429 	    remainder
4430 	      = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4431 	    quotient = gen_reg_rtx (compute_mode);
4432 	  }
4433 	else
4434 	  {
4435 	    quotient
4436 	      = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4437 	    remainder = gen_reg_rtx (compute_mode);
4438 	  }
4439 
4440 	if (expand_twoval_binop (sdivmod_optab, op0, op1,
4441 				 quotient, remainder, 0))
4442 	  {
4443 	    /* This could be computed with a branch-less sequence.
4444 	       Save that for later.  */
4445 	    rtx tem;
4446 	    rtx label = gen_label_rtx ();
4447 	    do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4448 	    tem = expand_binop (compute_mode, xor_optab, op0, op1,
4449 				NULL_RTX, 0, OPTAB_WIDEN);
4450 	    do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4451 	    expand_dec (quotient, const1_rtx);
4452 	    expand_inc (remainder, op1);
4453 	    emit_label (label);
4454 	    return gen_lowpart (mode, rem_flag ? remainder : quotient);
4455 	  }
4456 
4457 	/* No luck with division elimination or divmod.  Have to do it
4458 	   by conditionally adjusting op0 *and* the result.  */
4459 	{
4460 	  rtx label1, label2, label3, label4, label5;
4461 	  rtx adjusted_op0;
4462 	  rtx tem;
4463 
4464 	  quotient = gen_reg_rtx (compute_mode);
4465 	  adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4466 	  label1 = gen_label_rtx ();
4467 	  label2 = gen_label_rtx ();
4468 	  label3 = gen_label_rtx ();
4469 	  label4 = gen_label_rtx ();
4470 	  label5 = gen_label_rtx ();
4471 	  do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4472 	  do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4473 	  tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4474 			      quotient, 0, OPTAB_LIB_WIDEN);
4475 	  if (tem != quotient)
4476 	    emit_move_insn (quotient, tem);
4477 	  emit_jump_insn (gen_jump (label5));
4478 	  emit_barrier ();
4479 	  emit_label (label1);
4480 	  expand_inc (adjusted_op0, const1_rtx);
4481 	  emit_jump_insn (gen_jump (label4));
4482 	  emit_barrier ();
4483 	  emit_label (label2);
4484 	  do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4485 	  tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4486 			      quotient, 0, OPTAB_LIB_WIDEN);
4487 	  if (tem != quotient)
4488 	    emit_move_insn (quotient, tem);
4489 	  emit_jump_insn (gen_jump (label5));
4490 	  emit_barrier ();
4491 	  emit_label (label3);
4492 	  expand_dec (adjusted_op0, const1_rtx);
4493 	  emit_label (label4);
4494 	  tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4495 			      quotient, 0, OPTAB_LIB_WIDEN);
4496 	  if (tem != quotient)
4497 	    emit_move_insn (quotient, tem);
4498 	  expand_dec (quotient, const1_rtx);
4499 	  emit_label (label5);
4500 	}
4501 	break;
4502 
4503       case CEIL_DIV_EXPR:
4504       case CEIL_MOD_EXPR:
4505 	if (unsignedp)
4506 	  {
4507 	    if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4508 	      {
4509 		rtx t1, t2, t3;
4510 		unsigned HOST_WIDE_INT d = INTVAL (op1);
4511 		t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4512 				   floor_log2 (d), tquotient, 1);
4513 		t2 = expand_binop (compute_mode, and_optab, op0,
4514 				   GEN_INT (d - 1),
4515 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
4516 		t3 = gen_reg_rtx (compute_mode);
4517 		t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4518 				      compute_mode, 1, 1);
4519 		if (t3 == 0)
4520 		  {
4521 		    rtx lab;
4522 		    lab = gen_label_rtx ();
4523 		    do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4524 		    expand_inc (t1, const1_rtx);
4525 		    emit_label (lab);
4526 		    quotient = t1;
4527 		  }
4528 		else
4529 		  quotient = force_operand (gen_rtx_PLUS (compute_mode,
4530 							  t1, t3),
4531 					    tquotient);
4532 		break;
4533 	      }
4534 
4535 	    /* Try using an instruction that produces both the quotient and
4536 	       remainder, using truncation.  We can easily compensate the
4537 	       quotient or remainder to get ceiling rounding, once we have the
4538 	       remainder.  Notice that we also compute the final remainder
4539 	       value here, and return the result right away.  */
4540 	    if (target == 0 || GET_MODE (target) != compute_mode)
4541 	      target = gen_reg_rtx (compute_mode);
4542 
4543 	    if (rem_flag)
4544 	      {
4545 		remainder = (REG_P (target)
4546 			     ? target : gen_reg_rtx (compute_mode));
4547 		quotient = gen_reg_rtx (compute_mode);
4548 	      }
4549 	    else
4550 	      {
4551 		quotient = (REG_P (target)
4552 			    ? target : gen_reg_rtx (compute_mode));
4553 		remainder = gen_reg_rtx (compute_mode);
4554 	      }
4555 
4556 	    if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4557 				     remainder, 1))
4558 	      {
4559 		/* This could be computed with a branch-less sequence.
4560 		   Save that for later.  */
4561 		rtx label = gen_label_rtx ();
4562 		do_cmp_and_jump (remainder, const0_rtx, EQ,
4563 				 compute_mode, label);
4564 		expand_inc (quotient, const1_rtx);
4565 		expand_dec (remainder, op1);
4566 		emit_label (label);
4567 		return gen_lowpart (mode, rem_flag ? remainder : quotient);
4568 	      }
4569 
4570 	    /* No luck with division elimination or divmod.  Have to do it
4571 	       by conditionally adjusting op0 *and* the result.  */
4572 	    {
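	      /* For unsigned OP0 > 0, ceil (OP0 / OP1) == (OP0 - 1) / OP1 + 1,
		 and for OP0 == 0 the quotient is simply 0; the code below
		 branches on OP0 == 0 and otherwise divides OP0 - 1 and
		 increments the result.  */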
4573 	      rtx label1, label2;
4574 	      rtx adjusted_op0, tem;
4575 
4576 	      quotient = gen_reg_rtx (compute_mode);
4577 	      adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4578 	      label1 = gen_label_rtx ();
4579 	      label2 = gen_label_rtx ();
4580 	      do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4581 			       compute_mode, label1);
4582 	      emit_move_insn (quotient, const0_rtx);
4583 	      emit_jump_insn (gen_jump (label2));
4584 	      emit_barrier ();
4585 	      emit_label (label1);
4586 	      expand_dec (adjusted_op0, const1_rtx);
4587 	      tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4588 				  quotient, 1, OPTAB_LIB_WIDEN);
4589 	      if (tem != quotient)
4590 		emit_move_insn (quotient, tem);
4591 	      expand_inc (quotient, const1_rtx);
4592 	      emit_label (label2);
4593 	    }
4594 	  }
4595 	else /* signed */
4596 	  {
4597 	    if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4598 		&& INTVAL (op1) >= 0)
4599 	      {
4600 		/* This is extremely similar to the code for the unsigned case
4601 		   above.  For 2.7 we should merge these variants, but for
4602 		   2.6.1 I don't want to touch the code for unsigned since that
4603 		   gets used in C.  The signed case will only be used by other
4604 		   languages (Ada).  */
4605 
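		/* The arithmetic shift below computes the floor of OP0 / D,
		   and the low bits of OP0 tell whether the division is
		   inexact; adding 1 in that case gives the ceiling, for
		   negative OP0 as well (e.g. -5 / 4: -5 >> 2 == -2, low bits
		   3 != 0, so the ceiling quotient is -1).  */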
4606 		rtx t1, t2, t3;
4607 		unsigned HOST_WIDE_INT d = INTVAL (op1);
4608 		t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4609 				   floor_log2 (d), tquotient, 0);
4610 		t2 = expand_binop (compute_mode, and_optab, op0,
4611 				   GEN_INT (d - 1),
4612 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
4613 		t3 = gen_reg_rtx (compute_mode);
4614 		t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4615 				      compute_mode, 1, 1);
4616 		if (t3 == 0)
4617 		  {
4618 		    rtx lab;
4619 		    lab = gen_label_rtx ();
4620 		    do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4621 		    expand_inc (t1, const1_rtx);
4622 		    emit_label (lab);
4623 		    quotient = t1;
4624 		  }
4625 		else
4626 		  quotient = force_operand (gen_rtx_PLUS (compute_mode,
4627 							  t1, t3),
4628 					    tquotient);
4629 		break;
4630 	      }
4631 
4632 	    /* Try using an instruction that produces both the quotient and
4633 	       remainder, using truncation.  We can easily compensate the
4634 	       quotient or remainder to get ceiling rounding, once we have the
4635 	       remainder.  Notice that we also compute the final remainder
4636 	       value here, and return the result right away.  */
4637 	    if (target == 0 || GET_MODE (target) != compute_mode)
4638 	      target = gen_reg_rtx (compute_mode);
4639 	    if (rem_flag)
4640 	      {
4641 		remainder = (REG_P (target)
4642 			    ? target : gen_reg_rtx (compute_mode));
4643 		quotient = gen_reg_rtx (compute_mode);
4644 	      }
4645 	    else
4646 	      {
4647 		quotient = (REG_P (target)
4648 			    ? target : gen_reg_rtx (compute_mode));
4649 		remainder = gen_reg_rtx (compute_mode);
4650 	      }
4651 
4652 	    if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4653 				     remainder, 0))
4654 	      {
4655 		/* This could be computed with a branch-less sequence.
4656 		   Save that for later.  */
4657 		rtx tem;
4658 		rtx label = gen_label_rtx ();
4659 		do_cmp_and_jump (remainder, const0_rtx, EQ,
4660 				 compute_mode, label);
4661 		tem = expand_binop (compute_mode, xor_optab, op0, op1,
4662 				    NULL_RTX, 0, OPTAB_WIDEN);
4663 		do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4664 		expand_inc (quotient, const1_rtx);
4665 		expand_dec (remainder, op1);
4666 		emit_label (label);
4667 		return gen_lowpart (mode, rem_flag ? remainder : quotient);
4668 	      }
4669 
4670 	    /* No luck with division elimination or divmod.  Have to do it
4671 	       by conditionally adjusting op0 *and* the result.  */
4672 	    {
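	      /* When OP0 and OP1 have the same sign the true quotient is
		 positive, and ceil (OP0 / OP1) is obtained by moving OP0 one
		 unit toward zero, dividing, and adding 1.  When the signs
		 differ (or OP0 is zero), truncating division already rounds
		 toward zero, which is the ceiling of a non-positive
		 quotient.  */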
4673 	      rtx label1, label2, label3, label4, label5;
4674 	      rtx adjusted_op0;
4675 	      rtx tem;
4676 
4677 	      quotient = gen_reg_rtx (compute_mode);
4678 	      adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4679 	      label1 = gen_label_rtx ();
4680 	      label2 = gen_label_rtx ();
4681 	      label3 = gen_label_rtx ();
4682 	      label4 = gen_label_rtx ();
4683 	      label5 = gen_label_rtx ();
4684 	      do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4685 	      do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4686 			       compute_mode, label1);
4687 	      tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4688 				  quotient, 0, OPTAB_LIB_WIDEN);
4689 	      if (tem != quotient)
4690 		emit_move_insn (quotient, tem);
4691 	      emit_jump_insn (gen_jump (label5));
4692 	      emit_barrier ();
4693 	      emit_label (label1);
4694 	      expand_dec (adjusted_op0, const1_rtx);
4695 	      emit_jump_insn (gen_jump (label4));
4696 	      emit_barrier ();
4697 	      emit_label (label2);
4698 	      do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4699 			       compute_mode, label3);
4700 	      tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4701 				  quotient, 0, OPTAB_LIB_WIDEN);
4702 	      if (tem != quotient)
4703 		emit_move_insn (quotient, tem);
4704 	      emit_jump_insn (gen_jump (label5));
4705 	      emit_barrier ();
4706 	      emit_label (label3);
4707 	      expand_inc (adjusted_op0, const1_rtx);
4708 	      emit_label (label4);
4709 	      tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4710 				  quotient, 0, OPTAB_LIB_WIDEN);
4711 	      if (tem != quotient)
4712 		emit_move_insn (quotient, tem);
4713 	      expand_inc (quotient, const1_rtx);
4714 	      emit_label (label5);
4715 	    }
4716 	  }
4717 	break;
4718 
4719       case EXACT_DIV_EXPR:
4720 	if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4721 	  {
4722 	    HOST_WIDE_INT d = INTVAL (op1);
4723 	    unsigned HOST_WIDE_INT ml;
4724 	    int pre_shift;
4725 	    rtx t1;
4726 
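	    /* Since the division is known to be exact, OP0 is a multiple of
	       D.  Shift out the power of two common to OP0 and D, then
	       multiply by the inverse of the remaining odd factor modulo
	       2**SIZE; that product is the exact quotient.  E.g. in an 8-bit
	       mode with D == 6: PRE_SHIFT == 1, the odd factor 3 has inverse
	       171 mod 256, and 18 / 6 == ((18 >> 1) * 171) & 0xff == 3.  */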
4727 	    pre_shift = floor_log2 (d & -d);
4728 	    ml = invert_mod2n (d >> pre_shift, size);
4729 	    t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4730 			       pre_shift, NULL_RTX, unsignedp);
4731 	    quotient = expand_mult (compute_mode, t1,
4732 				    gen_int_mode (ml, compute_mode),
4733 				    NULL_RTX, 1);
4734 
4735 	    insn = get_last_insn ();
4736 	    set_dst_reg_note (insn, REG_EQUAL,
4737 			      gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4738 					      compute_mode, op0, op1),
4739 			      quotient);
4740 	  }
4741 	break;
4742 
4743       case ROUND_DIV_EXPR:
4744       case ROUND_MOD_EXPR:
4745 	if (unsignedp)
4746 	  {
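	    /* Round to nearest: after a truncating division, bump the
	       quotient (and subtract OP1 from the remainder) whenever the
	       remainder exceeds (OP1 - 1) / 2, i.e. whenever it is at least
	       half of OP1; ties round upward.  */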
4747 	    rtx tem;
4748 	    rtx label;
4749 	    label = gen_label_rtx ();
4750 	    quotient = gen_reg_rtx (compute_mode);
4751 	    remainder = gen_reg_rtx (compute_mode);
4752 	    if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4753 	      {
4754 		rtx tem;
4755 		quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4756 					 quotient, 1, OPTAB_LIB_WIDEN);
4757 		tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4758 		remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4759 					  remainder, 1, OPTAB_LIB_WIDEN);
4760 	      }
4761 	    tem = plus_constant (compute_mode, op1, -1);
4762 	    tem = expand_shift (RSHIFT_EXPR, compute_mode, tem, 1, NULL_RTX, 1);
4763 	    do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4764 	    expand_inc (quotient, const1_rtx);
4765 	    expand_dec (remainder, op1);
4766 	    emit_label (label);
4767 	  }
4768 	else
4769 	  {
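	    /* Round to nearest: adjust whenever 2 * abs (remainder) >=
	       abs (OP1), i.e. the remainder is at least half the divisor
	       (ties round away from zero).  MASK below is all ones when OP0
	       and OP1 have opposite signs and zero otherwise, so
	       (X ^ MASK) - MASK conditionally negates X; the quotient is
	       moved one step away from zero and OP1 times that same 1 or -1
	       is subtracted from the remainder, which keeps
	       OP0 == QUOTIENT * OP1 + REMAINDER.  */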
4770 	    rtx abs_rem, abs_op1, tem, mask;
4771 	    rtx label;
4772 	    label = gen_label_rtx ();
4773 	    quotient = gen_reg_rtx (compute_mode);
4774 	    remainder = gen_reg_rtx (compute_mode);
4775 	    if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4776 	      {
4777 		rtx tem;
4778 		quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4779 					 quotient, 0, OPTAB_LIB_WIDEN);
4780 		tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4781 		remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4782 					  remainder, 0, OPTAB_LIB_WIDEN);
4783 	      }
4784 	    abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4785 	    abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4786 	    tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4787 				1, NULL_RTX, 1);
4788 	    do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4789 	    tem = expand_binop (compute_mode, xor_optab, op0, op1,
4790 				NULL_RTX, 0, OPTAB_WIDEN);
4791 	    mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4792 				 size - 1, NULL_RTX, 0);
4793 	    tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4794 				NULL_RTX, 0, OPTAB_WIDEN);
4795 	    tem = expand_binop (compute_mode, sub_optab, tem, mask,
4796 				NULL_RTX, 0, OPTAB_WIDEN);
4797 	    expand_inc (quotient, tem);
4798 	    tem = expand_binop (compute_mode, xor_optab, mask, op1,
4799 				NULL_RTX, 0, OPTAB_WIDEN);
4800 	    tem = expand_binop (compute_mode, sub_optab, tem, mask,
4801 				NULL_RTX, 0, OPTAB_WIDEN);
4802 	    expand_dec (remainder, tem);
4803 	    emit_label (label);
4804 	  }
4805 	return gen_lowpart (mode, rem_flag ? remainder : quotient);
4806 
4807       default:
4808 	gcc_unreachable ();
4809       }
4810 
4811   if (quotient == 0)
4812     {
4813       if (target && GET_MODE (target) != compute_mode)
4814 	target = 0;
4815 
4816       if (rem_flag)
4817 	{
4818 	  /* Try to produce the remainder without producing the quotient.
4819 	     If we seem to have a divmod pattern that does not require widening,
4820 	     don't try widening here.  We should really have a WIDEN argument
4821 	     to expand_twoval_binop, since what we'd really like to do here is
4822 	     1) try a mod insn in compute_mode
4823 	     2) try a divmod insn in compute_mode
4824 	     3) try a div insn in compute_mode and multiply-subtract to get
4825 	        remainder
4826 	     4) try the same things with widening allowed.  */
4827 	  remainder
4828 	    = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4829 				 op0, op1, target,
4830 				 unsignedp,
4831 				 ((optab_handler (optab2, compute_mode)
4832 				   != CODE_FOR_nothing)
4833 				  ? OPTAB_DIRECT : OPTAB_WIDEN));
4834 	  if (remainder == 0)
4835 	    {
4836 	      /* No luck there.  Can we do remainder and divide at once
4837 		 without a library call?  */
4838 	      remainder = gen_reg_rtx (compute_mode);
4839 	      if (! expand_twoval_binop ((unsignedp
4840 					  ? udivmod_optab
4841 					  : sdivmod_optab),
4842 					 op0, op1,
4843 					 NULL_RTX, remainder, unsignedp))
4844 		remainder = 0;
4845 	    }
4846 
4847 	  if (remainder)
4848 	    return gen_lowpart (mode, remainder);
4849 	}
4850 
4851       /* Produce the quotient.  Try a quotient insn, but not a library call.
4852 	 If we have a divmod in this mode, use it in preference to widening
4853 	 the div (for this test we assume it will not fail).  Note that optab2
4854 	 is set to whichever of the two optabs the call below will use.  */
4855       quotient
4856 	= sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4857 			     op0, op1, rem_flag ? NULL_RTX : target,
4858 			     unsignedp,
4859 			     ((optab_handler (optab2, compute_mode)
4860 			       != CODE_FOR_nothing)
4861 			      ? OPTAB_DIRECT : OPTAB_WIDEN));
4862 
4863       if (quotient == 0)
4864 	{
4865 	  /* No luck there.  Try a quotient-and-remainder insn,
4866 	     keeping the quotient alone.  */
4867 	  quotient = gen_reg_rtx (compute_mode);
4868 	  if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4869 				     op0, op1,
4870 				     quotient, NULL_RTX, unsignedp))
4871 	    {
4872 	      quotient = 0;
4873 	      if (! rem_flag)
4874 		/* Still no luck.  If we are not computing the remainder,
4875 		   use a library call for the quotient.  */
4876 		quotient = sign_expand_binop (compute_mode,
4877 					      udiv_optab, sdiv_optab,
4878 					      op0, op1, target,
4879 					      unsignedp, OPTAB_LIB_WIDEN);
4880 	    }
4881 	}
4882     }
4883 
4884   if (rem_flag)
4885     {
4886       if (target && GET_MODE (target) != compute_mode)
4887 	target = 0;
4888 
4889       if (quotient == 0)
4890 	{
4891 	  /* No divide instruction either.  Use library for remainder.  */
4892 	  remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4893 					 op0, op1, target,
4894 					 unsignedp, OPTAB_LIB_WIDEN);
4895 	  /* No remainder function.  Try a quotient-and-remainder
4896 	     function, keeping the remainder.  */
4897 	  if (!remainder)
4898 	    {
4899 	      remainder = gen_reg_rtx (compute_mode);
4900 	      if (!expand_twoval_binop_libfunc
4901 		  (unsignedp ? udivmod_optab : sdivmod_optab,
4902 		   op0, op1,
4903 		   NULL_RTX, remainder,
4904 		   unsignedp ? UMOD : MOD))
4905 		remainder = NULL_RTX;
4906 	    }
4907 	}
4908       else
4909 	{
4910 	  /* We divided.  Now finish doing X - Y * (X / Y).  */
4911 	  remainder = expand_mult (compute_mode, quotient, op1,
4912 				   NULL_RTX, unsignedp);
4913 	  remainder = expand_binop (compute_mode, sub_optab, op0,
4914 				    remainder, target, unsignedp,
4915 				    OPTAB_LIB_WIDEN);
4916 	}
4917     }
4918 
4919   return gen_lowpart (mode, rem_flag ? remainder : quotient);
4920 }
4921 
4922 /* Return a tree node with data type TYPE, describing the value of X.
4923    Usually this is a VAR_DECL, if there is no obvious better choice.
4924    X may be an expression; however, we only support those expressions
4925    generated by loop.c.  */
4926 
4927 tree
4928 make_tree (tree type, rtx x)
4929 {
4930   tree t;
4931 
4932   switch (GET_CODE (x))
4933     {
4934     case CONST_INT:
4935       {
4936 	HOST_WIDE_INT hi = 0;
4937 
4938 	if (INTVAL (x) < 0
4939 	    && !(TYPE_UNSIGNED (type)
4940 		 && (GET_MODE_BITSIZE (TYPE_MODE (type))
4941 		     < HOST_BITS_PER_WIDE_INT)))
4942 	  hi = -1;
4943 
4944 	t = build_int_cst_wide (type, INTVAL (x), hi);
4945 
4946 	return t;
4947       }
4948 
4949     case CONST_DOUBLE:
4950       if (GET_MODE (x) == VOIDmode)
4951 	t = build_int_cst_wide (type,
4952 				CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
4953       else
4954 	{
4955 	  REAL_VALUE_TYPE d;
4956 
4957 	  REAL_VALUE_FROM_CONST_DOUBLE (d, x);
4958 	  t = build_real (type, d);
4959 	}
4960 
4961       return t;
4962 
4963     case CONST_VECTOR:
4964       {
4965 	int units = CONST_VECTOR_NUNITS (x);
4966 	tree itype = TREE_TYPE (type);
4967 	tree *elts;
4968 	int i;
4969 
4970 	/* Build a tree with vector elements.  */
4971 	elts = XALLOCAVEC (tree, units);
4972 	for (i = units - 1; i >= 0; --i)
4973 	  {
4974 	    rtx elt = CONST_VECTOR_ELT (x, i);
4975 	    elts[i] = make_tree (itype, elt);
4976 	  }
4977 
4978 	return build_vector (type, elts);
4979       }
4980 
4981     case PLUS:
4982       return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4983 			  make_tree (type, XEXP (x, 1)));
4984 
4985     case MINUS:
4986       return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4987 			  make_tree (type, XEXP (x, 1)));
4988 
4989     case NEG:
4990       return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
4991 
4992     case MULT:
4993       return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
4994 			  make_tree (type, XEXP (x, 1)));
4995 
4996     case ASHIFT:
4997       return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
4998 			  make_tree (type, XEXP (x, 1)));
4999 
5000     case LSHIFTRT:
5001       t = unsigned_type_for (type);
5002       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5003 			    		 make_tree (t, XEXP (x, 0)),
5004 				    	 make_tree (type, XEXP (x, 1))));
5005 
5006     case ASHIFTRT:
5007       t = signed_type_for (type);
5008       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5009 					 make_tree (t, XEXP (x, 0)),
5010 				    	 make_tree (type, XEXP (x, 1))));
5011 
5012     case DIV:
5013       if (TREE_CODE (type) != REAL_TYPE)
5014 	t = signed_type_for (type);
5015       else
5016 	t = type;
5017 
5018       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5019 				    	 make_tree (t, XEXP (x, 0)),
5020 				    	 make_tree (t, XEXP (x, 1))));
5021     case UDIV:
5022       t = unsigned_type_for (type);
5023       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5024 				    	 make_tree (t, XEXP (x, 0)),
5025 				    	 make_tree (t, XEXP (x, 1))));
5026 
5027     case SIGN_EXTEND:
5028     case ZERO_EXTEND:
5029       t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5030 					  GET_CODE (x) == ZERO_EXTEND);
5031       return fold_convert (type, make_tree (t, XEXP (x, 0)));
5032 
5033     case CONST:
5034       return make_tree (type, XEXP (x, 0));
5035 
5036     case SYMBOL_REF:
5037       t = SYMBOL_REF_DECL (x);
5038       if (t)
5039 	return fold_convert (type, build_fold_addr_expr (t));
5040       /* else fall through.  */
5041 
5042     default:
5043       t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5044 
5045       /* If TYPE is a POINTER_TYPE, we might need to convert X from
5046 	 address mode to pointer mode.  */
5047       if (POINTER_TYPE_P (type))
5048 	x = convert_memory_address_addr_space
5049 	      (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5050 
5051       /* Note that we do *not* use SET_DECL_RTL here, because we do not
5052 	 want set_decl_rtl to go adjusting REG_ATTRS for this temporary.  */
5053       t->decl_with_rtl.rtl = x;
5054 
5055       return t;
5056     }
5057 }
5058 
5059 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5060    and returning TARGET.
5061 
5062    If TARGET is 0, a pseudo-register or constant is returned.  */
5063 
5064 rtx
5065 expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
5066 {
5067   rtx tem = 0;
5068 
5069   if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5070     tem = simplify_binary_operation (AND, mode, op0, op1);
5071   if (tem == 0)
5072     tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5073 
5074   if (target == 0)
5075     target = tem;
5076   else if (tem != target)
5077     emit_move_insn (target, tem);
5078   return target;
5079 }
5080 
5081 /* Helper function for emit_store_flag.  */
5082 static rtx
5083 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5084 	     enum machine_mode mode, enum machine_mode compare_mode,
5085 	     int unsignedp, rtx x, rtx y, int normalizep,
5086 	     enum machine_mode target_mode)
5087 {
5088   struct expand_operand ops[4];
5089   rtx op0, last, comparison, subtarget;
5090   enum machine_mode result_mode = insn_data[(int) icode].operand[0].mode;
5091 
5092   last = get_last_insn ();
5093   x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5094   y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5095   if (!x || !y)
5096     {
5097       delete_insns_since (last);
5098       return NULL_RTX;
5099     }
5100 
5101   if (target_mode == VOIDmode)
5102     target_mode = result_mode;
5103   if (!target)
5104     target = gen_reg_rtx (target_mode);
5105 
5106   comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5107 
5108   create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5109   create_fixed_operand (&ops[1], comparison);
5110   create_fixed_operand (&ops[2], x);
5111   create_fixed_operand (&ops[3], y);
5112   if (!maybe_expand_insn (icode, 4, ops))
5113     {
5114       delete_insns_since (last);
5115       return NULL_RTX;
5116     }
5117   subtarget = ops[0].value;
5118 
5119   /* If we are converting to a wider mode, first convert to
5120      TARGET_MODE, then normalize.  This produces better combining
5121      opportunities on machines that have a SIGN_EXTRACT when we are
5122      testing a single bit.  This mostly benefits the 68k.
5123 
5124      If STORE_FLAG_VALUE does not have the sign bit set when
5125      interpreted in MODE, we can do this conversion as unsigned, which
5126      is usually more efficient.  */
5127   if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
5128     {
5129       convert_move (target, subtarget,
5130 		    val_signbit_known_clear_p (result_mode,
5131 					       STORE_FLAG_VALUE));
5132       op0 = target;
5133       result_mode = target_mode;
5134     }
5135   else
5136     op0 = subtarget;
5137 
5138   /* If we want to keep subexpressions around, don't reuse our last
5139      target.  */
5140   if (optimize)
5141     subtarget = 0;
5142 
5143   /* Now normalize to the proper value in MODE.  Sometimes we don't
5144      have to do anything.  */
5145   if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5146     ;
5147   /* STORE_FLAG_VALUE might be the most negative number, so write
5148      the comparison this way to avoid a compile-time warning.  */
5149   else if (- normalizep == STORE_FLAG_VALUE)
5150     op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5151 
5152   /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5153      it hard to use a value of just the sign bit due to ANSI integer
5154      constant typing rules.  */
5155   else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5156     op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5157 			GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5158 			normalizep == 1);
5159   else
5160     {
5161       gcc_assert (STORE_FLAG_VALUE & 1);
5162 
5163       op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5164       if (normalizep == -1)
5165 	op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5166     }
5167 
5168   /* If we were converting to a smaller mode, do the conversion now.  */
5169   if (target_mode != result_mode)
5170     {
5171       convert_move (target, op0, 0);
5172       return target;
5173     }
5174   else
5175     return op0;
5176 }
5177 
5178 
5179 /* A subroutine of emit_store_flag only including "tricks" that do not
5180    need a recursive call.  These are kept separate to avoid infinite
5181    loops.  */
5182 
5183 static rtx
5184 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5185 		   enum machine_mode mode, int unsignedp, int normalizep,
5186 		   enum machine_mode target_mode)
5187 {
5188   rtx subtarget;
5189   enum insn_code icode;
5190   enum machine_mode compare_mode;
5191   enum mode_class mclass;
5192   enum rtx_code scode;
5193   rtx tem;
5194 
5195   if (unsignedp)
5196     code = unsigned_condition (code);
5197   scode = swap_condition (code);
5198 
5199   /* If one operand is constant, make it the second one.  Only do this
5200      if the other operand is not constant as well.  */
5201 
5202   if (swap_commutative_operands_p (op0, op1))
5203     {
5204       tem = op0;
5205       op0 = op1;
5206       op1 = tem;
5207       code = swap_condition (code);
5208     }
5209 
5210   if (mode == VOIDmode)
5211     mode = GET_MODE (op0);
5212 
5213   /* For some comparisons with 1 and -1, we can convert this to
5214      comparisons with zero.  This will often produce more opportunities for
5215      store-flag insns.  */
5216 
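  /* For example, with signed operands X < 1 is the same test as X <= 0 and
     X > -1 the same as X >= 0; with unsigned operands X >= 1 is X != 0 and
     X < 1 is X == 0.  */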
5217   switch (code)
5218     {
5219     case LT:
5220       if (op1 == const1_rtx)
5221 	op1 = const0_rtx, code = LE;
5222       break;
5223     case LE:
5224       if (op1 == constm1_rtx)
5225 	op1 = const0_rtx, code = LT;
5226       break;
5227     case GE:
5228       if (op1 == const1_rtx)
5229 	op1 = const0_rtx, code = GT;
5230       break;
5231     case GT:
5232       if (op1 == constm1_rtx)
5233 	op1 = const0_rtx, code = GE;
5234       break;
5235     case GEU:
5236       if (op1 == const1_rtx)
5237 	op1 = const0_rtx, code = NE;
5238       break;
5239     case LTU:
5240       if (op1 == const1_rtx)
5241 	op1 = const0_rtx, code = EQ;
5242       break;
5243     default:
5244       break;
5245     }
5246 
5247   /* If we are comparing a double-word integer with zero or -1, we can
5248      convert the comparison into one involving a single word.  */
5249   if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5250       && GET_MODE_CLASS (mode) == MODE_INT
5251       && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5252     {
5253       if ((code == EQ || code == NE)
5254 	  && (op1 == const0_rtx || op1 == constm1_rtx))
5255 	{
5256 	  rtx op00, op01;
5257 
5258 	  /* Do a logical OR or AND of the two words and compare the
5259 	     result.  */
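	  /* Against zero the value is zero iff both words are zero, so test
	     the IOR of the words; against -1 it is all ones iff both words
	     are all ones, so test the AND.  */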
5260 	  op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5261 	  op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5262 	  tem = expand_binop (word_mode,
5263 			      op1 == const0_rtx ? ior_optab : and_optab,
5264 			      op00, op01, NULL_RTX, unsignedp,
5265 			      OPTAB_DIRECT);
5266 
5267 	  if (tem != 0)
5268 	    tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5269 				   unsignedp, normalizep);
5270 	}
5271       else if ((code == LT || code == GE) && op1 == const0_rtx)
5272 	{
5273 	  rtx op0h;
5274 
5275 	  /* If testing the sign bit, can just test on high word.  */
5276 	  op0h = simplify_gen_subreg (word_mode, op0, mode,
5277 				      subreg_highpart_offset (word_mode,
5278 							      mode));
5279 	  tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5280 				 unsignedp, normalizep);
5281 	}
5282       else
5283 	tem = NULL_RTX;
5284 
5285       if (tem)
5286 	{
5287 	  if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5288 	    return tem;
5289 	  if (!target)
5290 	    target = gen_reg_rtx (target_mode);
5291 
5292 	  convert_move (target, tem,
5293 			!val_signbit_known_set_p (word_mode,
5294 						  (normalizep ? normalizep
5295 						   : STORE_FLAG_VALUE)));
5296 	  return target;
5297 	}
5298     }
5299 
5300   /* If this is A < 0 or A >= 0, we can do this by taking the ones
5301      complement of A (for GE) and shifting the sign bit to the low bit.  */
5302   if (op1 == const0_rtx && (code == LT || code == GE)
5303       && GET_MODE_CLASS (mode) == MODE_INT
5304       && (normalizep || STORE_FLAG_VALUE == 1
5305 	  || val_signbit_p (mode, STORE_FLAG_VALUE)))
5306     {
5307       subtarget = target;
5308 
5309       if (!target)
5310 	target_mode = mode;
5311 
5312       /* If the result is to be wider than OP0, it is best to convert it
5313 	 first.  If it is to be narrower, it is *incorrect* to convert it
5314 	 first.  */
5315       else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5316 	{
5317 	  op0 = convert_modes (target_mode, mode, op0, 0);
5318 	  mode = target_mode;
5319 	}
5320 
5321       if (target_mode != mode)
5322 	subtarget = 0;
5323 
5324       if (code == GE)
5325 	op0 = expand_unop (mode, one_cmpl_optab, op0,
5326 			   ((STORE_FLAG_VALUE == 1 || normalizep)
5327 			    ? 0 : subtarget), 0);
5328 
5329       if (STORE_FLAG_VALUE == 1 || normalizep)
5330 	/* If we are supposed to produce a 0/1 value, we want to do
5331 	   a logical shift from the sign bit to the low-order bit; for
5332 	   a -1/0 value, we do an arithmetic shift.  */
5333 	op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5334 			    GET_MODE_BITSIZE (mode) - 1,
5335 			    subtarget, normalizep != -1);
5336 
5337       if (mode != target_mode)
5338 	op0 = convert_modes (target_mode, mode, op0, 0);
5339 
5340       return op0;
5341     }
5342 
5343   mclass = GET_MODE_CLASS (mode);
5344   for (compare_mode = mode; compare_mode != VOIDmode;
5345        compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5346     {
5347      enum machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5348      icode = optab_handler (cstore_optab, optab_mode);
5349      if (icode != CODE_FOR_nothing)
5350 	{
5351 	  do_pending_stack_adjust ();
5352 	  tem = emit_cstore (target, icode, code, mode, compare_mode,
5353 			     unsignedp, op0, op1, normalizep, target_mode);
5354 	  if (tem)
5355 	    return tem;
5356 
5357 	  if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5358 	    {
5359 	      tem = emit_cstore (target, icode, scode, mode, compare_mode,
5360 				 unsignedp, op1, op0, normalizep, target_mode);
5361 	      if (tem)
5362 	        return tem;
5363 	    }
5364 	  break;
5365 	}
5366     }
5367 
5368   return 0;
5369 }
5370 
5371 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5372    and storing in TARGET.  Normally return TARGET.
5373    Return 0 if that cannot be done.
5374 
5375    MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
5376    it is VOIDmode, they cannot both be CONST_INT.
5377 
5378    UNSIGNEDP is for the case where we have to widen the operands
5379    to perform the operation.  It says to use zero-extension.
5380 
5381    NORMALIZEP is 1 if we should convert the result to be either zero
5382    or one.  NORMALIZEP is -1 if we should convert the result to be
5383    either zero or -1.  If NORMALIZEP is zero, the result will be left
5384    "raw" out of the scc insn.  */
5385 
5386 rtx
5387 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5388 		 enum machine_mode mode, int unsignedp, int normalizep)
5389 {
5390   enum machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5391   enum rtx_code rcode;
5392   rtx subtarget;
5393   rtx tem, last, trueval;
5394 
5395   tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5396 			   target_mode);
5397   if (tem)
5398     return tem;
5399 
5400   /* If we reached here, we can't do this with a scc insn, however there
5401      are some comparisons that can be done in other ways.  Don't do any
5402      of these cases if branches are very cheap.  */
5403   if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5404     return 0;
5405 
5406   /* See what we need to return.  We can only return a 1, -1, or the
5407      sign bit.  */
5408 
5409   if (normalizep == 0)
5410     {
5411       if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5412 	normalizep = STORE_FLAG_VALUE;
5413 
5414       else if (val_signbit_p (mode, STORE_FLAG_VALUE))
5415 	;
5416       else
5417 	return 0;
5418     }
5419 
5420   last = get_last_insn ();
5421 
5422   /* If optimizing, use different pseudo registers for each insn, instead
5423      of reusing the same pseudo.  This leads to better CSE, but slows
5424      down the compiler, since there are more pseudos.  */
5425   subtarget = (!optimize
5426 	       && (target_mode == mode)) ? target : NULL_RTX;
5427   trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5428 
5429   /* For floating-point comparisons, try the reverse comparison or try
5430      changing the "orderedness" of the comparison.  */
5431   if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5432     {
5433       enum rtx_code first_code;
5434       bool and_them;
5435 
5436       rcode = reverse_condition_maybe_unordered (code);
5437       if (can_compare_p (rcode, mode, ccp_store_flag)
5438           && (code == ORDERED || code == UNORDERED
5439 	      || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5440 	      || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5441 	{
5442           int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5443 		          || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5444 
5445 	  /* For the reverse comparison, use either an addition or a XOR.  */
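	  /* Either trick turns the reversed comparison's value back into the
	     one we want: when STORE_FLAG_VALUE and NORMALIZEP are 1 and -1
	     (in either order), adding NORMALIZEP maps {0, STORE_FLAG_VALUE}
	     to {NORMALIZEP, 0}; otherwise XOR-ing a normalized result with
	     TRUEVAL swaps its two possible values.  */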
5446           if (want_add
5447 	      && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5448 			   optimize_insn_for_speed_p ()) == 0)
5449 	    {
5450 	      tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5451 				       STORE_FLAG_VALUE, target_mode);
5452 	      if (tem)
5453                 return expand_binop (target_mode, add_optab, tem,
5454 				     GEN_INT (normalizep),
5455 				     target, 0, OPTAB_WIDEN);
5456 	    }
5457           else if (!want_add
5458 	           && rtx_cost (trueval, XOR, 1,
5459 			        optimize_insn_for_speed_p ()) == 0)
5460 	    {
5461 	      tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5462 				       normalizep, target_mode);
5463 	      if (tem)
5464                 return expand_binop (target_mode, xor_optab, tem, trueval,
5465 				     target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
5466 	    }
5467 	}
5468 
5469       delete_insns_since (last);
5470 
5471       /* Cannot split ORDERED and UNORDERED; only try the above trick.  */
5472       if (code == ORDERED || code == UNORDERED)
5473 	return 0;
5474 
5475       and_them = split_comparison (code, mode, &first_code, &code);
5476 
5477       /* If there are no NaNs, the first comparison should always fall through.
5478          Effectively change the comparison to the other one.  */
5479       if (!HONOR_NANS (mode))
5480 	{
5481           gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5482 	  return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5483 				    target_mode);
5484 	}
5485 
5486 #ifdef HAVE_conditional_move
5487       /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5488 	 conditional move.  */
5489       tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5490 			       normalizep, target_mode);
5491       if (tem == 0)
5492 	return 0;
5493 
5494       if (and_them)
5495         tem = emit_conditional_move (target, code, op0, op1, mode,
5496 				     tem, const0_rtx, GET_MODE (tem), 0);
5497       else
5498         tem = emit_conditional_move (target, code, op0, op1, mode,
5499 				     trueval, tem, GET_MODE (tem), 0);
5500 
5501       if (tem == 0)
5502         delete_insns_since (last);
5503       return tem;
5504 #else
5505       return 0;
5506 #endif
5507     }
5508 
5509   /* The remaining tricks only apply to integer comparisons.  */
5510 
5511   if (GET_MODE_CLASS (mode) != MODE_INT)
5512     return 0;
5513 
5514   /* If this is an equality comparison of integers, we can try to exclusive-or
5515      (or subtract) the two operands and use a recursive call to try the
5516      comparison with zero.  Don't do any of these cases if branches are
5517      very cheap.  */
5518 
5519   if ((code == EQ || code == NE) && op1 != const0_rtx)
5520     {
5521       tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5522 			  OPTAB_WIDEN);
5523 
5524       if (tem == 0)
5525 	tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5526 			    OPTAB_WIDEN);
5527       if (tem != 0)
5528 	tem = emit_store_flag (target, code, tem, const0_rtx,
5529 			       mode, unsignedp, normalizep);
5530       if (tem != 0)
5531 	return tem;
5532 
5533       delete_insns_since (last);
5534     }
5535 
5536   /* For integer comparisons, try the reverse comparison.  However, for
5537      small X and if we'd have to extend anyway, implementing "X != 0"
5538      as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0".  */
5539   rcode = reverse_condition (code);
5540   if (can_compare_p (rcode, mode, ccp_store_flag)
5541       && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5542 	    && code == NE
5543 	    && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5544 	    && op1 == const0_rtx))
5545     {
5546       int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5547 		      || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5548 
5549       /* Again, for the reverse comparison, use either an addition or a XOR.  */
5550       if (want_add
5551 	  && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5552 		       optimize_insn_for_speed_p ()) == 0)
5553 	{
5554 	  tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5555 				   STORE_FLAG_VALUE, target_mode);
5556 	  if (tem != 0)
5557             tem = expand_binop (target_mode, add_optab, tem,
5558 				GEN_INT (normalizep), target, 0, OPTAB_WIDEN);
5559 	}
5560       else if (!want_add
5561 	       && rtx_cost (trueval, XOR, 1,
5562 			    optimize_insn_for_speed_p ()) == 0)
5563 	{
5564 	  tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5565 				   normalizep, target_mode);
5566 	  if (tem != 0)
5567             tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5568 				INTVAL (trueval) >= 0, OPTAB_WIDEN);
5569 	}
5570 
5571       if (tem != 0)
5572 	return tem;
5573       delete_insns_since (last);
5574     }
5575 
5576   /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5577      the constant zero.  Reject all other comparisons at this point.  Only
5578      do LE and GT if branches are expensive, since those comparisons are
5579      themselves expensive on 2-operand machines.  */
5580 
5581   if (op1 != const0_rtx
5582       || (code != EQ && code != NE
5583 	  && (BRANCH_COST (optimize_insn_for_speed_p (),
5584 			   false) <= 1 || (code != LE && code != GT))))
5585     return 0;
5586 
5587   /* Try to put the result of the comparison in the sign bit.  Assume we can't
5588      do the necessary operation below.  */
5589 
5590   tem = 0;
5591 
5592   /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
5593      the sign bit set.  */
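  /* (If A == 0, A - 1 is all ones and the IOR has the sign bit set; if
     A < 0, A itself supplies it; if A > 0, both A and A - 1 are
     non-negative, so the sign bit stays clear.)  */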
5594 
5595   if (code == LE)
5596     {
5597       /* This is destructive, so SUBTARGET can't be OP0.  */
5598       if (rtx_equal_p (subtarget, op0))
5599 	subtarget = 0;
5600 
5601       tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5602 			  OPTAB_WIDEN);
5603       if (tem)
5604 	tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5605 			    OPTAB_WIDEN);
5606     }
5607 
5608   /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5609      number of bits in the mode of OP0, minus one.  */
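  /* (The arithmetic shift yields -1 for negative A and 0 otherwise, so the
     difference is -A when A >= 0 and -(A + 1) when A < 0; its sign bit is
     therefore set exactly when A > 0.)  */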
5610 
5611   if (code == GT)
5612     {
5613       if (rtx_equal_p (subtarget, op0))
5614 	subtarget = 0;
5615 
5616       tem = expand_shift (RSHIFT_EXPR, mode, op0,
5617 			  GET_MODE_BITSIZE (mode) - 1,
5618 			  subtarget, 0);
5619       tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5620 			  OPTAB_WIDEN);
5621     }
5622 
5623   if (code == EQ || code == NE)
5624     {
5625       /* For EQ or NE, one way to do the comparison is to apply an operation
5626 	 that converts the operand into a positive number if it is nonzero
5627 	 or zero if it was originally zero.  Then, for EQ, we subtract 1 and
5628 	 for NE we negate.  This puts the result in the sign bit.  Then we
5629 	 normalize with a shift, if needed.
5630 
5631 	 Two operations that can do the above actions are ABS and FFS, so try
5632 	 them.  If that doesn't work, and MODE is smaller than a full word,
5633 	 we can use zero-extension to the wider mode (an unsigned conversion)
5634 	 as the operation.  */
5635 
5636       /* Note that ABS doesn't yield a positive number for INT_MIN, but
5637 	 that is compensated by the subsequent overflow when subtracting
5638 	 one / negating.  */
5639 
5640       if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5641 	tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5642       else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5643 	tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5644       else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5645 	{
5646 	  tem = convert_modes (word_mode, mode, op0, 1);
5647 	  mode = word_mode;
5648 	}
5649 
5650       if (tem != 0)
5651 	{
5652 	  if (code == EQ)
5653 	    tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5654 				0, OPTAB_WIDEN);
5655 	  else
5656 	    tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5657 	}
5658 
5659       /* If we couldn't do it that way, for NE we can "or" the two's complement
5660 	 of the value with itself.  For EQ, we take the one's complement of
5661 	 that "or", which is an extra insn, so we only handle EQ if branches
5662 	 are expensive.  */
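      /* (If A is nonzero, at least one of A and -A is negative (for
	 A == INT_MIN both are), so the IOR has the sign bit set; if A is
	 zero the IOR is zero.)  */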
5663 
5664       if (tem == 0
5665 	  && (code == NE
5666 	      || BRANCH_COST (optimize_insn_for_speed_p (),
5667 		      	      false) > 1))
5668 	{
5669 	  if (rtx_equal_p (subtarget, op0))
5670 	    subtarget = 0;
5671 
5672 	  tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5673 	  tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5674 			      OPTAB_WIDEN);
5675 
5676 	  if (tem && code == EQ)
5677 	    tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5678 	}
5679     }
5680 
5681   if (tem && normalizep)
5682     tem = expand_shift (RSHIFT_EXPR, mode, tem,
5683 			GET_MODE_BITSIZE (mode) - 1,
5684 			subtarget, normalizep == 1);
5685 
5686   if (tem)
5687     {
5688       if (!target)
5689         ;
5690       else if (GET_MODE (tem) != target_mode)
5691 	{
5692 	  convert_move (target, tem, 0);
5693 	  tem = target;
5694 	}
5695       else if (!subtarget)
5696 	{
5697 	  emit_move_insn (target, tem);
5698 	  tem = target;
5699 	}
5700     }
5701   else
5702     delete_insns_since (last);
5703 
5704   return tem;
5705 }
5706 
5707 /* Like emit_store_flag, but always succeeds.  */
5708 
5709 rtx
5710 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5711 		       enum machine_mode mode, int unsignedp, int normalizep)
5712 {
5713   rtx tem, label;
5714   rtx trueval, falseval;
5715 
5716   /* First see if emit_store_flag can do the job.  */
5717   tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5718   if (tem != 0)
5719     return tem;
5720 
5721   if (!target)
5722     target = gen_reg_rtx (word_mode);
5723 
5724   /* If this failed, we have to do this with set/compare/jump/set code.
5725      For foo != 0, if foo is in OP0, just replace it with 1 if nonzero.  */
5726   trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
5727   if (code == NE
5728       && GET_MODE_CLASS (mode) == MODE_INT
5729       && REG_P (target)
5730       && op0 == target
5731       && op1 == const0_rtx)
5732     {
5733       label = gen_label_rtx ();
5734       do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp,
5735 			       mode, NULL_RTX, NULL_RTX, label, -1);
5736       emit_move_insn (target, trueval);
5737       emit_label (label);
5738       return target;
5739     }
5740 
5741   if (!REG_P (target)
5742       || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5743     target = gen_reg_rtx (GET_MODE (target));
5744 
5745   /* Jump in the right direction if the target cannot implement CODE
5746      but can jump on its reverse condition.  */
5747   falseval = const0_rtx;
5748   if (! can_compare_p (code, mode, ccp_jump)
5749       && (! FLOAT_MODE_P (mode)
5750           || code == ORDERED || code == UNORDERED
5751           || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5752           || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5753     {
5754       enum rtx_code rcode;
5755       if (FLOAT_MODE_P (mode))
5756         rcode = reverse_condition_maybe_unordered (code);
5757       else
5758         rcode = reverse_condition (code);
5759 
5760       /* Canonicalize to UNORDERED for the libcall.  */
5761       if (can_compare_p (rcode, mode, ccp_jump)
5762           || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
5763 	{
5764 	  falseval = trueval;
5765 	  trueval = const0_rtx;
5766 	  code = rcode;
5767 	}
5768     }
5769 
5770   emit_move_insn (target, trueval);
5771   label = gen_label_rtx ();
5772   do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5773 			   NULL_RTX, label, -1);
5774 
5775   emit_move_insn (target, falseval);
5776   emit_label (label);
5777 
5778   return target;
5779 }
5780 
5781 /* Perform possibly multi-word comparison and conditional jump to LABEL
5782    if ARG1 OP ARG2 is true, where ARG1 and ARG2 are of mode MODE.  This is
5783    now a thin wrapper around do_compare_rtx_and_jump.  */
5784 
5785 static void
5786 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
5787 		 rtx label)
5788 {
5789   int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
5790   do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode,
5791 			   NULL_RTX, NULL_RTX, label, -1);
5792 }
5793