xref: /netbsd-src/external/gpl3/gcc.old/dist/gcc/lower-subreg.c (revision bdc22b2e01993381dcefeff2bc9b56ca75a4235c)
1 /* Decompose multiword subregs.
2    Copyright (C) 2007-2015 Free Software Foundation, Inc.
3    Contributed by Richard Henderson <rth@redhat.com>
4 		  Ian Lance Taylor <iant@google.com>
5 
6 This file is part of GCC.
7 
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12 
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
16 for more details.
17 
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3.  If not see
20 <http://www.gnu.org/licenses/>.  */
21 
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "machmode.h"
26 #include "tm.h"
27 #include "hash-set.h"
28 #include "vec.h"
29 #include "double-int.h"
30 #include "input.h"
31 #include "alias.h"
32 #include "symtab.h"
33 #include "wide-int.h"
34 #include "inchash.h"
35 #include "tree.h"
36 #include "rtl.h"
37 #include "tm_p.h"
38 #include "flags.h"
39 #include "insn-config.h"
40 #include "obstack.h"
41 #include "predict.h"
42 #include "hard-reg-set.h"
43 #include "function.h"
44 #include "dominance.h"
45 #include "cfg.h"
46 #include "cfgrtl.h"
47 #include "cfgbuild.h"
48 #include "basic-block.h"
49 #include "recog.h"
50 #include "bitmap.h"
51 #include "dce.h"
52 #include "hashtab.h"
53 #include "statistics.h"
54 #include "real.h"
55 #include "fixed-value.h"
56 #include "expmed.h"
57 #include "dojump.h"
58 #include "explow.h"
59 #include "calls.h"
60 #include "emit-rtl.h"
61 #include "varasm.h"
62 #include "stmt.h"
63 #include "expr.h"
64 #include "except.h"
65 #include "regs.h"
66 #include "tree-pass.h"
67 #include "df.h"
68 #include "lower-subreg.h"
69 #include "rtl-iter.h"
70 
/* Normalize STACK_GROWS_DOWNWARD to an always-defined 0/1 value so it
   can be used inside expressions (it is compared with == against
   WORDS_BIG_ENDIAN in the push-operand code below).  */
71 #ifdef STACK_GROWS_DOWNWARD
72 # undef STACK_GROWS_DOWNWARD
73 # define STACK_GROWS_DOWNWARD 1
74 #else
75 # define STACK_GROWS_DOWNWARD 0
76 #endif
77 
78 
79 /* Decompose multi-word pseudo-registers into individual
80    pseudo-registers when possible and profitable.  This is possible
81    when all the uses of a multi-word register are via SUBREG, or are
82    copies of the register to another location.  Breaking apart the
83    register permits more CSE and permits better register allocation.
84    This is profitable if the machine does not have move instructions
85    to do this.
86 
87    This pass only splits moves with modes that are wider than
88    word_mode and ASHIFTs, LSHIFTRTs, ASHIFTRTs and ZERO_EXTENDs with
89    integer modes that are twice the width of word_mode.  The latter
90    could be generalized if there was a need to do this, but the trend in
91    architectures is to not need this.
92 
93    There are two useful preprocessor defines for use by maintainers:
94 
95    #define LOG_COSTS 1
96 
97    if you wish to see the actual cost estimates that are being used
98    for each mode wider than word mode and the cost estimates for zero
99    extension and the shifts.   This can be useful when port maintainers
100    are tuning insn rtx costs.
101 
102    #define FORCE_LOWERING 1
103 
104    if you wish to test the pass with all the transformation forced on.
105    This can be useful for finding bugs in the transformations.  */
106 
107 #define LOG_COSTS 0
108 #define FORCE_LOWERING 0
109 
110 /* Bit N in this bitmap is set if regno N is used in a context in
111    which we can decompose it.  */
112 static bitmap decomposable_context;
113 
114 /* Bit N in this bitmap is set if regno N is used in a context in
115    which it cannot be decomposed.  */
116 static bitmap non_decomposable_context;
117 
118 /* Bit N in this bitmap is set if regno N is used in a subreg
119    which changes the mode but not the size.  This typically happens
120    when the register is accessed as a floating-point value; we want to
121    avoid generating accesses to its subwords in integer modes.  */
122 static bitmap subreg_context;
123 
124 /* Bit N in the bitmap in element M of this array is set if there is a
125    copy from reg M to reg N.  */
126 static vec<bitmap> reg_copy_graph;
127 
/* Per-target cost decisions, filled in by init_lower_subreg via
   compute_costs.  */
128 struct target_lower_subreg default_target_lower_subreg;
129 #if SWITCHABLE_TARGET
130 struct target_lower_subreg *this_target_lower_subreg
131   = &default_target_lower_subreg;
132 #endif
133 
/* Convenience accessors for the current target's lowering data.  */
134 #define twice_word_mode \
135   this_target_lower_subreg->x_twice_word_mode
136 #define choices \
137   this_target_lower_subreg->x_choices
138 
139 /* RTXes used while computing costs.  These are scratch expressions:
   shift_cost and compute_costs retarget them in place (via PUT_CODE,
   PUT_MODE and SET_SRC) rather than allocating fresh RTL for every
   cost query.  */
140 struct cost_rtxes {
141   /* Source and target registers.  */
142   rtx source;
143   rtx target;
144 
145   /* A twice_word_mode ZERO_EXTEND of SOURCE.  */
146   rtx zext;
147 
148   /* A shift of SOURCE.  */
149   rtx shift;
150 
151   /* A SET of TARGET.  */
152   rtx set;
153 };
154 
155 /* Return the cost of a CODE shift in mode MODE by OP1 bits, using the
156    rtxes in RTXES.  SPEED_P selects between the speed and size cost.  */
157 
158 static int
159 shift_cost (bool speed_p, struct cost_rtxes *rtxes, enum rtx_code code,
160 	    machine_mode mode, int op1)
161 {
162   PUT_CODE (rtxes->shift, code);
163   PUT_MODE (rtxes->shift, mode);
164   PUT_MODE (rtxes->source, mode);
165   XEXP (rtxes->shift, 1) = GEN_INT (op1);
166   return set_src_cost (rtxes->shift, speed_p);
167 }
168 
169 /* For each X in the range [0, BITS_PER_WORD), set SPLITTING[X]
170    to true if it is profitable to split a double-word CODE shift
171    of X + BITS_PER_WORD bits.  SPEED_P says whether we are testing
172    for speed or size profitability.
173 
174    Use the rtxes in RTXES to calculate costs.  WORD_MOVE_ZERO_COST is
175    the cost of moving zero into a word-mode register.  WORD_MOVE_COST
176    is the cost of moving between word registers.  */
177 
178 static void
179 compute_splitting_shift (bool speed_p, struct cost_rtxes *rtxes,
180 			 bool *splitting, enum rtx_code code,
181 			 int word_move_zero_cost, int word_move_cost)
182 {
183   int wide_cost, narrow_cost, upper_cost, i;
184 
185   for (i = 0; i < BITS_PER_WORD; i++)
186     {
187       wide_cost = shift_cost (speed_p, rtxes, code, twice_word_mode,
188 			      i + BITS_PER_WORD);
189       if (i == 0)
190 	narrow_cost = word_move_cost;
191       else
192 	narrow_cost = shift_cost (speed_p, rtxes, code, word_mode, i);
193 
194       if (code != ASHIFTRT)
195 	upper_cost = word_move_zero_cost;
196       else if (i == BITS_PER_WORD - 1)
197 	upper_cost = word_move_cost;
198       else
199 	upper_cost = shift_cost (speed_p, rtxes, code, word_mode,
200 				 BITS_PER_WORD - 1);
201 
202       if (LOG_COSTS)
203 	fprintf (stderr, "%s %s by %d: original cost %d, split cost %d + %d\n",
204 		 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (code),
205 		 i + BITS_PER_WORD, wide_cost, narrow_cost, upper_cost);
206 
207       if (FORCE_LOWERING || wide_cost >= narrow_cost + upper_cost)
208 	splitting[i] = true;
209     }
210 }
211 
212 /* Compute what we should do when optimizing for speed or size; SPEED_P
213    selects which.  Use RTXES for computing costs.  */
214 
215 static void
216 compute_costs (bool speed_p, struct cost_rtxes *rtxes)
217 {
218   unsigned int i;
219   int word_move_zero_cost, word_move_cost;
220 
221   PUT_MODE (rtxes->target, word_mode);
222   SET_SRC (rtxes->set) = CONST0_RTX (word_mode);
223   word_move_zero_cost = set_rtx_cost (rtxes->set, speed_p);
224 
225   SET_SRC (rtxes->set) = rtxes->source;
226   word_move_cost = set_rtx_cost (rtxes->set, speed_p);
227 
228   if (LOG_COSTS)
229     fprintf (stderr, "%s move: from zero cost %d, from reg cost %d\n",
230 	     GET_MODE_NAME (word_mode), word_move_zero_cost, word_move_cost);
231 
232   for (i = 0; i < MAX_MACHINE_MODE; i++)
233     {
234       machine_mode mode = (machine_mode) i;
235       int factor = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
236       if (factor > 1)
237 	{
238 	  int mode_move_cost;
239 
240 	  PUT_MODE (rtxes->target, mode);
241 	  PUT_MODE (rtxes->source, mode);
242 	  mode_move_cost = set_rtx_cost (rtxes->set, speed_p);
243 
244 	  if (LOG_COSTS)
245 	    fprintf (stderr, "%s move: original cost %d, split cost %d * %d\n",
246 		     GET_MODE_NAME (mode), mode_move_cost,
247 		     word_move_cost, factor);
248 
249 	  if (FORCE_LOWERING || mode_move_cost >= word_move_cost * factor)
250 	    {
251 	      choices[speed_p].move_modes_to_split[i] = true;
252 	      choices[speed_p].something_to_do = true;
253 	    }
254 	}
255     }
256 
257   /* For the moves and shifts, the only case that is checked is one
258      where the mode of the target is an integer mode twice the width
259      of the word_mode.
260 
261      If it is not profitable to split a double word move then do not
262      even consider the shifts or the zero extension.  */
263   if (choices[speed_p].move_modes_to_split[(int) twice_word_mode])
264     {
265       int zext_cost;
266 
267       /* The only case here to check to see if moving the upper part with a
268 	 zero is cheaper than doing the zext itself.  */
269       PUT_MODE (rtxes->source, word_mode);
270       zext_cost = set_src_cost (rtxes->zext, speed_p);
271 
272       if (LOG_COSTS)
273 	fprintf (stderr, "%s %s: original cost %d, split cost %d + %d\n",
274 		 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (ZERO_EXTEND),
275 		 zext_cost, word_move_cost, word_move_zero_cost);
276 
277       if (FORCE_LOWERING || zext_cost >= word_move_cost + word_move_zero_cost)
278 	choices[speed_p].splitting_zext = true;
279 
280       compute_splitting_shift (speed_p, rtxes,
281 			       choices[speed_p].splitting_ashift, ASHIFT,
282 			       word_move_zero_cost, word_move_cost);
283       compute_splitting_shift (speed_p, rtxes,
284 			       choices[speed_p].splitting_lshiftrt, LSHIFTRT,
285 			       word_move_zero_cost, word_move_cost);
286       compute_splitting_shift (speed_p, rtxes,
287 			       choices[speed_p].splitting_ashiftrt, ASHIFTRT,
288 			       word_move_zero_cost, word_move_cost);
289     }
290 }
291 
292 /* Do one-per-target initialisation.  This involves determining
293    which operations on the machine are profitable.  If none are found,
294    then the pass just returns when called.  */
295 
296 void
297 init_lower_subreg (void)
298 {
299   struct cost_rtxes rtxes;
300 
301   memset (this_target_lower_subreg, 0, sizeof (*this_target_lower_subreg));
302 
303   twice_word_mode = GET_MODE_2XWIDER_MODE (word_mode);
304 
    /* Build the scratch RTXes that compute_costs retargets in place.
       The zext and shift templates use twice_word_mode, so they must be
       created after it is set above.  */
305   rtxes.target = gen_rtx_REG (word_mode, FIRST_PSEUDO_REGISTER);
306   rtxes.source = gen_rtx_REG (word_mode, FIRST_PSEUDO_REGISTER + 1);
307   rtxes.set = gen_rtx_SET (VOIDmode, rtxes.target, rtxes.source);
308   rtxes.zext = gen_rtx_ZERO_EXTEND (twice_word_mode, rtxes.source);
309   rtxes.shift = gen_rtx_ASHIFT (twice_word_mode, rtxes.source, const0_rtx);
310 
311   if (LOG_COSTS)
312     fprintf (stderr, "\nSize costs\n==========\n\n");
313   compute_costs (false, &rtxes);
314 
315   if (LOG_COSTS)
316     fprintf (stderr, "\nSpeed costs\n===========\n\n");
317   compute_costs (true, &rtxes);
318 }
319 
320 static bool
321 simple_move_operand (rtx x)
322 {
323   if (GET_CODE (x) == SUBREG)
324     x = SUBREG_REG (x);
325 
326   if (!OBJECT_P (x))
327     return false;
328 
329   if (GET_CODE (x) == LABEL_REF
330       || GET_CODE (x) == SYMBOL_REF
331       || GET_CODE (x) == HIGH
332       || GET_CODE (x) == CONST)
333     return false;
334 
335   if (MEM_P (x)
336       && (MEM_VOLATILE_P (x)
337 	  || mode_dependent_address_p (XEXP (x, 0), MEM_ADDR_SPACE (x))))
338     return false;
339 
340   return true;
341 }
342 
343 /* If INSN is a single set between two objects that we want to split,
344    return the single set.  SPEED_P says whether we are optimizing
345    INSN for speed or size.
346 
347    INSN should have been passed to recog and extract_insn before this
348    is called.  */
349 
350 static rtx
351 simple_move (rtx_insn *insn, bool speed_p)
352 {
353   rtx x;
354   rtx set;
355   machine_mode mode;
356 
357   if (recog_data.n_operands != 2)
358     return NULL_RTX;
359 
360   set = single_set (insn);
361   if (!set)
362     return NULL_RTX;
363 
364   x = SET_DEST (set);
365   if (x != recog_data.operand[0] && x != recog_data.operand[1])
366     return NULL_RTX;
367   if (!simple_move_operand (x))
368     return NULL_RTX;
369 
370   x = SET_SRC (set);
371   if (x != recog_data.operand[0] && x != recog_data.operand[1])
372     return NULL_RTX;
373   /* For the src we can handle ASM_OPERANDS, and it is beneficial for
374      things like x86 rdtsc which returns a DImode value.  */
375   if (GET_CODE (x) != ASM_OPERANDS
376       && !simple_move_operand (x))
377     return NULL_RTX;
378 
379   /* We try to decompose in integer modes, to avoid generating
380      inefficient code copying between integer and floating point
381      registers.  That means that we can't decompose if this is a
382      non-integer mode for which there is no integer mode of the same
383      size.  */
384   mode = GET_MODE (SET_DEST (set));
385   if (!SCALAR_INT_MODE_P (mode)
386       && (mode_for_size (GET_MODE_SIZE (mode) * BITS_PER_UNIT, MODE_INT, 0)
387 	  == BLKmode))
388     return NULL_RTX;
389 
390   /* Reject PARTIAL_INT modes.  They are used for processor specific
391      purposes and it's probably best not to tamper with them.  */
392   if (GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
393     return NULL_RTX;
394 
395   if (!choices[speed_p].move_modes_to_split[(int) mode])
396     return NULL_RTX;
397 
398   return set;
399 }
400 
401 /* If SET is a copy from one multi-word pseudo-register to another,
402    record that in reg_copy_graph.  Return whether it is such a
403    copy.  */
404 
405 static bool
406 find_pseudo_copy (rtx set)
407 {
408   rtx dest = SET_DEST (set);
409   rtx src = SET_SRC (set);
410   unsigned int rd, rs;
411   bitmap b;
412 
413   if (!REG_P (dest) || !REG_P (src))
414     return false;
415 
416   rd = REGNO (dest);
417   rs = REGNO (src);
418   if (HARD_REGISTER_NUM_P (rd) || HARD_REGISTER_NUM_P (rs))
419     return false;
420 
421   b = reg_copy_graph[rs];
422   if (b == NULL)
423     {
424       b = BITMAP_ALLOC (NULL);
425       reg_copy_graph[rs] = b;
426     }
427 
428   bitmap_set_bit (b, rd);
429 
430   return true;
431 }
432 
433 /* Look through the registers in DECOMPOSABLE_CONTEXT.  For each case
434    where they are copied to another register, add the register to
435    which they are copied to DECOMPOSABLE_CONTEXT.  Use
436    NON_DECOMPOSABLE_CONTEXT to limit this--we don't bother to track
437    copies of registers which are in NON_DECOMPOSABLE_CONTEXT.  */
438 
439 static void
440 propagate_pseudo_copies (void)
441 {
442   bitmap queue, propagate;
443 
444   queue = BITMAP_ALLOC (NULL);
445   propagate = BITMAP_ALLOC (NULL);
446 
447   bitmap_copy (queue, decomposable_context);
448   do
449     {
450       bitmap_iterator iter;
451       unsigned int i;
452 
453       bitmap_clear (propagate);
454 
455       EXECUTE_IF_SET_IN_BITMAP (queue, 0, i, iter)
456 	{
457 	  bitmap b = reg_copy_graph[i];
458 	  if (b)
459 	    bitmap_ior_and_compl_into (propagate, b, non_decomposable_context);
460 	}
461 
462       bitmap_and_compl (queue, propagate, decomposable_context);
463       bitmap_ior_into (decomposable_context, propagate);
464     }
465   while (!bitmap_empty_p (queue));
466 
467   BITMAP_FREE (queue);
468   BITMAP_FREE (propagate);
469 }
470 
471 /* Classification of the insn currently being scanned; a pointer to
472    one of these values is passed to find_decomposable_subregs.  */
473 
474 enum classify_move_insn
475 {
476   /* Not a simple move from one location to another.  */
477   NOT_SIMPLE_MOVE,
478   /* A simple move we want to decompose.  */
479   DECOMPOSABLE_SIMPLE_MOVE,
480   /* Any other simple move.  */
481   SIMPLE_MOVE
482 };
483 
484 /* If we find a SUBREG in *LOC which we could use to decompose a
485    pseudo-register, set a bit in DECOMPOSABLE_CONTEXT.  If we find an
486    unadorned register which is not a simple pseudo-register copy,
487    *PCMI gives the classification of the move, and we set a bit in
488    DECOMPOSABLE_CONTEXT or NON_DECOMPOSABLE_CONTEXT as appropriate.  */
489 
490 static void
491 find_decomposable_subregs (rtx *loc, enum classify_move_insn *pcmi)
492 {
493   subrtx_var_iterator::array_type array;
494   FOR_EACH_SUBRTX_VAR (iter, array, *loc, NONCONST)
495     {
496       rtx x = *iter;
497       if (GET_CODE (x) == SUBREG)
498 	{
499 	  rtx inner = SUBREG_REG (x);
500 	  unsigned int regno, outer_size, inner_size, outer_words, inner_words;
501 
502 	  if (!REG_P (inner))
503 	    continue;
504 
505 	  regno = REGNO (inner);
506 	  if (HARD_REGISTER_NUM_P (regno))
507 	    {
	      /* Hard registers are never decomposed; don't look inside
		 this subreg either.  */
508 	      iter.skip_subrtxes ();
509 	      continue;
510 	    }
511 
512 	  outer_size = GET_MODE_SIZE (GET_MODE (x));
513 	  inner_size = GET_MODE_SIZE (GET_MODE (inner));
514 	  outer_words = (outer_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
515 	  inner_words = (inner_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
516 
517 	  /* We only try to decompose single word subregs of multi-word
518 	     registers.  When we find one, we skip the subrtxes to avoid
519 	     iterating over the inner register.
520 
521 	     ??? This doesn't allow, e.g., DImode subregs of TImode values
522 	     on 32-bit targets.  We would need to record the way the
523 	     pseudo-register was used, and only decompose if all the uses
524 	     were the same number and size of pieces.  Hopefully this
525 	     doesn't happen much.  */
526 
527 	  if (outer_words == 1 && inner_words > 1)
528 	    {
529 	      bitmap_set_bit (decomposable_context, regno);
530 	      iter.skip_subrtxes ();
531 	      continue;
532 	    }
533 
534 	  /* If this is a cast from one mode to another, where the modes
535 	     have the same size, and they are not tieable, then mark this
536 	     register as non-decomposable.  If we decompose it we are
537 	     likely to mess up whatever the backend is trying to do.  */
538 	  if (outer_words > 1
539 	      && outer_size == inner_size
540 	      && !MODES_TIEABLE_P (GET_MODE (x), GET_MODE (inner)))
541 	    {
542 	      bitmap_set_bit (non_decomposable_context, regno);
543 	      bitmap_set_bit (subreg_context, regno);
544 	      iter.skip_subrtxes ();
545 	      continue;
546 	    }
547 	}
548       else if (REG_P (x))
549 	{
550 	  unsigned int regno;
551 
552 	  /* We will see an outer SUBREG before we see the inner REG, so
553 	     when we see a plain REG here it means a direct reference to
554 	     the register.
555 
556 	     If this is not a simple copy from one location to another,
557 	     then we can not decompose this register.  If this is a simple
558 	     copy we want to decompose, and the mode is right,
559 	     then we mark the register as decomposable.
560 	     Otherwise we don't say anything about this register --
561 	     it could be decomposed, but whether that would be
562 	     profitable depends upon how it is used elsewhere.
563 
564 	     We only set bits in the bitmap for multi-word
565 	     pseudo-registers, since those are the only ones we care about
566 	     and it keeps the size of the bitmaps down.  */
567 
568 	  regno = REGNO (x);
569 	  if (!HARD_REGISTER_NUM_P (regno)
570 	      && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
571 	    {
572 	      switch (*pcmi)
573 		{
574 		case NOT_SIMPLE_MOVE:
575 		  bitmap_set_bit (non_decomposable_context, regno);
576 		  break;
577 		case DECOMPOSABLE_SIMPLE_MOVE:
		  /* Only decompose when word-mode pieces can share hard
		     registers with the original mode.  */
578 		  if (MODES_TIEABLE_P (GET_MODE (x), word_mode))
579 		    bitmap_set_bit (decomposable_context, regno);
580 		  break;
581 		case SIMPLE_MOVE:
582 		  break;
583 		default:
584 		  gcc_unreachable ();
585 		}
586 	    }
587 	}
588       else if (MEM_P (x))
589 	{
590 	  enum classify_move_insn cmi_mem = NOT_SIMPLE_MOVE;
591 
592 	  /* Any registers used in a MEM do not participate in a
593 	     SIMPLE_MOVE or DECOMPOSABLE_SIMPLE_MOVE.  Do our own recursion
594 	     here, and skip the subrtxes to block the parent's recursion.  */
595 	  find_decomposable_subregs (&XEXP (x, 0), &cmi_mem);
596 	  iter.skip_subrtxes ();
597 	}
598     }
599 }
600 
601 /* Decompose REGNO into word-sized components.  We smash the REG node
602    in place.  This ensures that (1) something goes wrong quickly if we
603    fail to make some replacement, and (2) the debug information inside
604    the symbol table is automatically kept up to date.  */
605 
606 static void
607 decompose_register (unsigned int regno)
608 {
609   rtx reg;
610   unsigned int words, i;
611   rtvec v;
612 
613   reg = regno_reg_rtx[regno];
614 
615   regno_reg_rtx[regno] = NULL_RTX;
616 
617   words = GET_MODE_SIZE (GET_MODE (reg));
618   words = (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
619 
620   v = rtvec_alloc (words);
621   for (i = 0; i < words; ++i)
622     RTVEC_ELT (v, i) = gen_reg_rtx_offset (reg, word_mode, i * UNITS_PER_WORD);
623 
624   PUT_CODE (reg, CONCATN);
625   XVEC (reg, 0) = v;
626 
627   if (dump_file)
628     {
629       fprintf (dump_file, "; Splitting reg %u ->", regno);
630       for (i = 0; i < words; ++i)
631 	fprintf (dump_file, " %u", REGNO (XVECEXP (reg, 0, i)));
632       fputc ('\n', dump_file);
633     }
634 }
635 
636 /* Get a SUBREG of a CONCATN.  */
637 
638 static rtx
639 simplify_subreg_concatn (machine_mode outermode, rtx op,
640 			 unsigned int byte)
641 {
642   unsigned int inner_size;
643   machine_mode innermode, partmode;
644   rtx part;
645   unsigned int final_offset;
646 
647   gcc_assert (GET_CODE (op) == CONCATN);
648   gcc_assert (byte % GET_MODE_SIZE (outermode) == 0);
649 
650   innermode = GET_MODE (op);
651   gcc_assert (byte < GET_MODE_SIZE (innermode));
652   gcc_assert (GET_MODE_SIZE (outermode) <= GET_MODE_SIZE (innermode));
653 
654   inner_size = GET_MODE_SIZE (innermode) / XVECLEN (op, 0);
655   part = XVECEXP (op, 0, byte / inner_size);
656   partmode = GET_MODE (part);
657 
658   /* VECTOR_CSTs in debug expressions are expanded into CONCATN instead of
659      regular CONST_VECTORs.  They have vector or integer modes, depending
660      on the capabilities of the target.  Cope with them.  */
661   if (partmode == VOIDmode && VECTOR_MODE_P (innermode))
662     partmode = GET_MODE_INNER (innermode);
663   else if (partmode == VOIDmode)
664     {
665       enum mode_class mclass = GET_MODE_CLASS (innermode);
666       partmode = mode_for_size (inner_size * BITS_PER_UNIT, mclass, 0);
667     }
668 
669   final_offset = byte % inner_size;
670   if (final_offset + GET_MODE_SIZE (outermode) > inner_size)
671     return NULL_RTX;
672 
673   return simplify_gen_subreg (outermode, part, partmode, final_offset);
674 }
675 
676 /* Wrapper around simplify_gen_subreg which handles CONCATN.  */
677 
678 static rtx
679 simplify_gen_subreg_concatn (machine_mode outermode, rtx op,
680 			     machine_mode innermode, unsigned int byte)
681 {
682   rtx ret;
683 
684   /* We have to handle generating a SUBREG of a SUBREG of a CONCATN.
685      If OP is a SUBREG of a CONCATN, then it must be a simple mode
686      change with the same size and offset 0, or it must extract a
687      part.  We shouldn't see anything else here.  */
688   if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == CONCATN)
689     {
690       rtx op2;
691 
      /* Case 1: a same-size, offset-0 mode change.  Peel the SUBREG off
	 and recurse on the CONCATN directly.  */
692       if ((GET_MODE_SIZE (GET_MODE (op))
693 	   == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))))
694 	  && SUBREG_BYTE (op) == 0)
695 	return simplify_gen_subreg_concatn (outermode, SUBREG_REG (op),
696 					    GET_MODE (SUBREG_REG (op)), byte);
697 
      /* Case 2: the SUBREG extracts a part of the CONCATN.  */
698       op2 = simplify_subreg_concatn (GET_MODE (op), SUBREG_REG (op),
699 				     SUBREG_BYTE (op));
700       if (op2 == NULL_RTX)
701 	{
702 	  /* We don't handle paradoxical subregs here.  */
703 	  gcc_assert (GET_MODE_SIZE (outermode)
704 		      <= GET_MODE_SIZE (GET_MODE (op)));
705 	  gcc_assert (GET_MODE_SIZE (GET_MODE (op))
706 		      <= GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))));
	  /* The extraction straddled a CONCATN element; fold the two
	     byte offsets together and extract directly from the
	     CONCATN.  */
707 	  op2 = simplify_subreg_concatn (outermode, SUBREG_REG (op),
708 					 byte + SUBREG_BYTE (op));
709 	  gcc_assert (op2 != NULL_RTX);
710 	  return op2;
711 	}
712 
713       op = op2;
714       gcc_assert (op != NULL_RTX);
715       gcc_assert (innermode == GET_MODE (op));
716     }
717 
718   if (GET_CODE (op) == CONCATN)
719     return simplify_subreg_concatn (outermode, op, byte);
720 
721   ret = simplify_gen_subreg (outermode, op, innermode, byte);
722 
723   /* If we see an insn like (set (reg:DI) (subreg:DI (reg:SI) 0)) then
724      resolve_simple_move will ask for the high part of the paradoxical
725      subreg, which does not have a value.  Just return a zero.  */
726   if (ret == NULL_RTX
727       && GET_CODE (op) == SUBREG
728       && SUBREG_BYTE (op) == 0
729       && (GET_MODE_SIZE (innermode)
730 	  > GET_MODE_SIZE (GET_MODE (SUBREG_REG (op)))))
731     return CONST0_RTX (outermode);
732 
733   gcc_assert (ret != NULL_RTX);
734   return ret;
735 }
736 
737 /* Return whether we should resolve X into the registers into which it
738    was decomposed.  */
739 
740 static bool
741 resolve_reg_p (rtx x)
742 {
743   return GET_CODE (x) == CONCATN;
744 }
745 
746 /* Return whether X is a SUBREG of a register which we need to
747    resolve.  */
748 
749 static bool
750 resolve_subreg_p (rtx x)
751 {
752   if (GET_CODE (x) != SUBREG)
753     return false;
754   return resolve_reg_p (SUBREG_REG (x));
755 }
756 
757 /* Look for SUBREGs in *LOC which need to be decomposed, and queue
   replacements for them (via validate_change with in_group set).
   Return true if *LOC contains a reference that cannot be replaced,
   in which case INSN must be null and the caller removes the
   containing note; return false otherwise.  */
758 
759 static bool
760 resolve_subreg_use (rtx *loc, rtx insn)
761 {
762   subrtx_ptr_iterator::array_type array;
763   FOR_EACH_SUBRTX_PTR (iter, array, loc, NONCONST)
764     {
765       rtx *loc = *iter;
766       rtx x = *loc;
767       if (resolve_subreg_p (x))
768 	{
769 	  x = simplify_subreg_concatn (GET_MODE (x), SUBREG_REG (x),
770 				       SUBREG_BYTE (x));
771 
772 	  /* It is possible for a note to contain a reference which we
773 	     cannot resolve to a single piece.  In this case, return true
	     to the caller to indicate that the note must be removed.  */
774 	  if (!x)
775 	    {
776 	      gcc_assert (!insn);
777 	      return true;
778 	    }
779 
780 	  validate_change (insn, loc, x, 1);
781 	  iter.skip_subrtxes ();
782 	}
783       else if (resolve_reg_p (x))
784 	/* Return true to the caller to indicate that we found a direct
785 	   reference to a register which is being decomposed.  This can
786 	   happen inside notes, multiword shift or zero-extend
787 	   instructions.  */
788 	return true;
789     }
790 
791   return false;
792 }
794 
795 /* Resolve any decomposed registers which appear in register notes on
796    INSN.  */
797 
798 static void
799 resolve_reg_notes (rtx_insn *insn)
800 {
801   rtx *pnote, note;
802 
803   note = find_reg_equal_equiv_note (insn);
804   if (note)
805     {
806       int old_count = num_validated_changes ();
807       if (resolve_subreg_use (&XEXP (note, 0), NULL_RTX))
808 	remove_note (insn, note);
809       else
810 	if (old_count != num_validated_changes ())
811 	  df_notes_rescan (insn);
812     }
813 
814   pnote = &REG_NOTES (insn);
815   while (*pnote != NULL_RTX)
816     {
817       bool del = false;
818 
819       note = *pnote;
820       switch (REG_NOTE_KIND (note))
821 	{
822 	case REG_DEAD:
823 	case REG_UNUSED:
824 	  if (resolve_reg_p (XEXP (note, 0)))
825 	    del = true;
826 	  break;
827 
828 	default:
829 	  break;
830 	}
831 
832       if (del)
833 	*pnote = XEXP (note, 1);
834       else
835 	pnote = &XEXP (note, 1);
836     }
837 }
838 
839 /* Return whether X can be decomposed into subwords.  */
840 
841 static bool
842 can_decompose_p (rtx x)
843 {
844   if (REG_P (x))
845     {
846       unsigned int regno = REGNO (x);
847 
848       if (HARD_REGISTER_NUM_P (regno))
849 	{
850 	  unsigned int byte, num_bytes;
851 
852 	  num_bytes = GET_MODE_SIZE (GET_MODE (x));
853 	  for (byte = 0; byte < num_bytes; byte += UNITS_PER_WORD)
854 	    if (simplify_subreg_regno (regno, GET_MODE (x), byte, word_mode) < 0)
855 	      return false;
856 	  return true;
857 	}
858       else
859 	return !bitmap_bit_p (subreg_context, regno);
860     }
861 
862   return true;
863 }
864 
865 /* Decompose the registers used in a simple move SET within INSN.  If
866    we don't change anything, return INSN, otherwise return the start
867    of the sequence of moves.  */
868 
869 static rtx_insn *
870 resolve_simple_move (rtx set, rtx_insn *insn)
871 {
872   rtx src, dest, real_dest;
873   rtx_insn *insns;
874   machine_mode orig_mode, dest_mode;
875   unsigned int words;
876   bool pushing;
877 
878   src = SET_SRC (set);
879   dest = SET_DEST (set);
880   orig_mode = GET_MODE (dest);
881 
882   words = (GET_MODE_SIZE (orig_mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
883   gcc_assert (words > 1);
884 
885   start_sequence ();
886 
887   /* We have to handle copying from a SUBREG of a decomposed reg where
888      the SUBREG is larger than word size.  Rather than assume that we
889      can take a word_mode SUBREG of the destination, we copy to a new
890      register and then copy that to the destination.  */
891 
892   real_dest = NULL_RTX;
893 
894   if (GET_CODE (src) == SUBREG
895       && resolve_reg_p (SUBREG_REG (src))
896       && (SUBREG_BYTE (src) != 0
897 	  || (GET_MODE_SIZE (orig_mode)
898 	      != GET_MODE_SIZE (GET_MODE (SUBREG_REG (src))))))
899     {
900       real_dest = dest;
901       dest = gen_reg_rtx (orig_mode);
902       if (REG_P (real_dest))
903 	REG_ATTRS (dest) = REG_ATTRS (real_dest);
904     }
905 
906   /* Similarly if we are copying to a SUBREG of a decomposed reg where
907      the SUBREG is larger than word size.  */
908 
909   if (GET_CODE (dest) == SUBREG
910       && resolve_reg_p (SUBREG_REG (dest))
911       && (SUBREG_BYTE (dest) != 0
912 	  || (GET_MODE_SIZE (orig_mode)
913 	      != GET_MODE_SIZE (GET_MODE (SUBREG_REG (dest))))))
914     {
915       rtx reg, smove;
916       rtx_insn *minsn;
917 
918       reg = gen_reg_rtx (orig_mode);
919       minsn = emit_move_insn (reg, src);
920       smove = single_set (minsn);
921       gcc_assert (smove != NULL_RTX);
922       resolve_simple_move (smove, minsn);
923       src = reg;
924     }
925 
926   /* If we didn't have any big SUBREGS of decomposed registers, and
927      neither side of the move is a register we are decomposing, then
928      we don't have to do anything here.  */
929 
930   if (src == SET_SRC (set)
931       && dest == SET_DEST (set)
932       && !resolve_reg_p (src)
933       && !resolve_subreg_p (src)
934       && !resolve_reg_p (dest)
935       && !resolve_subreg_p (dest))
936     {
937       end_sequence ();
938       return insn;
939     }
940 
941   /* It's possible for the code to use a subreg of a decomposed
942      register while forming an address.  We need to handle that before
943      passing the address to emit_move_insn.  We pass NULL_RTX as the
944      insn parameter to resolve_subreg_use because we can not validate
945      the insn yet.  */
946   if (MEM_P (src) || MEM_P (dest))
947     {
948       int acg;
949 
950       if (MEM_P (src))
951 	resolve_subreg_use (&XEXP (src, 0), NULL_RTX);
952       if (MEM_P (dest))
953 	resolve_subreg_use (&XEXP (dest, 0), NULL_RTX);
954       acg = apply_change_group ();
955       gcc_assert (acg);
956     }
957 
958   /* If SRC is a register which we can't decompose, or has side
959      effects, we need to move via a temporary register.  */
960 
961   if (!can_decompose_p (src)
962       || side_effects_p (src)
963       || GET_CODE (src) == ASM_OPERANDS)
964     {
965       rtx reg;
966 
967       reg = gen_reg_rtx (orig_mode);
968 
969 #ifdef AUTO_INC_DEC
970       {
971 	rtx move = emit_move_insn (reg, src);
972 	if (MEM_P (src))
973 	  {
974 	    rtx note = find_reg_note (insn, REG_INC, NULL_RTX);
975 	    if (note)
976 	      add_reg_note (move, REG_INC, XEXP (note, 0));
977 	  }
978       }
979 #else
980       emit_move_insn (reg, src);
981 #endif
982       src = reg;
983     }
984 
985   /* If DEST is a register which we can't decompose, or has side
986      effects, we need to first move to a temporary register.  We
987      handle the common case of pushing an operand directly.  We also
988      go through a temporary register if it holds a floating point
989      value.  This gives us better code on systems which can't move
990      data easily between integer and floating point registers.  */
991 
992   dest_mode = orig_mode;
993   pushing = push_operand (dest, dest_mode);
994   if (!can_decompose_p (dest)
995       || (side_effects_p (dest) && !pushing)
996       || (!SCALAR_INT_MODE_P (dest_mode)
997 	  && !resolve_reg_p (dest)
998 	  && !resolve_subreg_p (dest)))
999     {
1000       if (real_dest == NULL_RTX)
1001 	real_dest = dest;
1002       if (!SCALAR_INT_MODE_P (dest_mode))
1003 	{
1004 	  dest_mode = mode_for_size (GET_MODE_SIZE (dest_mode) * BITS_PER_UNIT,
1005 				     MODE_INT, 0);
1006 	  gcc_assert (dest_mode != BLKmode);
1007 	}
1008       dest = gen_reg_rtx (dest_mode);
1009       if (REG_P (real_dest))
1010 	REG_ATTRS (dest) = REG_ATTRS (real_dest);
1011     }
1012 
1013   if (pushing)
1014     {
1015       unsigned int i, j, jinc;
1016 
1017       gcc_assert (GET_MODE_SIZE (orig_mode) % UNITS_PER_WORD == 0);
1018       gcc_assert (GET_CODE (XEXP (dest, 0)) != PRE_MODIFY);
1019       gcc_assert (GET_CODE (XEXP (dest, 0)) != POST_MODIFY);
1020 
1021       if (WORDS_BIG_ENDIAN == STACK_GROWS_DOWNWARD)
1022 	{
1023 	  j = 0;
1024 	  jinc = 1;
1025 	}
1026       else
1027 	{
1028 	  j = words - 1;
1029 	  jinc = -1;
1030 	}
1031 
1032       for (i = 0; i < words; ++i, j += jinc)
1033 	{
1034 	  rtx temp;
1035 
1036 	  temp = copy_rtx (XEXP (dest, 0));
1037 	  temp = adjust_automodify_address_nv (dest, word_mode, temp,
1038 					       j * UNITS_PER_WORD);
1039 	  emit_move_insn (temp,
1040 			  simplify_gen_subreg_concatn (word_mode, src,
1041 						       orig_mode,
1042 						       j * UNITS_PER_WORD));
1043 	}
1044     }
1045   else
1046     {
1047       unsigned int i;
1048 
1049       if (REG_P (dest) && !HARD_REGISTER_NUM_P (REGNO (dest)))
1050 	emit_clobber (dest);
1051 
1052       for (i = 0; i < words; ++i)
1053 	emit_move_insn (simplify_gen_subreg_concatn (word_mode, dest,
1054 						     dest_mode,
1055 						     i * UNITS_PER_WORD),
1056 			simplify_gen_subreg_concatn (word_mode, src,
1057 						     orig_mode,
1058 						     i * UNITS_PER_WORD));
1059     }
1060 
1061   if (real_dest != NULL_RTX)
1062     {
1063       rtx mdest, smove;
1064       rtx_insn *minsn;
1065 
1066       if (dest_mode == orig_mode)
1067 	mdest = dest;
1068       else
1069 	mdest = simplify_gen_subreg (orig_mode, dest, GET_MODE (dest), 0);
1070       minsn = emit_move_insn (real_dest, mdest);
1071 
1072 #ifdef AUTO_INC_DEC
1073   if (MEM_P (real_dest)
1074       && !(resolve_reg_p (real_dest) || resolve_subreg_p (real_dest)))
1075     {
1076       rtx note = find_reg_note (insn, REG_INC, NULL_RTX);
1077       if (note)
1078 	add_reg_note (minsn, REG_INC, XEXP (note, 0));
1079     }
1080 #endif
1081 
1082       smove = single_set (minsn);
1083       gcc_assert (smove != NULL_RTX);
1084 
1085       resolve_simple_move (smove, minsn);
1086     }
1087 
1088   insns = get_insns ();
1089   end_sequence ();
1090 
1091   copy_reg_eh_region_note_forward (insn, insns, NULL_RTX);
1092 
1093   emit_insn_before (insns, insn);
1094 
1095   /* If we get here via self-recursion, then INSN is not yet in the insns
1096      chain and delete_insn will fail.  We only want to remove INSN from the
1097      current sequence.  See PR56738.  */
1098   if (in_sequence_p ())
1099     remove_insn (insn);
1100   else
1101     delete_insn (insn);
1102 
1103   return insns;
1104 }
1105 
1106 /* Change a CLOBBER of a decomposed register into a CLOBBER of the
1107    component registers.  Return whether we changed something.  */
1108 
1109 static bool
1110 resolve_clobber (rtx pat, rtx_insn *insn)
1111 {
1112   rtx reg;
1113   machine_mode orig_mode;
1114   unsigned int words, i;
1115   int ret;
1116 
1117   reg = XEXP (pat, 0);
1118   if (!resolve_reg_p (reg) && !resolve_subreg_p (reg))
1119     return false;
1120 
1121   orig_mode = GET_MODE (reg);
1122   words = GET_MODE_SIZE (orig_mode);
1123   words = (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1124 
1125   ret = validate_change (NULL_RTX, &XEXP (pat, 0),
1126 			 simplify_gen_subreg_concatn (word_mode, reg,
1127 						      orig_mode, 0),
1128 			 0);
1129   df_insn_rescan (insn);
1130   gcc_assert (ret != 0);
1131 
1132   for (i = words - 1; i > 0; --i)
1133     {
1134       rtx x;
1135 
1136       x = simplify_gen_subreg_concatn (word_mode, reg, orig_mode,
1137 				       i * UNITS_PER_WORD);
1138       x = gen_rtx_CLOBBER (VOIDmode, x);
1139       emit_insn_after (x, insn);
1140     }
1141 
1142   resolve_reg_notes (insn);
1143 
1144   return true;
1145 }
1146 
1147 /* A USE of a decomposed register is no longer meaningful.  Return
1148    whether we changed something.  */
1149 
1150 static bool
1151 resolve_use (rtx pat, rtx_insn *insn)
1152 {
1153   if (resolve_reg_p (XEXP (pat, 0)) || resolve_subreg_p (XEXP (pat, 0)))
1154     {
1155       delete_insn (insn);
1156       return true;
1157     }
1158 
1159   resolve_reg_notes (insn);
1160 
1161   return false;
1162 }
1163 
/* A VAR_LOCATION can be simplified.  Walk the pattern of debug insn
   INSN and replace references to decomposed registers (or subregs of
   them) with simplified or copied rtl, then rescan the insn.  */

static void
resolve_debug (rtx_insn *insn)
{
  subrtx_ptr_iterator::array_type array;
  FOR_EACH_SUBRTX_PTR (iter, array, &PATTERN (insn), NONCONST)
    {
      rtx *loc = *iter;
      rtx x = *loc;
      if (resolve_subreg_p (x))
	{
	  /* Try to fold the subreg of a decomposed register down to
	     the underlying word-mode piece.  */
	  x = simplify_subreg_concatn (GET_MODE (x), SUBREG_REG (x),
				       SUBREG_BYTE (x));

	  if (x)
	    *loc = x;
	  else
	    /* NOTE(review): this copy is assigned to X but never
	       stored back to *LOC, and a SUBREG will not satisfy the
	       resolve_reg_p test below — presumably intentional
	       (leave the unsimplifiable subreg alone), but confirm
	       against upstream history.  */
	    x = copy_rtx (*loc);
	}
      /* Unshare any direct reference to a decomposed register so the
	 debug insn does not share rtl with the concatn.  */
      if (resolve_reg_p (x))
	*loc = copy_rtx (x);
    }

  df_insn_rescan (insn);

  resolve_reg_notes (insn);
}
1192 
1193 /* Check if INSN is a decomposable multiword-shift or zero-extend and
1194    set the decomposable_context bitmap accordingly.  SPEED_P is true
1195    if we are optimizing INSN for speed rather than size.  Return true
1196    if INSN is decomposable.  */
1197 
1198 static bool
1199 find_decomposable_shift_zext (rtx_insn *insn, bool speed_p)
1200 {
1201   rtx set;
1202   rtx op;
1203   rtx op_operand;
1204 
1205   set = single_set (insn);
1206   if (!set)
1207     return false;
1208 
1209   op = SET_SRC (set);
1210   if (GET_CODE (op) != ASHIFT
1211       && GET_CODE (op) != LSHIFTRT
1212       && GET_CODE (op) != ASHIFTRT
1213       && GET_CODE (op) != ZERO_EXTEND)
1214     return false;
1215 
1216   op_operand = XEXP (op, 0);
1217   if (!REG_P (SET_DEST (set)) || !REG_P (op_operand)
1218       || HARD_REGISTER_NUM_P (REGNO (SET_DEST (set)))
1219       || HARD_REGISTER_NUM_P (REGNO (op_operand))
1220       || GET_MODE (op) != twice_word_mode)
1221     return false;
1222 
1223   if (GET_CODE (op) == ZERO_EXTEND)
1224     {
1225       if (GET_MODE (op_operand) != word_mode
1226 	  || !choices[speed_p].splitting_zext)
1227 	return false;
1228     }
1229   else /* left or right shift */
1230     {
1231       bool *splitting = (GET_CODE (op) == ASHIFT
1232 			 ? choices[speed_p].splitting_ashift
1233 			 : GET_CODE (op) == ASHIFTRT
1234 			 ? choices[speed_p].splitting_ashiftrt
1235 			 : choices[speed_p].splitting_lshiftrt);
1236       if (!CONST_INT_P (XEXP (op, 1))
1237 	  || !IN_RANGE (INTVAL (XEXP (op, 1)), BITS_PER_WORD,
1238 			2 * BITS_PER_WORD - 1)
1239 	  || !splitting[INTVAL (XEXP (op, 1)) - BITS_PER_WORD])
1240 	return false;
1241 
1242       bitmap_set_bit (decomposable_context, REGNO (op_operand));
1243     }
1244 
1245   bitmap_set_bit (decomposable_context, REGNO (SET_DEST (set)));
1246 
1247   return true;
1248 }
1249 
/* Decompose a more than word wide shift (in INSN) of a multiword
   pseudo or a multiword zero-extend of a wordmode pseudo into a move
   and 'set to zero' insn.  Return a pointer to the new insn when a
   replacement was done.  */

static rtx_insn *
resolve_shift_zext (rtx_insn *insn)
{
  rtx set;
  rtx op;
  rtx op_operand;
  rtx_insn *insns;
  rtx src_reg, dest_reg, dest_upper, upper_src = NULL_RTX;
  int src_reg_num, dest_reg_num, offset1, offset2, src_offset;

  set = single_set (insn);
  if (!set)
    return NULL;

  op = SET_SRC (set);
  if (GET_CODE (op) != ASHIFT
      && GET_CODE (op) != LSHIFTRT
      && GET_CODE (op) != ASHIFTRT
      && GET_CODE (op) != ZERO_EXTEND)
    return NULL;

  op_operand = XEXP (op, 0);

  /* We can tear this operation apart only if the regs were already
     torn apart.  */
  if (!resolve_reg_p (SET_DEST (set)) && !resolve_reg_p (op_operand))
    return NULL;

  /* src_reg_num is the number of the word mode register which we
     are operating on.  For a left shift and a zero_extend on little
     endian machines this is register 0.  */
  src_reg_num = (GET_CODE (op) == LSHIFTRT || GET_CODE (op) == ASHIFTRT)
		? 1 : 0;

  /* On big-endian targets the word numbering within the double-word
     operand is reversed.  */
  if (WORDS_BIG_ENDIAN
      && GET_MODE_SIZE (GET_MODE (op_operand)) > UNITS_PER_WORD)
    src_reg_num = 1 - src_reg_num;

  if (GET_CODE (op) == ZERO_EXTEND)
    dest_reg_num = WORDS_BIG_ENDIAN ? 1 : 0;
  else
    dest_reg_num = 1 - src_reg_num;

  /* Byte offsets of the destination word that receives the shifted
     bits (offset1), the other destination word (offset2), and the
     source word we read from (src_offset).  */
  offset1 = UNITS_PER_WORD * dest_reg_num;
  offset2 = UNITS_PER_WORD * (1 - dest_reg_num);
  src_offset = UNITS_PER_WORD * src_reg_num;

  /* Build the replacement insns in a sequence so they can be emitted
     before INSN as a group.  */
  start_sequence ();

  dest_reg = simplify_gen_subreg_concatn (word_mode, SET_DEST (set),
                                          GET_MODE (SET_DEST (set)),
                                          offset1);
  dest_upper = simplify_gen_subreg_concatn (word_mode, SET_DEST (set),
					    GET_MODE (SET_DEST (set)),
					    offset2);
  src_reg = simplify_gen_subreg_concatn (word_mode, op_operand,
                                         GET_MODE (op_operand),
                                         src_offset);
  /* For an arithmetic right shift the upper word must be filled with
     copies of the sign bit: shift the source word right by
     BITS_PER_WORD - 1 (arithmetically).  The case of shifting by
     exactly 2*BITS_PER_WORD - 1 is handled separately below.  */
  if (GET_CODE (op) == ASHIFTRT
      && INTVAL (XEXP (op, 1)) != 2 * BITS_PER_WORD - 1)
    upper_src = expand_shift (RSHIFT_EXPR, word_mode, copy_rtx (src_reg),
			      BITS_PER_WORD - 1, NULL_RTX, 0);

  /* A shift by more than a word still needs a residual word-mode
     shift of shift_count - BITS_PER_WORD.  */
  if (GET_CODE (op) != ZERO_EXTEND)
    {
      int shift_count = INTVAL (XEXP (op, 1));
      if (shift_count > BITS_PER_WORD)
	src_reg = expand_shift (GET_CODE (op) == ASHIFT ?
				LSHIFT_EXPR : RSHIFT_EXPR,
				word_mode, src_reg,
				shift_count - BITS_PER_WORD,
				dest_reg, GET_CODE (op) != ASHIFTRT);
    }

  if (dest_reg != src_reg)
    emit_move_insn (dest_reg, src_reg);
  /* Fill the other destination word: zero for logical shifts and
     zero_extend, sign bits for arithmetic right shifts.  */
  if (GET_CODE (op) != ASHIFTRT)
    emit_move_insn (dest_upper, CONST0_RTX (word_mode));
  else if (INTVAL (XEXP (op, 1)) == 2 * BITS_PER_WORD - 1)
    emit_move_insn (dest_upper, copy_rtx (src_reg));
  else
    emit_move_insn (dest_upper, upper_src);
  insns = get_insns ();

  end_sequence ();

  emit_insn_before (insns, insn);

  if (dump_file)
    {
      rtx_insn *in;
      fprintf (dump_file, "; Replacing insn: %d with insns: ", INSN_UID (insn));
      for (in = insns; in != insn; in = NEXT_INSN (in))
	fprintf (dump_file, "%d ", INSN_UID (in));
      fprintf (dump_file, "\n");
    }

  delete_insn (insn);
  return insns;
}
1355 
1356 /* Print to dump_file a description of what we're doing with shift code CODE.
1357    SPLITTING[X] is true if we are splitting shifts by X + BITS_PER_WORD.  */
1358 
1359 static void
1360 dump_shift_choices (enum rtx_code code, bool *splitting)
1361 {
1362   int i;
1363   const char *sep;
1364 
1365   fprintf (dump_file,
1366 	   "  Splitting mode %s for %s lowering with shift amounts = ",
1367 	   GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (code));
1368   sep = "";
1369   for (i = 0; i < BITS_PER_WORD; i++)
1370     if (splitting[i])
1371       {
1372 	fprintf (dump_file, "%s%d", sep, i + BITS_PER_WORD);
1373 	sep = ",";
1374       }
1375   fprintf (dump_file, "\n");
1376 }
1377 
1378 /* Print to dump_file a description of what we're doing when optimizing
1379    for speed or size; SPEED_P says which.  DESCRIPTION is a description
1380    of the SPEED_P choice.  */
1381 
1382 static void
1383 dump_choices (bool speed_p, const char *description)
1384 {
1385   unsigned int i;
1386 
1387   fprintf (dump_file, "Choices when optimizing for %s:\n", description);
1388 
1389   for (i = 0; i < MAX_MACHINE_MODE; i++)
1390     if (GET_MODE_SIZE ((machine_mode) i) > UNITS_PER_WORD)
1391       fprintf (dump_file, "  %s mode %s for copy lowering.\n",
1392 	       choices[speed_p].move_modes_to_split[i]
1393 	       ? "Splitting"
1394 	       : "Skipping",
1395 	       GET_MODE_NAME ((machine_mode) i));
1396 
1397   fprintf (dump_file, "  %s mode %s for zero_extend lowering.\n",
1398 	   choices[speed_p].splitting_zext ? "Splitting" : "Skipping",
1399 	   GET_MODE_NAME (twice_word_mode));
1400 
1401   dump_shift_choices (ASHIFT, choices[speed_p].splitting_ashift);
1402   dump_shift_choices (LSHIFTRT, choices[speed_p].splitting_lshiftrt);
1403   dump_shift_choices (ASHIFTRT, choices[speed_p].splitting_ashiftrt);
1404   fprintf (dump_file, "\n");
1405 }
1406 
/* Look for registers which are always accessed via word-sized SUBREGs
   or -if DECOMPOSE_COPIES is true- via copies.  Decompose these
   registers into several word-sized pseudo-registers.  */

static void
decompose_multiword_subregs (bool decompose_copies)
{
  unsigned int max;
  basic_block bb;
  bool speed_p;

  if (dump_file)
    {
      dump_choices (false, "size");
      dump_choices (true, "speed");
    }

  /* Check if this target even has any modes to consider lowering.   */
  if (!choices[false].something_to_do && !choices[true].something_to_do)
    {
      if (dump_file)
	fprintf (dump_file, "Nothing to do!\n");
      return;
    }

  max = max_reg_num ();

  /* First see if there are any multi-word pseudo-registers.  If there
     aren't, there is nothing we can do.  This should speed up this
     pass in the normal case, since it should be faster than scanning
     all the insns.  */
  {
    unsigned int i;
    bool useful_modes_seen = false;

    for (i = FIRST_PSEUDO_REGISTER; i < max; ++i)
      if (regno_reg_rtx[i] != NULL)
	{
	  machine_mode mode = GET_MODE (regno_reg_rtx[i]);
	  if (choices[false].move_modes_to_split[(int) mode]
	      || choices[true].move_modes_to_split[(int) mode])
	    {
	      useful_modes_seen = true;
	      break;
	    }
	}

    if (!useful_modes_seen)
      {
	if (dump_file)
	  fprintf (dump_file, "Nothing to lower in this function.\n");
	return;
      }
  }

  /* DF is only initialized for the second pass (subreg2); defer insn
     rescans and run word-level dead code elimination first.  */
  if (df)
    {
      df_set_flags (DF_DEFER_INSN_RESCAN);
      run_word_dce ();
    }

  /* FIXME: It may be possible to change this code to look for each
     multi-word pseudo-register and to find each insn which sets or
     uses that register.  That should be faster than scanning all the
     insns.  */

  decomposable_context = BITMAP_ALLOC (NULL);
  non_decomposable_context = BITMAP_ALLOC (NULL);
  subreg_context = BITMAP_ALLOC (NULL);

  reg_copy_graph.create (max);
  reg_copy_graph.safe_grow_cleared (max);
  memset (reg_copy_graph.address (), 0, sizeof (bitmap) * max);

  /* Phase 1: scan every insn, classifying each one and recording in
     the context bitmaps which pseudos may (or must not) be split.  */
  speed_p = optimize_function_for_speed_p (cfun);
  FOR_EACH_BB_FN (bb, cfun)
    {
      rtx_insn *insn;

      FOR_BB_INSNS (bb, insn)
	{
	  rtx set;
	  enum classify_move_insn cmi;
	  int i, n;

	  if (!INSN_P (insn)
	      || GET_CODE (PATTERN (insn)) == CLOBBER
	      || GET_CODE (PATTERN (insn)) == USE)
	    continue;

	  recog_memoized (insn);

	  if (find_decomposable_shift_zext (insn, speed_p))
	    continue;

	  extract_insn (insn);

	  set = simple_move (insn, speed_p);

	  if (!set)
	    cmi = NOT_SIMPLE_MOVE;
	  else
	    {
	      /* We mark pseudo-to-pseudo copies as decomposable during the
		 second pass only.  The first pass is so early that there is
		 good chance such moves will be optimized away completely by
		 subsequent optimizations anyway.

		 However, we call find_pseudo_copy even during the first pass
		 so as to properly set up the reg_copy_graph.  */
	      if (find_pseudo_copy (set))
		cmi = decompose_copies? DECOMPOSABLE_SIMPLE_MOVE : SIMPLE_MOVE;
	      else
		cmi = SIMPLE_MOVE;
	    }

	  n = recog_data.n_operands;
	  for (i = 0; i < n; ++i)
	    {
	      find_decomposable_subregs (&recog_data.operand[i], &cmi);

	      /* We handle ASM_OPERANDS as a special case to support
		 things like x86 rdtsc which returns a DImode value.
		 We can decompose the output, which will certainly be
		 operand 0, but not the inputs.  */

	      if (cmi == SIMPLE_MOVE
		  && GET_CODE (SET_SRC (set)) == ASM_OPERANDS)
		{
		  gcc_assert (i == 0);
		  cmi = NOT_SIMPLE_MOVE;
		}
	    }
	}
    }

  /* Registers seen in any non-decomposable context must be kept
     whole; drop them from the candidate set.  */
  bitmap_and_compl_into (decomposable_context, non_decomposable_context);
  if (!bitmap_empty_p (decomposable_context))
    {
      sbitmap sub_blocks;
      unsigned int i;
      sbitmap_iterator sbi;
      bitmap_iterator iter;
      unsigned int regno;

      propagate_pseudo_copies ();

      sub_blocks = sbitmap_alloc (last_basic_block_for_fn (cfun));
      bitmap_clear (sub_blocks);

      /* Create the word-sized replacement pseudos for every register
	 we decided to split.  */
      EXECUTE_IF_SET_IN_BITMAP (decomposable_context, 0, regno, iter)
	decompose_register (regno);

      /* Phase 2: rewrite every insn that mentions a decomposed
	 register.  */
      FOR_EACH_BB_FN (bb, cfun)
	{
	  rtx_insn *insn;

	  FOR_BB_INSNS (bb, insn)
	    {
	      rtx pat;

	      if (!INSN_P (insn))
		continue;

	      pat = PATTERN (insn);
	      if (GET_CODE (pat) == CLOBBER)
		resolve_clobber (pat, insn);
	      else if (GET_CODE (pat) == USE)
		resolve_use (pat, insn);
	      else if (DEBUG_INSN_P (insn))
		resolve_debug (insn);
	      else
		{
		  rtx set;
		  int i;

		  recog_memoized (insn);
		  extract_insn (insn);

		  set = simple_move (insn, speed_p);
		  if (set)
		    {
		      rtx_insn *orig_insn = insn;
		      bool cfi = control_flow_insn_p (insn);

		      /* We can end up splitting loads to multi-word pseudos
			 into separate loads to machine word size pseudos.
			 When this happens, we first had one load that can
			 throw, and after resolve_simple_move we'll have a
			 bunch of loads (at least two).  All those loads may
			 trap if we can have non-call exceptions, so they
			 all will end the current basic block.  We split the
			 block after the outer loop over all insns, but we
			 make sure here that we will be able to split the
			 basic block and still produce the correct control
			 flow graph for it.  */
		      gcc_assert (!cfi
				  || (cfun->can_throw_non_call_exceptions
				      && can_throw_internal (insn)));

		      insn = resolve_simple_move (set, insn);
		      if (insn != orig_insn)
			{
			  recog_memoized (insn);
			  extract_insn (insn);

			  if (cfi)
			    bitmap_set_bit (sub_blocks, bb->index);
			}
		    }
		  else
		    {
		      rtx_insn *decomposed_shift;

		      decomposed_shift = resolve_shift_zext (insn);
		      if (decomposed_shift != NULL_RTX)
			{
			  insn = decomposed_shift;
			  recog_memoized (insn);
			  extract_insn (insn);
			}
		    }

		  /* Replace any remaining subreg uses of decomposed
		     registers in the (possibly new) insn's operands.  */
		  for (i = recog_data.n_operands - 1; i >= 0; --i)
		    resolve_subreg_use (recog_data.operand_loc[i], insn);

		  resolve_reg_notes (insn);

		  if (num_validated_changes () > 0)
		    {
		      /* Keep matching duplicate operands in sync with
			 the operands we just rewrote.  */
		      for (i = recog_data.n_dups - 1; i >= 0; --i)
			{
			  rtx *pl = recog_data.dup_loc[i];
			  int dup_num = recog_data.dup_num[i];
			  rtx *px = recog_data.operand_loc[dup_num];

			  validate_unshare_change (insn, pl, *px, 1);
			}

		      i = apply_change_group ();
		      gcc_assert (i);
		    }
		}
	    }
	}

      /* If we had insns to split that caused control flow insns in the middle
	 of a basic block, split those blocks now.  Note that we only handle
	 the case where splitting a load has caused multiple possibly trapping
	 loads to appear.  */
      EXECUTE_IF_SET_IN_BITMAP (sub_blocks, 0, i, sbi)
	{
	  rtx_insn *insn, *end;
	  edge fallthru;

	  bb = BASIC_BLOCK_FOR_FN (cfun, i);
	  insn = BB_HEAD (bb);
	  end = BB_END (bb);

	  while (insn != end)
	    {
	      if (control_flow_insn_p (insn))
		{
		  /* Split the block after insn.  There will be a fallthru
		     edge, which is OK so we keep it.  We have to create the
		     exception edges ourselves.  */
		  fallthru = split_block (bb, insn);
		  rtl_make_eh_edge (NULL, bb, BB_END (bb));
		  bb = fallthru->dest;
		  insn = BB_HEAD (bb);
		}
	      else
	        insn = NEXT_INSN (insn);
	    }
	}

      sbitmap_free (sub_blocks);
    }

  /* Release the per-register copy bitmaps and the context bitmaps.  */
  {
    unsigned int i;
    bitmap b;

    FOR_EACH_VEC_ELT (reg_copy_graph, i, b)
      if (b)
	BITMAP_FREE (b);
  }

  reg_copy_graph.release ();

  BITMAP_FREE (decomposable_context);
  BITMAP_FREE (non_decomposable_context);
  BITMAP_FREE (subreg_context);
}
1701 
/* Implement first lower subreg pass.  This early instance ("subreg1")
   runs before most RTL optimizations and never decomposes plain
   pseudo-to-pseudo copies.  */

namespace {

/* Pass metadata for the "subreg1" RTL pass.  */
const pass_data pass_data_lower_subreg =
{
  RTL_PASS, /* type */
  "subreg1", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_LOWER_SUBREG, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_lower_subreg : public rtl_opt_pass
{
public:
  pass_lower_subreg (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_lower_subreg, ctxt)
  {}

  /* opt_pass methods: */
  /* Run only when -fsplit-wide-types is in effect.  */
  virtual bool gate (function *) { return flag_split_wide_types != 0; }
  /* Decompose multiword pseudos, but not pseudo copies (false).  */
  virtual unsigned int execute (function *)
    {
      decompose_multiword_subregs (false);
      return 0;
    }

}; // class pass_lower_subreg

} // anon namespace
1737 
1738 rtl_opt_pass *
1739 make_pass_lower_subreg (gcc::context *ctxt)
1740 {
1741   return new pass_lower_subreg (ctxt);
1742 }
1743 
/* Implement second lower subreg pass.  This late instance ("subreg2")
   also decomposes pseudo-to-pseudo copies and finalizes dataflow.  */

namespace {

/* Pass metadata for the "subreg2" RTL pass.  Unlike subreg1 it
   requests TODO_df_finish so deferred DF rescans are completed.  */
const pass_data pass_data_lower_subreg2 =
{
  RTL_PASS, /* type */
  "subreg2", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_LOWER_SUBREG, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_df_finish, /* todo_flags_finish */
};

class pass_lower_subreg2 : public rtl_opt_pass
{
public:
  pass_lower_subreg2 (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_lower_subreg2, ctxt)
  {}

  /* opt_pass methods: */
  /* Run only when -fsplit-wide-types is in effect.  */
  virtual bool gate (function *) { return flag_split_wide_types != 0; }
  /* Decompose multiword pseudos, including pseudo copies (true).  */
  virtual unsigned int execute (function *)
    {
      decompose_multiword_subregs (true);
      return 0;
    }

}; // class pass_lower_subreg2

} // anon namespace
1779 
1780 rtl_opt_pass *
1781 make_pass_lower_subreg2 (gcc::context *ctxt)
1782 {
1783   return new pass_lower_subreg2 (ctxt);
1784 }
1785