xref: /netbsd-src/external/gpl3/gcc/dist/gcc/lower-subreg.cc (revision b1e838363e3c6fc78a55519254d99869742dd33c)
1 /* Decompose multiword subregs.
2    Copyright (C) 2007-2022 Free Software Foundation, Inc.
3    Contributed by Richard Henderson <rth@redhat.com>
4 		  Ian Lance Taylor <iant@google.com>
5 
6 This file is part of GCC.
7 
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12 
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
16 for more details.
17 
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3.  If not see
20 <http://www.gnu.org/licenses/>.  */
21 
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "cfghooks.h"
29 #include "df.h"
30 #include "memmodel.h"
31 #include "tm_p.h"
32 #include "expmed.h"
33 #include "insn-config.h"
34 #include "emit-rtl.h"
35 #include "recog.h"
36 #include "cfgrtl.h"
37 #include "cfgbuild.h"
38 #include "dce.h"
39 #include "expr.h"
40 #include "tree-pass.h"
41 #include "lower-subreg.h"
42 #include "rtl-iter.h"
43 #include "target.h"
44 
45 
46 /* Decompose multi-word pseudo-registers into individual
47    pseudo-registers when possible and profitable.  This is possible
48    when all the uses of a multi-word register are via SUBREG, or are
49    copies of the register to another location.  Breaking apart the
50    register permits more CSE and permits better register allocation.
51    This is profitable if the machine does not have move instructions
52    to do this.
53 
54    This pass only splits moves with modes that are wider than
55    word_mode and ASHIFTs, LSHIFTRTs, ASHIFTRTs and ZERO_EXTENDs with
56    integer modes that are twice the width of word_mode.  The latter
57    could be generalized if there was a need to do this, but the trend in
58    architectures is to not need this.
59 
60    There are two useful preprocessor defines for use by maintainers:
61 
62    #define LOG_COSTS 1
63 
64    if you wish to see the actual cost estimates that are being used
65    for each mode wider than word mode and the cost estimates for zero
66    extension and the shifts.   This can be useful when port maintainers
67    are tuning insn rtx costs.
68 
69    #define FORCE_LOWERING 1
70 
71    if you wish to test the pass with all the transformation forced on.
72    This can be useful for finding bugs in the transformations.  */
73 
74 #define LOG_COSTS 0
75 #define FORCE_LOWERING 0
76 
77 /* Bit N in this bitmap is set if regno N is used in a context in
78    which we can decompose it.  */
79 static bitmap decomposable_context;
80 
81 /* Bit N in this bitmap is set if regno N is used in a context in
82    which it cannot be decomposed.  */
83 static bitmap non_decomposable_context;
84 
85 /* Bit N in this bitmap is set if regno N is used in a subreg
86    which changes the mode but not the size.  This typically happens
87    when the register accessed as a floating-point value; we want to
88    avoid generating accesses to its subwords in integer modes.  */
89 static bitmap subreg_context;
90 
91 /* Bit N in the bitmap in element M of this array is set if there is a
92    copy from reg M to reg N.  */
93 static vec<bitmap> reg_copy_graph;
94 
95 struct target_lower_subreg default_target_lower_subreg;
96 #if SWITCHABLE_TARGET
97 struct target_lower_subreg *this_target_lower_subreg
98   = &default_target_lower_subreg;
99 #endif
100 
101 #define twice_word_mode \
102   this_target_lower_subreg->x_twice_word_mode
103 #define choices \
104   this_target_lower_subreg->x_choices
105 
106 /* Return true if MODE is a mode we know how to lower.  When returning true,
107    store its byte size in *BYTES and its word size in *WORDS.  */
108 
109 static inline bool
interesting_mode_p(machine_mode mode,unsigned int * bytes,unsigned int * words)110 interesting_mode_p (machine_mode mode, unsigned int *bytes,
111 		    unsigned int *words)
112 {
113   if (!GET_MODE_SIZE (mode).is_constant (bytes))
114     return false;
115   *words = CEIL (*bytes, UNITS_PER_WORD);
116   return true;
117 }
118 
119 /* RTXes used while computing costs.  */
120 struct cost_rtxes {
121   /* Source and target registers.  */
122   rtx source;
123   rtx target;
124 
125   /* A twice_word_mode ZERO_EXTEND of SOURCE.  */
126   rtx zext;
127 
128   /* A shift of SOURCE.  */
129   rtx shift;
130 
131   /* A SET of TARGET.  */
132   rtx set;
133 };
134 
135 /* Return the cost of a CODE shift in mode MODE by OP1 bits, using the
136    rtxes in RTXES.  SPEED_P selects between the speed and size cost.  */
137 
138 static int
shift_cost(bool speed_p,struct cost_rtxes * rtxes,enum rtx_code code,machine_mode mode,int op1)139 shift_cost (bool speed_p, struct cost_rtxes *rtxes, enum rtx_code code,
140 	    machine_mode mode, int op1)
141 {
142   PUT_CODE (rtxes->shift, code);
143   PUT_MODE (rtxes->shift, mode);
144   PUT_MODE (rtxes->source, mode);
145   XEXP (rtxes->shift, 1) = gen_int_shift_amount (mode, op1);
146   return set_src_cost (rtxes->shift, mode, speed_p);
147 }
148 
149 /* For each X in the range [0, BITS_PER_WORD), set SPLITTING[X]
150    to true if it is profitable to split a double-word CODE shift
151    of X + BITS_PER_WORD bits.  SPEED_P says whether we are testing
152    for speed or size profitability.
153 
154    Use the rtxes in RTXES to calculate costs.  WORD_MOVE_ZERO_COST is
155    the cost of moving zero into a word-mode register.  WORD_MOVE_COST
156    is the cost of moving between word registers.  */
157 
158 static void
compute_splitting_shift(bool speed_p,struct cost_rtxes * rtxes,bool * splitting,enum rtx_code code,int word_move_zero_cost,int word_move_cost)159 compute_splitting_shift (bool speed_p, struct cost_rtxes *rtxes,
160 			 bool *splitting, enum rtx_code code,
161 			 int word_move_zero_cost, int word_move_cost)
162 {
163   int wide_cost, narrow_cost, upper_cost, i;
164 
165   for (i = 0; i < BITS_PER_WORD; i++)
166     {
167       wide_cost = shift_cost (speed_p, rtxes, code, twice_word_mode,
168 			      i + BITS_PER_WORD);
169       if (i == 0)
170 	narrow_cost = word_move_cost;
171       else
172 	narrow_cost = shift_cost (speed_p, rtxes, code, word_mode, i);
173 
174       if (code != ASHIFTRT)
175 	upper_cost = word_move_zero_cost;
176       else if (i == BITS_PER_WORD - 1)
177 	upper_cost = word_move_cost;
178       else
179 	upper_cost = shift_cost (speed_p, rtxes, code, word_mode,
180 				 BITS_PER_WORD - 1);
181 
182       if (LOG_COSTS)
183 	fprintf (stderr, "%s %s by %d: original cost %d, split cost %d + %d\n",
184 		 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (code),
185 		 i + BITS_PER_WORD, wide_cost, narrow_cost, upper_cost);
186 
187       if (FORCE_LOWERING || wide_cost >= narrow_cost + upper_cost)
188 	splitting[i] = true;
189     }
190 }
191 
192 /* Compute what we should do when optimizing for speed or size; SPEED_P
193    selects which.  Use RTXES for computing costs.  */
194 
195 static void
compute_costs(bool speed_p,struct cost_rtxes * rtxes)196 compute_costs (bool speed_p, struct cost_rtxes *rtxes)
197 {
198   unsigned int i;
199   int word_move_zero_cost, word_move_cost;
200 
201   PUT_MODE (rtxes->target, word_mode);
202   SET_SRC (rtxes->set) = CONST0_RTX (word_mode);
203   word_move_zero_cost = set_rtx_cost (rtxes->set, speed_p);
204 
205   SET_SRC (rtxes->set) = rtxes->source;
206   word_move_cost = set_rtx_cost (rtxes->set, speed_p);
207 
208   if (LOG_COSTS)
209     fprintf (stderr, "%s move: from zero cost %d, from reg cost %d\n",
210 	     GET_MODE_NAME (word_mode), word_move_zero_cost, word_move_cost);
211 
212   for (i = 0; i < MAX_MACHINE_MODE; i++)
213     {
214       machine_mode mode = (machine_mode) i;
215       unsigned int size, factor;
216       if (interesting_mode_p (mode, &size, &factor) && factor > 1)
217 	{
218 	  unsigned int mode_move_cost;
219 
220 	  PUT_MODE (rtxes->target, mode);
221 	  PUT_MODE (rtxes->source, mode);
222 	  mode_move_cost = set_rtx_cost (rtxes->set, speed_p);
223 
224 	  if (LOG_COSTS)
225 	    fprintf (stderr, "%s move: original cost %d, split cost %d * %d\n",
226 		     GET_MODE_NAME (mode), mode_move_cost,
227 		     word_move_cost, factor);
228 
229 	  if (FORCE_LOWERING || mode_move_cost >= word_move_cost * factor)
230 	    {
231 	      choices[speed_p].move_modes_to_split[i] = true;
232 	      choices[speed_p].something_to_do = true;
233 	    }
234 	}
235     }
236 
237   /* For the moves and shifts, the only case that is checked is one
238      where the mode of the target is an integer mode twice the width
239      of the word_mode.
240 
241      If it is not profitable to split a double word move then do not
242      even consider the shifts or the zero extension.  */
243   if (choices[speed_p].move_modes_to_split[(int) twice_word_mode])
244     {
245       int zext_cost;
246 
247       /* The only case here to check to see if moving the upper part with a
248 	 zero is cheaper than doing the zext itself.  */
249       PUT_MODE (rtxes->source, word_mode);
250       zext_cost = set_src_cost (rtxes->zext, twice_word_mode, speed_p);
251 
252       if (LOG_COSTS)
253 	fprintf (stderr, "%s %s: original cost %d, split cost %d + %d\n",
254 		 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (ZERO_EXTEND),
255 		 zext_cost, word_move_cost, word_move_zero_cost);
256 
257       if (FORCE_LOWERING || zext_cost >= word_move_cost + word_move_zero_cost)
258 	choices[speed_p].splitting_zext = true;
259 
260       compute_splitting_shift (speed_p, rtxes,
261 			       choices[speed_p].splitting_ashift, ASHIFT,
262 			       word_move_zero_cost, word_move_cost);
263       compute_splitting_shift (speed_p, rtxes,
264 			       choices[speed_p].splitting_lshiftrt, LSHIFTRT,
265 			       word_move_zero_cost, word_move_cost);
266       compute_splitting_shift (speed_p, rtxes,
267 			       choices[speed_p].splitting_ashiftrt, ASHIFTRT,
268 			       word_move_zero_cost, word_move_cost);
269     }
270 }
271 
272 /* Do one-per-target initialisation.  This involves determining
273    which operations on the machine are profitable.  If none are found,
274    then the pass just returns when called.  */
275 
276 void
init_lower_subreg(void)277 init_lower_subreg (void)
278 {
279   struct cost_rtxes rtxes;
280 
281   memset (this_target_lower_subreg, 0, sizeof (*this_target_lower_subreg));
282 
283   twice_word_mode = GET_MODE_2XWIDER_MODE (word_mode).require ();
284 
285   rtxes.target = gen_rtx_REG (word_mode, LAST_VIRTUAL_REGISTER + 1);
286   rtxes.source = gen_rtx_REG (word_mode, LAST_VIRTUAL_REGISTER + 2);
287   rtxes.set = gen_rtx_SET (rtxes.target, rtxes.source);
288   rtxes.zext = gen_rtx_ZERO_EXTEND (twice_word_mode, rtxes.source);
289   rtxes.shift = gen_rtx_ASHIFT (twice_word_mode, rtxes.source, const0_rtx);
290 
291   if (LOG_COSTS)
292     fprintf (stderr, "\nSize costs\n==========\n\n");
293   compute_costs (false, &rtxes);
294 
295   if (LOG_COSTS)
296     fprintf (stderr, "\nSpeed costs\n===========\n\n");
297   compute_costs (true, &rtxes);
298 }
299 
300 static bool
simple_move_operand(rtx x)301 simple_move_operand (rtx x)
302 {
303   if (GET_CODE (x) == SUBREG)
304     x = SUBREG_REG (x);
305 
306   if (!OBJECT_P (x))
307     return false;
308 
309   if (GET_CODE (x) == LABEL_REF
310       || GET_CODE (x) == SYMBOL_REF
311       || GET_CODE (x) == HIGH
312       || GET_CODE (x) == CONST)
313     return false;
314 
315   if (MEM_P (x)
316       && (MEM_VOLATILE_P (x)
317 	  || mode_dependent_address_p (XEXP (x, 0), MEM_ADDR_SPACE (x))))
318     return false;
319 
320   return true;
321 }
322 
323 /* If X is an operator that can be treated as a simple move that we
324    can split, then return the operand that is operated on.  */
325 
326 static rtx
operand_for_swap_move_operator(rtx x)327 operand_for_swap_move_operator (rtx x)
328 {
329   /* A word sized rotate of a register pair is equivalent to swapping
330      the registers in the register pair.  */
331   if (GET_CODE (x) == ROTATE
332       && GET_MODE (x) == twice_word_mode
333       && simple_move_operand (XEXP (x, 0))
334       && CONST_INT_P (XEXP (x, 1))
335       && INTVAL (XEXP (x, 1)) == BITS_PER_WORD)
336     return XEXP (x, 0);
337 
338   return NULL_RTX;
339 }
340 
341 /* If INSN is a single set between two objects that we want to split,
342    return the single set.  SPEED_P says whether we are optimizing
343    INSN for speed or size.
344 
345    INSN should have been passed to recog and extract_insn before this
346    is called.  */
347 
348 static rtx
simple_move(rtx_insn * insn,bool speed_p)349 simple_move (rtx_insn *insn, bool speed_p)
350 {
351   rtx x, op;
352   rtx set;
353   machine_mode mode;
354 
355   if (recog_data.n_operands != 2)
356     return NULL_RTX;
357 
358   set = single_set (insn);
359   if (!set)
360     return NULL_RTX;
361 
362   x = SET_DEST (set);
363   if (x != recog_data.operand[0] && x != recog_data.operand[1])
364     return NULL_RTX;
365   if (!simple_move_operand (x))
366     return NULL_RTX;
367 
368   x = SET_SRC (set);
369   if ((op = operand_for_swap_move_operator (x)) != NULL_RTX)
370     x = op;
371 
372   if (x != recog_data.operand[0] && x != recog_data.operand[1])
373     return NULL_RTX;
374   /* For the src we can handle ASM_OPERANDS, and it is beneficial for
375      things like x86 rdtsc which returns a DImode value.  */
376   if (GET_CODE (x) != ASM_OPERANDS
377       && !simple_move_operand (x))
378     return NULL_RTX;
379 
380   /* We try to decompose in integer modes, to avoid generating
381      inefficient code copying between integer and floating point
382      registers.  That means that we can't decompose if this is a
383      non-integer mode for which there is no integer mode of the same
384      size.  */
385   mode = GET_MODE (SET_DEST (set));
386   scalar_int_mode int_mode;
387   if (!SCALAR_INT_MODE_P (mode)
388       && (!int_mode_for_size (GET_MODE_BITSIZE (mode), 0).exists (&int_mode)
389 	  || !targetm.modes_tieable_p (mode, int_mode)))
390     return NULL_RTX;
391 
392   /* Reject PARTIAL_INT modes.  They are used for processor specific
393      purposes and it's probably best not to tamper with them.  */
394   if (GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
395     return NULL_RTX;
396 
397   if (!choices[speed_p].move_modes_to_split[(int) mode])
398     return NULL_RTX;
399 
400   return set;
401 }
402 
403 /* If SET is a copy from one multi-word pseudo-register to another,
404    record that in reg_copy_graph.  Return whether it is such a
405    copy.  */
406 
407 static bool
find_pseudo_copy(rtx set)408 find_pseudo_copy (rtx set)
409 {
410   rtx dest = SET_DEST (set);
411   rtx src = SET_SRC (set);
412   rtx op;
413   unsigned int rd, rs;
414   bitmap b;
415 
416   if ((op = operand_for_swap_move_operator (src)) != NULL_RTX)
417     src = op;
418 
419   if (!REG_P (dest) || !REG_P (src))
420     return false;
421 
422   rd = REGNO (dest);
423   rs = REGNO (src);
424   if (HARD_REGISTER_NUM_P (rd) || HARD_REGISTER_NUM_P (rs))
425     return false;
426 
427   b = reg_copy_graph[rs];
428   if (b == NULL)
429     {
430       b = BITMAP_ALLOC (NULL);
431       reg_copy_graph[rs] = b;
432     }
433 
434   bitmap_set_bit (b, rd);
435 
436   return true;
437 }
438 
439 /* Look through the registers in DECOMPOSABLE_CONTEXT.  For each case
440    where they are copied to another register, add the register to
441    which they are copied to DECOMPOSABLE_CONTEXT.  Use
442    NON_DECOMPOSABLE_CONTEXT to limit this--we don't bother to track
443    copies of registers which are in NON_DECOMPOSABLE_CONTEXT.  */
444 
445 static void
propagate_pseudo_copies(void)446 propagate_pseudo_copies (void)
447 {
448   auto_bitmap queue, propagate;
449 
450   bitmap_copy (queue, decomposable_context);
451   do
452     {
453       bitmap_iterator iter;
454       unsigned int i;
455 
456       bitmap_clear (propagate);
457 
458       EXECUTE_IF_SET_IN_BITMAP (queue, 0, i, iter)
459 	{
460 	  bitmap b = reg_copy_graph[i];
461 	  if (b)
462 	    bitmap_ior_and_compl_into (propagate, b, non_decomposable_context);
463 	}
464 
465       bitmap_and_compl (queue, propagate, decomposable_context);
466       bitmap_ior_into (decomposable_context, propagate);
467     }
468   while (!bitmap_empty_p (queue));
469 }
470 
/* A pointer to one of these values is passed to
   find_decomposable_subregs.  */

enum classify_move_insn
{
  /* Not a simple move from one location to another.  */
  NOT_SIMPLE_MOVE,
  /* A simple move we want to decompose.  */
  DECOMPOSABLE_SIMPLE_MOVE,
  /* Any other simple move.  */
  SIMPLE_MOVE
};
483 
484 /* If we find a SUBREG in *LOC which we could use to decompose a
485    pseudo-register, set a bit in DECOMPOSABLE_CONTEXT.  If we find an
486    unadorned register which is not a simple pseudo-register copy,
487    DATA will point at the type of move, and we set a bit in
488    DECOMPOSABLE_CONTEXT or NON_DECOMPOSABLE_CONTEXT as appropriate.  */
489 
490 static void
find_decomposable_subregs(rtx * loc,enum classify_move_insn * pcmi)491 find_decomposable_subregs (rtx *loc, enum classify_move_insn *pcmi)
492 {
493   subrtx_var_iterator::array_type array;
494   FOR_EACH_SUBRTX_VAR (iter, array, *loc, NONCONST)
495     {
496       rtx x = *iter;
497       if (GET_CODE (x) == SUBREG)
498 	{
499 	  rtx inner = SUBREG_REG (x);
500 	  unsigned int regno, outer_size, inner_size, outer_words, inner_words;
501 
502 	  if (!REG_P (inner))
503 	    continue;
504 
505 	  regno = REGNO (inner);
506 	  if (HARD_REGISTER_NUM_P (regno))
507 	    {
508 	      iter.skip_subrtxes ();
509 	      continue;
510 	    }
511 
512 	  if (!interesting_mode_p (GET_MODE (x), &outer_size, &outer_words)
513 	      || !interesting_mode_p (GET_MODE (inner), &inner_size,
514 				      &inner_words))
515 	    continue;
516 
517 	  /* We only try to decompose single word subregs of multi-word
518 	     registers.  When we find one, we return -1 to avoid iterating
519 	     over the inner register.
520 
521 	     ??? This doesn't allow, e.g., DImode subregs of TImode values
522 	     on 32-bit targets.  We would need to record the way the
523 	     pseudo-register was used, and only decompose if all the uses
524 	     were the same number and size of pieces.  Hopefully this
525 	     doesn't happen much.  */
526 
527 	  if (outer_words == 1
528 	      && inner_words > 1
529 	      /* Don't allow to decompose floating point subregs of
530 		 multi-word pseudos if the floating point mode does
531 		 not have word size, because otherwise we'd generate
532 		 a subreg with that floating mode from a different
533 		 sized integral pseudo which is not allowed by
534 		 validate_subreg.  */
535 	      && (!FLOAT_MODE_P (GET_MODE (x))
536 		  || outer_size == UNITS_PER_WORD))
537 	    {
538 	      bitmap_set_bit (decomposable_context, regno);
539 	      iter.skip_subrtxes ();
540 	      continue;
541 	    }
542 
543 	  /* If this is a cast from one mode to another, where the modes
544 	     have the same size, and they are not tieable, then mark this
545 	     register as non-decomposable.  If we decompose it we are
546 	     likely to mess up whatever the backend is trying to do.  */
547 	  if (outer_words > 1
548 	      && outer_size == inner_size
549 	      && !targetm.modes_tieable_p (GET_MODE (x), GET_MODE (inner)))
550 	    {
551 	      bitmap_set_bit (non_decomposable_context, regno);
552 	      bitmap_set_bit (subreg_context, regno);
553 	      iter.skip_subrtxes ();
554 	      continue;
555 	    }
556 	}
557       else if (REG_P (x))
558 	{
559 	  unsigned int regno, size, words;
560 
561 	  /* We will see an outer SUBREG before we see the inner REG, so
562 	     when we see a plain REG here it means a direct reference to
563 	     the register.
564 
565 	     If this is not a simple copy from one location to another,
566 	     then we cannot decompose this register.  If this is a simple
567 	     copy we want to decompose, and the mode is right,
568 	     then we mark the register as decomposable.
569 	     Otherwise we don't say anything about this register --
570 	     it could be decomposed, but whether that would be
571 	     profitable depends upon how it is used elsewhere.
572 
573 	     We only set bits in the bitmap for multi-word
574 	     pseudo-registers, since those are the only ones we care about
575 	     and it keeps the size of the bitmaps down.  */
576 
577 	  regno = REGNO (x);
578 	  if (!HARD_REGISTER_NUM_P (regno)
579 	      && interesting_mode_p (GET_MODE (x), &size, &words)
580 	      && words > 1)
581 	    {
582 	      switch (*pcmi)
583 		{
584 		case NOT_SIMPLE_MOVE:
585 		  bitmap_set_bit (non_decomposable_context, regno);
586 		  break;
587 		case DECOMPOSABLE_SIMPLE_MOVE:
588 		  if (targetm.modes_tieable_p (GET_MODE (x), word_mode))
589 		    bitmap_set_bit (decomposable_context, regno);
590 		  break;
591 		case SIMPLE_MOVE:
592 		  break;
593 		default:
594 		  gcc_unreachable ();
595 		}
596 	    }
597 	}
598       else if (MEM_P (x))
599 	{
600 	  enum classify_move_insn cmi_mem = NOT_SIMPLE_MOVE;
601 
602 	  /* Any registers used in a MEM do not participate in a
603 	     SIMPLE_MOVE or DECOMPOSABLE_SIMPLE_MOVE.  Do our own recursion
604 	     here, and return -1 to block the parent's recursion.  */
605 	  find_decomposable_subregs (&XEXP (x, 0), &cmi_mem);
606 	  iter.skip_subrtxes ();
607 	}
608     }
609 }
610 
611 /* Decompose REGNO into word-sized components.  We smash the REG node
612    in place.  This ensures that (1) something goes wrong quickly if we
613    fail to make some replacement, and (2) the debug information inside
614    the symbol table is automatically kept up to date.  */
615 
616 static void
decompose_register(unsigned int regno)617 decompose_register (unsigned int regno)
618 {
619   rtx reg;
620   unsigned int size, words, i;
621   rtvec v;
622 
623   reg = regno_reg_rtx[regno];
624 
625   regno_reg_rtx[regno] = NULL_RTX;
626 
627   if (!interesting_mode_p (GET_MODE (reg), &size, &words))
628     gcc_unreachable ();
629 
630   v = rtvec_alloc (words);
631   for (i = 0; i < words; ++i)
632     RTVEC_ELT (v, i) = gen_reg_rtx_offset (reg, word_mode, i * UNITS_PER_WORD);
633 
634   PUT_CODE (reg, CONCATN);
635   XVEC (reg, 0) = v;
636 
637   if (dump_file)
638     {
639       fprintf (dump_file, "; Splitting reg %u ->", regno);
640       for (i = 0; i < words; ++i)
641 	fprintf (dump_file, " %u", REGNO (XVECEXP (reg, 0, i)));
642       fputc ('\n', dump_file);
643     }
644 }
645 
646 /* Get a SUBREG of a CONCATN.  */
647 
648 static rtx
simplify_subreg_concatn(machine_mode outermode,rtx op,poly_uint64 orig_byte)649 simplify_subreg_concatn (machine_mode outermode, rtx op, poly_uint64 orig_byte)
650 {
651   unsigned int outer_size, outer_words, inner_size, inner_words;
652   machine_mode innermode, partmode;
653   rtx part;
654   unsigned int final_offset;
655   unsigned int byte;
656 
657   innermode = GET_MODE (op);
658   if (!interesting_mode_p (outermode, &outer_size, &outer_words)
659       || !interesting_mode_p (innermode, &inner_size, &inner_words))
660     gcc_unreachable ();
661 
662   /* Must be constant if interesting_mode_p passes.  */
663   byte = orig_byte.to_constant ();
664   gcc_assert (GET_CODE (op) == CONCATN);
665   gcc_assert (byte % outer_size == 0);
666 
667   gcc_assert (byte < inner_size);
668   if (outer_size > inner_size)
669     return NULL_RTX;
670 
671   inner_size /= XVECLEN (op, 0);
672   part = XVECEXP (op, 0, byte / inner_size);
673   partmode = GET_MODE (part);
674 
675   final_offset = byte % inner_size;
676   if (final_offset + outer_size > inner_size)
677     return NULL_RTX;
678 
679   /* VECTOR_CSTs in debug expressions are expanded into CONCATN instead of
680      regular CONST_VECTORs.  They have vector or integer modes, depending
681      on the capabilities of the target.  Cope with them.  */
682   if (partmode == VOIDmode && VECTOR_MODE_P (innermode))
683     partmode = GET_MODE_INNER (innermode);
684   else if (partmode == VOIDmode)
685     partmode = mode_for_size (inner_size * BITS_PER_UNIT,
686 			      GET_MODE_CLASS (innermode), 0).require ();
687 
688   return simplify_gen_subreg (outermode, part, partmode, final_offset);
689 }
690 
691 /* Wrapper around simplify_gen_subreg which handles CONCATN.  */
692 
693 static rtx
simplify_gen_subreg_concatn(machine_mode outermode,rtx op,machine_mode innermode,unsigned int byte)694 simplify_gen_subreg_concatn (machine_mode outermode, rtx op,
695 			     machine_mode innermode, unsigned int byte)
696 {
697   rtx ret;
698 
699   /* We have to handle generating a SUBREG of a SUBREG of a CONCATN.
700      If OP is a SUBREG of a CONCATN, then it must be a simple mode
701      change with the same size and offset 0, or it must extract a
702      part.  We shouldn't see anything else here.  */
703   if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == CONCATN)
704     {
705       rtx op2;
706 
707       if (known_eq (GET_MODE_SIZE (GET_MODE (op)),
708 		    GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))))
709 	  && known_eq (SUBREG_BYTE (op), 0))
710 	return simplify_gen_subreg_concatn (outermode, SUBREG_REG (op),
711 					    GET_MODE (SUBREG_REG (op)), byte);
712 
713       op2 = simplify_subreg_concatn (GET_MODE (op), SUBREG_REG (op),
714 				     SUBREG_BYTE (op));
715       if (op2 == NULL_RTX)
716 	{
717 	  /* We don't handle paradoxical subregs here.  */
718 	  gcc_assert (!paradoxical_subreg_p (outermode, GET_MODE (op)));
719 	  gcc_assert (!paradoxical_subreg_p (op));
720 	  op2 = simplify_subreg_concatn (outermode, SUBREG_REG (op),
721 					 byte + SUBREG_BYTE (op));
722 	  gcc_assert (op2 != NULL_RTX);
723 	  return op2;
724 	}
725 
726       op = op2;
727       gcc_assert (op != NULL_RTX);
728       gcc_assert (innermode == GET_MODE (op));
729     }
730 
731   if (GET_CODE (op) == CONCATN)
732     return simplify_subreg_concatn (outermode, op, byte);
733 
734   ret = simplify_gen_subreg (outermode, op, innermode, byte);
735 
736   /* If we see an insn like (set (reg:DI) (subreg:DI (reg:SI) 0)) then
737      resolve_simple_move will ask for the high part of the paradoxical
738      subreg, which does not have a value.  Just return a zero.  */
739   if (ret == NULL_RTX
740       && paradoxical_subreg_p (op))
741     return CONST0_RTX (outermode);
742 
743   gcc_assert (ret != NULL_RTX);
744   return ret;
745 }
746 
747 /* Return whether we should resolve X into the registers into which it
748    was decomposed.  */
749 
750 static bool
resolve_reg_p(rtx x)751 resolve_reg_p (rtx x)
752 {
753   return GET_CODE (x) == CONCATN;
754 }
755 
756 /* Return whether X is a SUBREG of a register which we need to
757    resolve.  */
758 
759 static bool
resolve_subreg_p(rtx x)760 resolve_subreg_p (rtx x)
761 {
762   if (GET_CODE (x) != SUBREG)
763     return false;
764   return resolve_reg_p (SUBREG_REG (x));
765 }
766 
767 /* Look for SUBREGs in *LOC which need to be decomposed.  */
768 
769 static bool
resolve_subreg_use(rtx * loc,rtx insn)770 resolve_subreg_use (rtx *loc, rtx insn)
771 {
772   subrtx_ptr_iterator::array_type array;
773   FOR_EACH_SUBRTX_PTR (iter, array, loc, NONCONST)
774     {
775       rtx *loc = *iter;
776       rtx x = *loc;
777       if (resolve_subreg_p (x))
778 	{
779 	  x = simplify_subreg_concatn (GET_MODE (x), SUBREG_REG (x),
780 				       SUBREG_BYTE (x));
781 
782 	  /* It is possible for a note to contain a reference which we can
783 	     decompose.  In this case, return 1 to the caller to indicate
784 	     that the note must be removed.  */
785 	  if (!x)
786 	    {
787 	      gcc_assert (!insn);
788 	      return true;
789 	    }
790 
791 	  validate_change (insn, loc, x, 1);
792 	  iter.skip_subrtxes ();
793 	}
794       else if (resolve_reg_p (x))
795 	/* Return 1 to the caller to indicate that we found a direct
796 	   reference to a register which is being decomposed.  This can
797 	   happen inside notes, multiword shift or zero-extend
798 	   instructions.  */
799 	return true;
800     }
801 
802   return false;
803 }
804 
805 /* Resolve any decomposed registers which appear in register notes on
806    INSN.  */
807 
808 static void
resolve_reg_notes(rtx_insn * insn)809 resolve_reg_notes (rtx_insn *insn)
810 {
811   rtx *pnote, note;
812 
813   note = find_reg_equal_equiv_note (insn);
814   if (note)
815     {
816       int old_count = num_validated_changes ();
817       if (resolve_subreg_use (&XEXP (note, 0), NULL_RTX))
818 	remove_note (insn, note);
819       else
820 	if (old_count != num_validated_changes ())
821 	  df_notes_rescan (insn);
822     }
823 
824   pnote = &REG_NOTES (insn);
825   while (*pnote != NULL_RTX)
826     {
827       bool del = false;
828 
829       note = *pnote;
830       switch (REG_NOTE_KIND (note))
831 	{
832 	case REG_DEAD:
833 	case REG_UNUSED:
834 	  if (resolve_reg_p (XEXP (note, 0)))
835 	    del = true;
836 	  break;
837 
838 	default:
839 	  break;
840 	}
841 
842       if (del)
843 	*pnote = XEXP (note, 1);
844       else
845 	pnote = &XEXP (note, 1);
846     }
847 }
848 
849 /* Return whether X can be decomposed into subwords.  */
850 
851 static bool
can_decompose_p(rtx x)852 can_decompose_p (rtx x)
853 {
854   if (REG_P (x))
855     {
856       unsigned int regno = REGNO (x);
857 
858       if (HARD_REGISTER_NUM_P (regno))
859 	{
860 	  unsigned int byte, num_bytes, num_words;
861 
862 	  if (!interesting_mode_p (GET_MODE (x), &num_bytes, &num_words))
863 	    return false;
864 	  for (byte = 0; byte < num_bytes; byte += UNITS_PER_WORD)
865 	    if (simplify_subreg_regno (regno, GET_MODE (x), byte, word_mode) < 0)
866 	      return false;
867 	  return true;
868 	}
869       else
870 	return !bitmap_bit_p (subreg_context, regno);
871     }
872 
873   return true;
874 }
875 
876 /* OPND is a concatn operand this is used with a simple move operator.
877    Return a new rtx with the concatn's operands swapped.  */
878 
879 static rtx
resolve_operand_for_swap_move_operator(rtx opnd)880 resolve_operand_for_swap_move_operator (rtx opnd)
881 {
882   gcc_assert (GET_CODE (opnd) == CONCATN);
883   rtx concatn = copy_rtx (opnd);
884   rtx op0 = XVECEXP (concatn, 0, 0);
885   rtx op1 = XVECEXP (concatn, 0, 1);
886   XVECEXP (concatn, 0, 0) = op1;
887   XVECEXP (concatn, 0, 1) = op0;
888   return concatn;
889 }
890 
/* Decompose the registers used in a simple move SET within INSN.  If
   we don't change anything, return INSN, otherwise return the start
   of the sequence of moves.  Emits the replacement moves before INSN
   and removes INSN itself.  May recurse (via emitted helper moves)
   when a copy must be staged through a temporary pseudo.  */

static rtx_insn *
resolve_simple_move (rtx set, rtx_insn *insn)
{
  rtx src, dest, real_dest, src_op;
  rtx_insn *insns;
  machine_mode orig_mode, dest_mode;
  unsigned int orig_size, words;
  bool pushing;

  src = SET_SRC (set);
  dest = SET_DEST (set);
  orig_mode = GET_MODE (dest);

  /* Only multiword modes are interesting here.  */
  if (!interesting_mode_p (orig_mode, &orig_size, &words))
    gcc_unreachable ();
  gcc_assert (words > 1);

  start_sequence ();

  /* We have to handle copying from a SUBREG of a decomposed reg where
     the SUBREG is larger than word size.  Rather than assume that we
     can take a word_mode SUBREG of the destination, we copy to a new
     register and then copy that to the destination.  */

  real_dest = NULL_RTX;

  /* If SRC is a swap-style operator applied to a CONCATN (or DEST is
     itself a CONCATN), strip the operator and swap the CONCATN's two
     word operands instead.  */
  if ((src_op = operand_for_swap_move_operator (src)) != NULL_RTX)
    {
      if (resolve_reg_p (dest))
	{
	  /* DEST is a CONCATN, so swap its operands and strip
	     SRC's operator.  */
	  dest = resolve_operand_for_swap_move_operator (dest);
	  src = src_op;
	}
      else if (resolve_reg_p (src_op))
	{
	  /* SRC is an operation on a CONCATN, so strip the operator and
	     swap the CONCATN's operands.  */
	  src = resolve_operand_for_swap_move_operator (src_op);
	}
    }

  /* Reading from a non-trivial SUBREG of a decomposed register:
     stage through a fresh pseudo and fix up REAL_DEST at the end.  */
  if (GET_CODE (src) == SUBREG
      && resolve_reg_p (SUBREG_REG (src))
      && (maybe_ne (SUBREG_BYTE (src), 0)
	  || maybe_ne (orig_size, GET_MODE_SIZE (GET_MODE (SUBREG_REG (src))))))
    {
      real_dest = dest;
      dest = gen_reg_rtx (orig_mode);
      if (REG_P (real_dest))
	REG_ATTRS (dest) = REG_ATTRS (real_dest);
    }

  /* Similarly if we are copying to a SUBREG of a decomposed reg where
     the SUBREG is larger than word size.  */

  if (GET_CODE (dest) == SUBREG
      && resolve_reg_p (SUBREG_REG (dest))
      && (maybe_ne (SUBREG_BYTE (dest), 0)
	  || maybe_ne (orig_size,
		       GET_MODE_SIZE (GET_MODE (SUBREG_REG (dest))))))
    {
      rtx reg, smove;
      rtx_insn *minsn;

      /* Copy SRC into a fresh pseudo and recursively decompose that
	 helper move; the outer move then reads from the new pseudo.  */
      reg = gen_reg_rtx (orig_mode);
      minsn = emit_move_insn (reg, src);
      smove = single_set (minsn);
      gcc_assert (smove != NULL_RTX);
      resolve_simple_move (smove, minsn);
      src = reg;
    }

  /* If we didn't have any big SUBREGS of decomposed registers, and
     neither side of the move is a register we are decomposing, then
     we don't have to do anything here.  */

  if (src == SET_SRC (set)
      && dest == SET_DEST (set)
      && !resolve_reg_p (src)
      && !resolve_subreg_p (src)
      && !resolve_reg_p (dest)
      && !resolve_subreg_p (dest))
    {
      end_sequence ();
      return insn;
    }

  /* It's possible for the code to use a subreg of a decomposed
     register while forming an address.  We need to handle that before
     passing the address to emit_move_insn.  We pass NULL_RTX as the
     insn parameter to resolve_subreg_use because we cannot validate
     the insn yet.  */
  if (MEM_P (src) || MEM_P (dest))
    {
      int acg;

      if (MEM_P (src))
	resolve_subreg_use (&XEXP (src, 0), NULL_RTX);
      if (MEM_P (dest))
	resolve_subreg_use (&XEXP (dest, 0), NULL_RTX);
      acg = apply_change_group ();
      gcc_assert (acg);
    }

  /* If SRC is a register which we can't decompose, or has side
     effects, we need to move via a temporary register.  */

  if (!can_decompose_p (src)
      || side_effects_p (src)
      || GET_CODE (src) == ASM_OPERANDS)
    {
      rtx reg;

      reg = gen_reg_rtx (orig_mode);

      if (AUTO_INC_DEC)
	{
	  rtx_insn *move = emit_move_insn (reg, src);
	  if (MEM_P (src))
	    {
	      /* The auto-inc side effect now happens on the new move,
		 so transfer any REG_INC note to it.  */
	      rtx note = find_reg_note (insn, REG_INC, NULL_RTX);
	      if (note)
		add_reg_note (move, REG_INC, XEXP (note, 0));
	    }
	}
      else
	emit_move_insn (reg, src);

      src = reg;
    }

  /* If DEST is a register which we can't decompose, or has side
     effects, we need to first move to a temporary register.  We
     handle the common case of pushing an operand directly.  We also
     go through a temporary register if it holds a floating point
     value.  This gives us better code on systems which can't move
     data easily between integer and floating point registers.  */

  dest_mode = orig_mode;
  pushing = push_operand (dest, dest_mode);
  if (!can_decompose_p (dest)
      || (side_effects_p (dest) && !pushing)
      || (!SCALAR_INT_MODE_P (dest_mode)
	  && !resolve_reg_p (dest)
	  && !resolve_subreg_p (dest)))
    {
      if (real_dest == NULL_RTX)
	real_dest = dest;
      if (!SCALAR_INT_MODE_P (dest_mode))
	dest_mode = int_mode_for_mode (dest_mode).require ();
      dest = gen_reg_rtx (dest_mode);
      if (REG_P (real_dest))
	REG_ATTRS (dest) = REG_ATTRS (real_dest);
    }

  if (pushing)
    {
      unsigned int i, j, jinc;

      /* PRE_MODIFY/POST_MODIFY pushes are not handled; only plain
	 auto-inc push addressing reaches here.  */
      gcc_assert (orig_size % UNITS_PER_WORD == 0);
      gcc_assert (GET_CODE (XEXP (dest, 0)) != PRE_MODIFY);
      gcc_assert (GET_CODE (XEXP (dest, 0)) != POST_MODIFY);

      /* Choose the word order so that successive pushes lay the words
	 out in memory in the right order.  */
      if (WORDS_BIG_ENDIAN == STACK_GROWS_DOWNWARD)
	{
	  j = 0;
	  jinc = 1;
	}
      else
	{
	  j = words - 1;
	  jinc = -1;
	}

      for (i = 0; i < words; ++i, j += jinc)
	{
	  rtx temp;

	  temp = copy_rtx (XEXP (dest, 0));
	  temp = adjust_automodify_address_nv (dest, word_mode, temp,
					       j * UNITS_PER_WORD);
	  emit_move_insn (temp,
			  simplify_gen_subreg_concatn (word_mode, src,
						       orig_mode,
						       j * UNITS_PER_WORD));
	}
    }
  else
    {
      unsigned int i;

      /* Clobber the whole pseudo first so the word-by-word stores are
	 not seen as partial updates of an existing value.  */
      if (REG_P (dest) && !HARD_REGISTER_NUM_P (REGNO (dest)))
	emit_clobber (dest);

      for (i = 0; i < words; ++i)
	{
	  rtx t = simplify_gen_subreg_concatn (word_mode, dest,
					       dest_mode,
					       i * UNITS_PER_WORD);
	  /* simplify_gen_subreg_concatn can return (const_int 0) for
	     some sub-objects of paradoxical subregs.  As a source operand,
	     that's fine.  As a destination it must be avoided.  Those are
	     supposed to be don't care bits, so we can just drop that store
	     on the floor.  */
	  if (t != CONST0_RTX (word_mode))
	    emit_move_insn (t,
			    simplify_gen_subreg_concatn (word_mode, src,
							 orig_mode,
							 i * UNITS_PER_WORD));
	}
    }

  /* If the move was staged through a temporary, emit the final copy
     into the real destination and decompose that move too.  */
  if (real_dest != NULL_RTX)
    {
      rtx mdest, smove;
      rtx_insn *minsn;

      if (dest_mode == orig_mode)
	mdest = dest;
      else
	mdest = simplify_gen_subreg (orig_mode, dest, GET_MODE (dest), 0);
      minsn = emit_move_insn (real_dest, mdest);

  /* NOTE(review): this hunk is misindented relative to the enclosing
     block but is inside the real_dest != NULL_RTX conditional; it
     transfers a REG_INC note when the final store is an auto-inc MEM.  */
  if (AUTO_INC_DEC && MEM_P (real_dest)
      && !(resolve_reg_p (real_dest) || resolve_subreg_p (real_dest)))
    {
      rtx note = find_reg_note (insn, REG_INC, NULL_RTX);
      if (note)
	add_reg_note (minsn, REG_INC, XEXP (note, 0));
    }

      smove = single_set (minsn);
      gcc_assert (smove != NULL_RTX);

      resolve_simple_move (smove, minsn);
    }

  insns = get_insns ();
  end_sequence ();

  copy_reg_eh_region_note_forward (insn, insns, NULL_RTX);

  emit_insn_before (insns, insn);

  /* If we get here via self-recursion, then INSN is not yet in the insns
     chain and delete_insn will fail.  We only want to remove INSN from the
     current sequence.  See PR56738.  */
  if (in_sequence_p ())
    remove_insn (insn);
  else
    delete_insn (insn);

  return insns;
}
1151 
/* Change a CLOBBER of a decomposed register into a CLOBBER of the
   component registers.  Return whether we changed something.  The
   original insn is kept, rewritten to clobber word 0; clobbers of the
   remaining words are emitted as new insns after it.  */

static bool
resolve_clobber (rtx pat, rtx_insn *insn)
{
  rtx reg;
  machine_mode orig_mode;
  unsigned int orig_size, words, i;
  int ret;

  reg = XEXP (pat, 0);
  /* For clobbers we can look through paradoxical subregs which
     we do not handle in simplify_gen_subreg_concatn.  */
  if (paradoxical_subreg_p (reg))
    reg = SUBREG_REG (reg);
  if (!resolve_reg_p (reg) && !resolve_subreg_p (reg))
    return false;

  orig_mode = GET_MODE (reg);
  if (!interesting_mode_p (orig_mode, &orig_size, &words))
    gcc_unreachable ();

  /* Rewrite the existing clobber in place to cover word 0 of the
     decomposed value.  */
  ret = validate_change (NULL_RTX, &XEXP (pat, 0),
			 simplify_gen_subreg_concatn (word_mode, reg,
						      orig_mode, 0),
			 0);
  df_insn_rescan (insn);
  gcc_assert (ret != 0);

  /* Emit separate clobbers for each remaining word after INSN.  */
  for (i = words - 1; i > 0; --i)
    {
      rtx x;

      x = simplify_gen_subreg_concatn (word_mode, reg, orig_mode,
				       i * UNITS_PER_WORD);
      x = gen_rtx_CLOBBER (VOIDmode, x);
      emit_insn_after (x, insn);
    }

  resolve_reg_notes (insn);

  return true;
}
1196 
1197 /* A USE of a decomposed register is no longer meaningful.  Return
1198    whether we changed something.  */
1199 
1200 static bool
resolve_use(rtx pat,rtx_insn * insn)1201 resolve_use (rtx pat, rtx_insn *insn)
1202 {
1203   if (resolve_reg_p (XEXP (pat, 0)) || resolve_subreg_p (XEXP (pat, 0)))
1204     {
1205       delete_insn (insn);
1206       return true;
1207     }
1208 
1209   resolve_reg_notes (insn);
1210 
1211   return false;
1212 }
1213 
/* A VAR_LOCATION can be simplified.  Walk every sub-rtx of the debug
   insn's pattern, replacing SUBREGs of decomposed registers with the
   simplified word piece and unsharing any CONCATN references.  */

static void
resolve_debug (rtx_insn *insn)
{
  subrtx_ptr_iterator::array_type array;
  FOR_EACH_SUBRTX_PTR (iter, array, &PATTERN (insn), NONCONST)
    {
      rtx *loc = *iter;
      rtx x = *loc;
      if (resolve_subreg_p (x))
	{
	  x = simplify_subreg_concatn (GET_MODE (x), SUBREG_REG (x),
				       SUBREG_BYTE (x));

	  /* Install the simplified piece if we got one; otherwise
	     continue with an unshared copy of the original
	     (NOTE(review): the copy is not written back here — only
	     used for the CONCATN check below).  */
	  if (x)
	    *loc = x;
	  else
	    x = copy_rtx (*loc);
	}
      /* CONCATN references must not be shared between insns.  */
      if (resolve_reg_p (x))
	*loc = copy_rtx (x);
    }

  df_insn_rescan (insn);

  resolve_reg_notes (insn);
}
1242 
1243 /* Check if INSN is a decomposable multiword-shift or zero-extend and
1244    set the decomposable_context bitmap accordingly.  SPEED_P is true
1245    if we are optimizing INSN for speed rather than size.  Return true
1246    if INSN is decomposable.  */
1247 
1248 static bool
find_decomposable_shift_zext(rtx_insn * insn,bool speed_p)1249 find_decomposable_shift_zext (rtx_insn *insn, bool speed_p)
1250 {
1251   rtx set;
1252   rtx op;
1253   rtx op_operand;
1254 
1255   set = single_set (insn);
1256   if (!set)
1257     return false;
1258 
1259   op = SET_SRC (set);
1260   if (GET_CODE (op) != ASHIFT
1261       && GET_CODE (op) != LSHIFTRT
1262       && GET_CODE (op) != ASHIFTRT
1263       && GET_CODE (op) != ZERO_EXTEND)
1264     return false;
1265 
1266   op_operand = XEXP (op, 0);
1267   if (!REG_P (SET_DEST (set)) || !REG_P (op_operand)
1268       || HARD_REGISTER_NUM_P (REGNO (SET_DEST (set)))
1269       || HARD_REGISTER_NUM_P (REGNO (op_operand))
1270       || GET_MODE (op) != twice_word_mode)
1271     return false;
1272 
1273   if (GET_CODE (op) == ZERO_EXTEND)
1274     {
1275       if (GET_MODE (op_operand) != word_mode
1276 	  || !choices[speed_p].splitting_zext)
1277 	return false;
1278     }
1279   else /* left or right shift */
1280     {
1281       bool *splitting = (GET_CODE (op) == ASHIFT
1282 			 ? choices[speed_p].splitting_ashift
1283 			 : GET_CODE (op) == ASHIFTRT
1284 			 ? choices[speed_p].splitting_ashiftrt
1285 			 : choices[speed_p].splitting_lshiftrt);
1286       if (!CONST_INT_P (XEXP (op, 1))
1287 	  || !IN_RANGE (INTVAL (XEXP (op, 1)), BITS_PER_WORD,
1288 			2 * BITS_PER_WORD - 1)
1289 	  || !splitting[INTVAL (XEXP (op, 1)) - BITS_PER_WORD])
1290 	return false;
1291 
1292       bitmap_set_bit (decomposable_context, REGNO (op_operand));
1293     }
1294 
1295   bitmap_set_bit (decomposable_context, REGNO (SET_DEST (set)));
1296 
1297   return true;
1298 }
1299 
/* Decompose a more than word wide shift (in INSN) of a multiword
   pseudo or a multiword zero-extend of a wordmode pseudo into a move
   and 'set to zero' insn.  Return a pointer to the new insn when a
   replacement was done.  */

static rtx_insn *
resolve_shift_zext (rtx_insn *insn)
{
  rtx set;
  rtx op;
  rtx op_operand;
  rtx_insn *insns;
  rtx src_reg, dest_reg, dest_upper, upper_src = NULL_RTX;
  int src_reg_num, dest_reg_num, offset1, offset2, src_offset;
  scalar_int_mode inner_mode;

  set = single_set (insn);
  if (!set)
    return NULL;

  op = SET_SRC (set);
  if (GET_CODE (op) != ASHIFT
      && GET_CODE (op) != LSHIFTRT
      && GET_CODE (op) != ASHIFTRT
      && GET_CODE (op) != ZERO_EXTEND)
    return NULL;

  op_operand = XEXP (op, 0);
  if (!is_a <scalar_int_mode> (GET_MODE (op_operand), &inner_mode))
    return NULL;

  /* We can tear this operation apart only if the regs were already
     torn apart.  */
  if (!resolve_reg_p (SET_DEST (set)) && !resolve_reg_p (op_operand))
    return NULL;

  /* src_reg_num is the number of the word mode register which we
     are operating on.  For a left shift and a zero_extend on little
     endian machines this is register 0.  */
  src_reg_num = (GET_CODE (op) == LSHIFTRT || GET_CODE (op) == ASHIFTRT)
		? 1 : 0;

  /* On big-endian targets the words of a multiword value are numbered
     the other way around.  */
  if (WORDS_BIG_ENDIAN && GET_MODE_SIZE (inner_mode) > UNITS_PER_WORD)
    src_reg_num = 1 - src_reg_num;

  if (GET_CODE (op) == ZERO_EXTEND)
    dest_reg_num = WORDS_BIG_ENDIAN ? 1 : 0;
  else
    dest_reg_num = 1 - src_reg_num;

  /* Byte offsets of the result word (offset1), the word to be
     zeroed or sign-filled (offset2), and the source word.  */
  offset1 = UNITS_PER_WORD * dest_reg_num;
  offset2 = UNITS_PER_WORD * (1 - dest_reg_num);
  src_offset = UNITS_PER_WORD * src_reg_num;

  start_sequence ();

  dest_reg = simplify_gen_subreg_concatn (word_mode, SET_DEST (set),
                                          GET_MODE (SET_DEST (set)),
                                          offset1);
  dest_upper = simplify_gen_subreg_concatn (word_mode, SET_DEST (set),
					    GET_MODE (SET_DEST (set)),
					    offset2);
  src_reg = simplify_gen_subreg_concatn (word_mode, op_operand,
                                         GET_MODE (op_operand),
                                         src_offset);
  /* For an arithmetic right shift by less than 2*BITS_PER_WORD-1, the
     upper result word is the source word shifted right by
     BITS_PER_WORD-1 to replicate the sign bit.  */
  if (GET_CODE (op) == ASHIFTRT
      && INTVAL (XEXP (op, 1)) != 2 * BITS_PER_WORD - 1)
    upper_src = expand_shift (RSHIFT_EXPR, word_mode, copy_rtx (src_reg),
			      BITS_PER_WORD - 1, NULL_RTX, 0);

  if (GET_CODE (op) != ZERO_EXTEND)
    {
      int shift_count = INTVAL (XEXP (op, 1));
      /* A shift by more than one word needs a residual word-mode shift
	 of the source word by the excess amount.  */
      if (shift_count > BITS_PER_WORD)
	src_reg = expand_shift (GET_CODE (op) == ASHIFT ?
				LSHIFT_EXPR : RSHIFT_EXPR,
				word_mode, src_reg,
				shift_count - BITS_PER_WORD,
				dest_reg, GET_CODE (op) != ASHIFTRT);
    }

  /* Emit the result word, then fill the other word: zero for
     logical shifts and zero-extends, a sign copy for arithmetic
     right shifts.  */
  if (dest_reg != src_reg)
    emit_move_insn (dest_reg, src_reg);
  if (GET_CODE (op) != ASHIFTRT)
    emit_move_insn (dest_upper, CONST0_RTX (word_mode));
  else if (INTVAL (XEXP (op, 1)) == 2 * BITS_PER_WORD - 1)
    emit_move_insn (dest_upper, copy_rtx (src_reg));
  else
    emit_move_insn (dest_upper, upper_src);
  insns = get_insns ();

  end_sequence ();

  emit_insn_before (insns, insn);

  if (dump_file)
    {
      rtx_insn *in;
      fprintf (dump_file, "; Replacing insn: %d with insns: ", INSN_UID (insn));
      for (in = insns; in != insn; in = NEXT_INSN (in))
	fprintf (dump_file, "%d ", INSN_UID (in));
      fprintf (dump_file, "\n");
    }

  delete_insn (insn);
  return insns;
}
1407 
1408 /* Print to dump_file a description of what we're doing with shift code CODE.
1409    SPLITTING[X] is true if we are splitting shifts by X + BITS_PER_WORD.  */
1410 
1411 static void
dump_shift_choices(enum rtx_code code,bool * splitting)1412 dump_shift_choices (enum rtx_code code, bool *splitting)
1413 {
1414   int i;
1415   const char *sep;
1416 
1417   fprintf (dump_file,
1418 	   "  Splitting mode %s for %s lowering with shift amounts = ",
1419 	   GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (code));
1420   sep = "";
1421   for (i = 0; i < BITS_PER_WORD; i++)
1422     if (splitting[i])
1423       {
1424 	fprintf (dump_file, "%s%d", sep, i + BITS_PER_WORD);
1425 	sep = ",";
1426       }
1427   fprintf (dump_file, "\n");
1428 }
1429 
1430 /* Print to dump_file a description of what we're doing when optimizing
1431    for speed or size; SPEED_P says which.  DESCRIPTION is a description
1432    of the SPEED_P choice.  */
1433 
1434 static void
dump_choices(bool speed_p,const char * description)1435 dump_choices (bool speed_p, const char *description)
1436 {
1437   unsigned int size, factor, i;
1438 
1439   fprintf (dump_file, "Choices when optimizing for %s:\n", description);
1440 
1441   for (i = 0; i < MAX_MACHINE_MODE; i++)
1442     if (interesting_mode_p ((machine_mode) i, &size, &factor)
1443 	&& factor > 1)
1444       fprintf (dump_file, "  %s mode %s for copy lowering.\n",
1445 	       choices[speed_p].move_modes_to_split[i]
1446 	       ? "Splitting"
1447 	       : "Skipping",
1448 	       GET_MODE_NAME ((machine_mode) i));
1449 
1450   fprintf (dump_file, "  %s mode %s for zero_extend lowering.\n",
1451 	   choices[speed_p].splitting_zext ? "Splitting" : "Skipping",
1452 	   GET_MODE_NAME (twice_word_mode));
1453 
1454   dump_shift_choices (ASHIFT, choices[speed_p].splitting_ashift);
1455   dump_shift_choices (LSHIFTRT, choices[speed_p].splitting_lshiftrt);
1456   dump_shift_choices (ASHIFTRT, choices[speed_p].splitting_ashiftrt);
1457   fprintf (dump_file, "\n");
1458 }
1459 
/* Look for registers which are always accessed via word-sized SUBREGs
   or -if DECOMPOSE_COPIES is true- via copies.  Decompose these
   registers into several word-sized pseudo-registers.

   This is the driver for the whole pass: it scans all insns to
   classify candidate pseudos, propagates copy information, replaces
   each decomposable pseudo by a CONCATN of word-mode pseudos, and
   then rewrites every insn that referenced them.  */

static void
decompose_multiword_subregs (bool decompose_copies)
{
  unsigned int max;
  basic_block bb;
  bool speed_p;

  if (dump_file)
    {
      dump_choices (false, "size");
      dump_choices (true, "speed");
    }

  /* Check if this target even has any modes to consider lowering.   */
  if (!choices[false].something_to_do && !choices[true].something_to_do)
    {
      if (dump_file)
	fprintf (dump_file, "Nothing to do!\n");
      return;
    }

  max = max_reg_num ();

  /* First see if there are any multi-word pseudo-registers.  If there
     aren't, there is nothing we can do.  This should speed up this
     pass in the normal case, since it should be faster than scanning
     all the insns.  */
  {
    unsigned int i;
    bool useful_modes_seen = false;

    for (i = FIRST_PSEUDO_REGISTER; i < max; ++i)
      if (regno_reg_rtx[i] != NULL)
	{
	  machine_mode mode = GET_MODE (regno_reg_rtx[i]);
	  if (choices[false].move_modes_to_split[(int) mode]
	      || choices[true].move_modes_to_split[(int) mode])
	    {
	      useful_modes_seen = true;
	      break;
	    }
	}

    if (!useful_modes_seen)
      {
	if (dump_file)
	  fprintf (dump_file, "Nothing to lower in this function.\n");
	return;
      }
  }

  /* Run word-level DCE first so dead multiword uses don't inhibit
     decomposition.  Deferred rescanning keeps DF updates cheap while
     we rewrite insns below.  */
  if (df)
    {
      df_set_flags (DF_DEFER_INSN_RESCAN);
      run_word_dce ();
    }

  /* FIXME: It may be possible to change this code to look for each
     multi-word pseudo-register and to find each insn which sets or
     uses that register.  That should be faster than scanning all the
     insns.  */

  decomposable_context = BITMAP_ALLOC (NULL);
  non_decomposable_context = BITMAP_ALLOC (NULL);
  subreg_context = BITMAP_ALLOC (NULL);

  /* NOTE(review): the memset is redundant after safe_grow_cleared,
     which already zero-fills the new elements.  */
  reg_copy_graph.create (max);
  reg_copy_graph.safe_grow_cleared (max, true);
  memset (reg_copy_graph.address (), 0, sizeof (bitmap) * max);

  /* Phase 1: scan every insn and classify pseudos into the
     decomposable / non-decomposable / subreg-context bitmaps.  */
  speed_p = optimize_function_for_speed_p (cfun);
  FOR_EACH_BB_FN (bb, cfun)
    {
      rtx_insn *insn;

      FOR_BB_INSNS (bb, insn)
	{
	  rtx set;
	  enum classify_move_insn cmi;
	  int i, n;

	  if (!INSN_P (insn)
	      || GET_CODE (PATTERN (insn)) == CLOBBER
	      || GET_CODE (PATTERN (insn)) == USE)
	    continue;

	  recog_memoized (insn);

	  if (find_decomposable_shift_zext (insn, speed_p))
	    continue;

	  extract_insn (insn);

	  set = simple_move (insn, speed_p);

	  if (!set)
	    cmi = NOT_SIMPLE_MOVE;
	  else
	    {
	      /* We mark pseudo-to-pseudo copies as decomposable during the
		 second pass only.  The first pass is so early that there is
		 good chance such moves will be optimized away completely by
		 subsequent optimizations anyway.

		 However, we call find_pseudo_copy even during the first pass
		 so as to properly set up the reg_copy_graph.  */
	      if (find_pseudo_copy (set))
		cmi = decompose_copies? DECOMPOSABLE_SIMPLE_MOVE : SIMPLE_MOVE;
	      else
		cmi = SIMPLE_MOVE;
	    }

	  n = recog_data.n_operands;
	  for (i = 0; i < n; ++i)
	    {
	      find_decomposable_subregs (&recog_data.operand[i], &cmi);

	      /* We handle ASM_OPERANDS as a special case to support
		 things like x86 rdtsc which returns a DImode value.
		 We can decompose the output, which will certainly be
		 operand 0, but not the inputs.  */

	      if (cmi == SIMPLE_MOVE
		  && GET_CODE (SET_SRC (set)) == ASM_OPERANDS)
		{
		  gcc_assert (i == 0);
		  cmi = NOT_SIMPLE_MOVE;
		}
	    }
	}
    }

  /* Phase 2: drop pseudos seen in a non-decomposable context, then
     rewrite everything that refers to the remaining candidates.  */
  bitmap_and_compl_into (decomposable_context, non_decomposable_context);
  if (!bitmap_empty_p (decomposable_context))
    {
      unsigned int i;
      sbitmap_iterator sbi;
      bitmap_iterator iter;
      unsigned int regno;

      propagate_pseudo_copies ();

      auto_sbitmap sub_blocks (last_basic_block_for_fn (cfun));
      bitmap_clear (sub_blocks);

      EXECUTE_IF_SET_IN_BITMAP (decomposable_context, 0, regno, iter)
	decompose_register (regno);

      FOR_EACH_BB_FN (bb, cfun)
	{
	  rtx_insn *insn;

	  FOR_BB_INSNS (bb, insn)
	    {
	      rtx pat;

	      if (!INSN_P (insn))
		continue;

	      pat = PATTERN (insn);
	      if (GET_CODE (pat) == CLOBBER)
		resolve_clobber (pat, insn);
	      else if (GET_CODE (pat) == USE)
		resolve_use (pat, insn);
	      else if (DEBUG_INSN_P (insn))
		resolve_debug (insn);
	      else
		{
		  rtx set;
		  int i;

		  recog_memoized (insn);
		  extract_insn (insn);

		  set = simple_move (insn, speed_p);
		  if (set)
		    {
		      rtx_insn *orig_insn = insn;
		      bool cfi = control_flow_insn_p (insn);

		      /* We can end up splitting loads to multi-word pseudos
			 into separate loads to machine word size pseudos.
			 When this happens, we first had one load that can
			 throw, and after resolve_simple_move we'll have a
			 bunch of loads (at least two).  All those loads may
			 trap if we can have non-call exceptions, so they
			 all will end the current basic block.  We split the
			 block after the outer loop over all insns, but we
			 make sure here that we will be able to split the
			 basic block and still produce the correct control
			 flow graph for it.  */
		      gcc_assert (!cfi
				  || (cfun->can_throw_non_call_exceptions
				      && can_throw_internal (insn)));

		      insn = resolve_simple_move (set, insn);
		      if (insn != orig_insn)
			{
			  recog_memoized (insn);
			  extract_insn (insn);

			  if (cfi)
			    bitmap_set_bit (sub_blocks, bb->index);
			}
		    }
		  else
		    {
		      rtx_insn *decomposed_shift;

		      decomposed_shift = resolve_shift_zext (insn);
		      if (decomposed_shift != NULL_RTX)
			{
			  insn = decomposed_shift;
			  recog_memoized (insn);
			  extract_insn (insn);
			}
		    }

		  /* Rewrite any remaining SUBREG operands of the
		     (possibly replaced) insn.  */
		  for (i = recog_data.n_operands - 1; i >= 0; --i)
		    resolve_subreg_use (recog_data.operand_loc[i], insn);

		  resolve_reg_notes (insn);

		  if (num_validated_changes () > 0)
		    {
		      /* Keep matched (dup) operands identical to the
			 operands they duplicate.  */
		      for (i = recog_data.n_dups - 1; i >= 0; --i)
			{
			  rtx *pl = recog_data.dup_loc[i];
			  int dup_num = recog_data.dup_num[i];
			  rtx *px = recog_data.operand_loc[dup_num];

			  validate_unshare_change (insn, pl, *px, 1);
			}

		      i = apply_change_group ();
		      gcc_assert (i);
		    }
		}
	    }
	}

      /* If we had insns to split that caused control flow insns in the middle
	 of a basic block, split those blocks now.  Note that we only handle
	 the case where splitting a load has caused multiple possibly trapping
	 loads to appear.  */
      EXECUTE_IF_SET_IN_BITMAP (sub_blocks, 0, i, sbi)
	{
	  rtx_insn *insn, *end;
	  edge fallthru;

	  bb = BASIC_BLOCK_FOR_FN (cfun, i);
	  insn = BB_HEAD (bb);
	  end = BB_END (bb);

	  while (insn != end)
	    {
	      if (control_flow_insn_p (insn))
		{
		  /* Split the block after insn.  There will be a fallthru
		     edge, which is OK so we keep it.  We have to create the
		     exception edges ourselves.  */
		  fallthru = split_block (bb, insn);
		  rtl_make_eh_edge (NULL, bb, BB_END (bb));
		  bb = fallthru->dest;
		  insn = BB_HEAD (bb);
		}
	      else
	        insn = NEXT_INSN (insn);
	    }
	}
    }

  /* Release all per-invocation state.  */
  for (bitmap b : reg_copy_graph)
    if (b)
      BITMAP_FREE (b);

  reg_copy_graph.release ();

  BITMAP_FREE (decomposable_context);
  BITMAP_FREE (non_decomposable_context);
  BITMAP_FREE (subreg_context);
}
1746 
/* Implement first lower subreg pass.  */

namespace {

/* Metadata for the "subreg1" RTL pass.  */

const pass_data pass_data_lower_subreg =
{
  RTL_PASS, /* type */
  "subreg1", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_LOWER_SUBREG, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_lower_subreg : public rtl_opt_pass
{
public:
  pass_lower_subreg (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_lower_subreg, ctxt)
  {}

  /* opt_pass methods: */
  /* Run only when -fsplit-wide-types is enabled.  */
  virtual bool gate (function *) { return flag_split_wide_types != 0; }
  /* The first pass does not treat pseudo-to-pseudo copies as
     decomposable (argument false).  */
  virtual unsigned int execute (function *)
    {
      decompose_multiword_subregs (false);
      return 0;
    }

}; // class pass_lower_subreg

} // anon namespace
1782 
1783 rtl_opt_pass *
make_pass_lower_subreg(gcc::context * ctxt)1784 make_pass_lower_subreg (gcc::context *ctxt)
1785 {
1786   return new pass_lower_subreg (ctxt);
1787 }
1788 
/* Implement second lower subreg pass.  */

namespace {

/* Metadata for the "subreg2" RTL pass; unlike "subreg1" it finishes
   with a DF update (TODO_df_finish).  */

const pass_data pass_data_lower_subreg2 =
{
  RTL_PASS, /* type */
  "subreg2", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_LOWER_SUBREG, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_df_finish, /* todo_flags_finish */
};

class pass_lower_subreg2 : public rtl_opt_pass
{
public:
  pass_lower_subreg2 (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_lower_subreg2, ctxt)
  {}

  /* opt_pass methods: */
  /* Run only when both -fsplit-wide-types and
     -fsplit-wide-types-early are enabled.  */
  virtual bool gate (function *) { return flag_split_wide_types
					  && flag_split_wide_types_early; }
  /* This pass also decomposes pseudo-to-pseudo copies (argument true).  */
  virtual unsigned int execute (function *)
    {
      decompose_multiword_subregs (true);
      return 0;
    }

}; // class pass_lower_subreg2

} // anon namespace
1825 
1826 rtl_opt_pass *
make_pass_lower_subreg2(gcc::context * ctxt)1827 make_pass_lower_subreg2 (gcc::context *ctxt)
1828 {
1829   return new pass_lower_subreg2 (ctxt);
1830 }
1831 
/* Implement third lower subreg pass.  */

namespace {

/* Metadata for the "subreg3" RTL pass; like "subreg2" it finishes
   with a DF update (TODO_df_finish).  */

const pass_data pass_data_lower_subreg3 =
{
  RTL_PASS, /* type */
  "subreg3", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_LOWER_SUBREG, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_df_finish, /* todo_flags_finish */
};

class pass_lower_subreg3 : public rtl_opt_pass
{
public:
  pass_lower_subreg3 (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_lower_subreg3, ctxt)
  {}

  /* opt_pass methods: */
  /* Run whenever -fsplit-wide-types is enabled.  */
  virtual bool gate (function *) { return flag_split_wide_types; }
  /* This pass also decomposes pseudo-to-pseudo copies (argument true).  */
  virtual unsigned int execute (function *)
    {
      decompose_multiword_subregs (true);
      return 0;
    }

}; // class pass_lower_subreg3

} // anon namespace
1867 
1868 rtl_opt_pass *
make_pass_lower_subreg3(gcc::context * ctxt)1869 make_pass_lower_subreg3 (gcc::context *ctxt)
1870 {
1871   return new pass_lower_subreg3 (ctxt);
1872 }
1873