xref: /dflybsd-src/contrib/gcc-8.0/gcc/lra-coalesce.c (revision 38fd149817dfbff97799f62fcb70be98c4e32523)
1*38fd1498Szrj /* Coalesce spilled pseudos.
2*38fd1498Szrj    Copyright (C) 2010-2018 Free Software Foundation, Inc.
3*38fd1498Szrj    Contributed by Vladimir Makarov <vmakarov@redhat.com>.
4*38fd1498Szrj 
5*38fd1498Szrj This file is part of GCC.
6*38fd1498Szrj 
7*38fd1498Szrj GCC is free software; you can redistribute it and/or modify it under
8*38fd1498Szrj the terms of the GNU General Public License as published by the Free
9*38fd1498Szrj Software Foundation; either version 3, or (at your option) any later
10*38fd1498Szrj version.
11*38fd1498Szrj 
12*38fd1498Szrj GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13*38fd1498Szrj WARRANTY; without even the implied warranty of MERCHANTABILITY or
14*38fd1498Szrj FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15*38fd1498Szrj for more details.
16*38fd1498Szrj 
17*38fd1498Szrj You should have received a copy of the GNU General Public License
18*38fd1498Szrj along with GCC; see the file COPYING3.	If not see
19*38fd1498Szrj <http://www.gnu.org/licenses/>.	 */
20*38fd1498Szrj 
21*38fd1498Szrj 
22*38fd1498Szrj /* This file contains a pass making some simple RTL code
23*38fd1498Szrj    transformations by coalescing pseudos to remove some move insns.
24*38fd1498Szrj 
25*38fd1498Szrj    Spilling pseudos in LRA can create memory-memory moves.  We should
26*38fd1498Szrj    remove potential memory-memory moves before the next constraint
27*38fd1498Szrj    pass because the constraint pass will generate additional insns for
28*38fd1498Szrj    such moves and all these insns will be hard to remove afterwards.
29*38fd1498Szrj 
30*38fd1498Szrj    Here we coalesce only spilled pseudos.  Coalescing non-spilled
31*38fd1498Szrj    pseudos (with different hard regs) might result in spilling
32*38fd1498Szrj    additional pseudos because of possible conflicts with other
33*38fd1498Szrj    non-spilled pseudos and, as a consequence, in more constraint
34*38fd1498Szrj    passes and even LRA infinite cycling.  Trivial the same hard
35*38fd1498Szrj    register moves will be removed by subsequent compiler passes.
36*38fd1498Szrj 
37*38fd1498Szrj    We don't coalesce special reload pseudos.  It complicates LRA code
38*38fd1498Szrj    a lot without visible generated code improvement.
39*38fd1498Szrj 
40*38fd1498Szrj    The pseudo live-ranges are used to find conflicting pseudos during
41*38fd1498Szrj    coalescing.
42*38fd1498Szrj 
43*38fd1498Szrj    Most frequently executed moves is tried to be coalesced first.  */
44*38fd1498Szrj 
45*38fd1498Szrj #include "config.h"
46*38fd1498Szrj #include "system.h"
47*38fd1498Szrj #include "coretypes.h"
48*38fd1498Szrj #include "backend.h"
49*38fd1498Szrj #include "rtl.h"
50*38fd1498Szrj #include "predict.h"
51*38fd1498Szrj #include "df.h"
52*38fd1498Szrj #include "insn-config.h"
53*38fd1498Szrj #include "regs.h"
54*38fd1498Szrj #include "memmodel.h"
55*38fd1498Szrj #include "ira.h"
56*38fd1498Szrj #include "recog.h"
57*38fd1498Szrj #include "lra-int.h"
58*38fd1498Szrj 
59*38fd1498Szrj /* Arrays whose elements represent the first and the next pseudo
60*38fd1498Szrj    (regno) in the coalesced pseudos group to which given pseudo (its
61*38fd1498Szrj    regno is the index) belongs.	 The next of the last pseudo in the
62*38fd1498Szrj    group refers to the first pseudo in the group, in other words the
63*38fd1498Szrj    group is represented by a cyclic list.  */
64*38fd1498Szrj static int *first_coalesced_pseudo, *next_coalesced_pseudo;
65*38fd1498Szrj 
66*38fd1498Szrj /* The function is used to sort moves according to their execution
67*38fd1498Szrj    frequencies.	 */
68*38fd1498Szrj static int
move_freq_compare_func(const void * v1p,const void * v2p)69*38fd1498Szrj move_freq_compare_func (const void *v1p, const void *v2p)
70*38fd1498Szrj {
71*38fd1498Szrj   rtx_insn *mv1 = *(rtx_insn * const *) v1p;
72*38fd1498Szrj   rtx_insn *mv2 = *(rtx_insn * const *) v2p;
73*38fd1498Szrj   int pri1, pri2;
74*38fd1498Szrj 
75*38fd1498Szrj   pri1 = REG_FREQ_FROM_BB (BLOCK_FOR_INSN (mv1));
76*38fd1498Szrj   pri2 = REG_FREQ_FROM_BB (BLOCK_FOR_INSN (mv2));
77*38fd1498Szrj   if (pri2 - pri1)
78*38fd1498Szrj     return pri2 - pri1;
79*38fd1498Szrj 
80*38fd1498Szrj   /* If frequencies are equal, sort by moves, so that the results of
81*38fd1498Szrj      qsort leave nothing to chance.  */
82*38fd1498Szrj   return (int) INSN_UID (mv1) - (int) INSN_UID (mv2);
83*38fd1498Szrj }
84*38fd1498Szrj 
85*38fd1498Szrj /* Pseudos which go away after coalescing.  */
86*38fd1498Szrj static bitmap_head coalesced_pseudos_bitmap;
87*38fd1498Szrj 
88*38fd1498Szrj /* Merge two sets of coalesced pseudos given correspondingly by
89*38fd1498Szrj    pseudos REGNO1 and REGNO2 (more accurately merging REGNO2 group
90*38fd1498Szrj    into REGNO1 group).	Set up COALESCED_PSEUDOS_BITMAP.  */
91*38fd1498Szrj static void
merge_pseudos(int regno1,int regno2)92*38fd1498Szrj merge_pseudos (int regno1, int regno2)
93*38fd1498Szrj {
94*38fd1498Szrj   int regno, first, first2, last, next;
95*38fd1498Szrj 
96*38fd1498Szrj   first = first_coalesced_pseudo[regno1];
97*38fd1498Szrj   if ((first2 = first_coalesced_pseudo[regno2]) == first)
98*38fd1498Szrj     return;
99*38fd1498Szrj   for (last = regno2, regno = next_coalesced_pseudo[regno2];;
100*38fd1498Szrj        regno = next_coalesced_pseudo[regno])
101*38fd1498Szrj     {
102*38fd1498Szrj       first_coalesced_pseudo[regno] = first;
103*38fd1498Szrj       bitmap_set_bit (&coalesced_pseudos_bitmap, regno);
104*38fd1498Szrj       if (regno == regno2)
105*38fd1498Szrj 	break;
106*38fd1498Szrj       last = regno;
107*38fd1498Szrj     }
108*38fd1498Szrj   next = next_coalesced_pseudo[first];
109*38fd1498Szrj   next_coalesced_pseudo[first] = regno2;
110*38fd1498Szrj   next_coalesced_pseudo[last] = next;
111*38fd1498Szrj   lra_reg_info[first].live_ranges
112*38fd1498Szrj     = (lra_merge_live_ranges
113*38fd1498Szrj        (lra_reg_info[first].live_ranges,
114*38fd1498Szrj 	lra_copy_live_range_list (lra_reg_info[first2].live_ranges)));
115*38fd1498Szrj   if (partial_subreg_p (lra_reg_info[first].biggest_mode,
116*38fd1498Szrj 			lra_reg_info[first2].biggest_mode))
117*38fd1498Szrj     lra_reg_info[first].biggest_mode = lra_reg_info[first2].biggest_mode;
118*38fd1498Szrj }
119*38fd1498Szrj 
120*38fd1498Szrj /* Change pseudos in *LOC on their coalescing group
121*38fd1498Szrj    representatives.  */
122*38fd1498Szrj static bool
substitute(rtx * loc)123*38fd1498Szrj substitute (rtx *loc)
124*38fd1498Szrj {
125*38fd1498Szrj   int i, regno;
126*38fd1498Szrj   const char *fmt;
127*38fd1498Szrj   enum rtx_code code;
128*38fd1498Szrj   bool res;
129*38fd1498Szrj 
130*38fd1498Szrj   if (*loc == NULL_RTX)
131*38fd1498Szrj     return false;
132*38fd1498Szrj   code = GET_CODE (*loc);
133*38fd1498Szrj   if (code == REG)
134*38fd1498Szrj     {
135*38fd1498Szrj       regno = REGNO (*loc);
136*38fd1498Szrj       if (regno < FIRST_PSEUDO_REGISTER
137*38fd1498Szrj 	  || first_coalesced_pseudo[regno] == regno)
138*38fd1498Szrj 	return false;
139*38fd1498Szrj       *loc = regno_reg_rtx[first_coalesced_pseudo[regno]];
140*38fd1498Szrj       return true;
141*38fd1498Szrj     }
142*38fd1498Szrj 
143*38fd1498Szrj   res = false;
144*38fd1498Szrj   fmt = GET_RTX_FORMAT (code);
145*38fd1498Szrj   for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
146*38fd1498Szrj     {
147*38fd1498Szrj       if (fmt[i] == 'e')
148*38fd1498Szrj 	{
149*38fd1498Szrj 	  if (substitute (&XEXP (*loc, i)))
150*38fd1498Szrj 	    res = true;
151*38fd1498Szrj 	}
152*38fd1498Szrj       else if (fmt[i] == 'E')
153*38fd1498Szrj 	{
154*38fd1498Szrj 	  int j;
155*38fd1498Szrj 
156*38fd1498Szrj 	  for (j = XVECLEN (*loc, i) - 1; j >= 0; j--)
157*38fd1498Szrj 	    if (substitute (&XVECEXP (*loc, i, j)))
158*38fd1498Szrj 	      res = true;
159*38fd1498Szrj 	}
160*38fd1498Szrj     }
161*38fd1498Szrj   return res;
162*38fd1498Szrj }
163*38fd1498Szrj 
164*38fd1498Szrj /* Specialize "substitute" for use on an insn.  This can't change
165*38fd1498Szrj    the insn ptr, just the contents of the insn.  */
166*38fd1498Szrj 
167*38fd1498Szrj static bool
substitute_within_insn(rtx_insn * insn)168*38fd1498Szrj substitute_within_insn (rtx_insn *insn)
169*38fd1498Szrj {
170*38fd1498Szrj   rtx loc = insn;
171*38fd1498Szrj   return substitute (&loc);
172*38fd1498Szrj }
173*38fd1498Szrj 
174*38fd1498Szrj /* The current iteration (1, 2, ...) of the coalescing pass.  */
175*38fd1498Szrj int lra_coalesce_iter;
176*38fd1498Szrj 
177*38fd1498Szrj /* Return true if the move involving REGNO1 and REGNO2 is a potential
178*38fd1498Szrj    memory-memory move.	*/
179*38fd1498Szrj static bool
mem_move_p(int regno1,int regno2)180*38fd1498Szrj mem_move_p (int regno1, int regno2)
181*38fd1498Szrj {
182*38fd1498Szrj   return reg_renumber[regno1] < 0 && reg_renumber[regno2] < 0;
183*38fd1498Szrj }
184*38fd1498Szrj 
185*38fd1498Szrj /* Pseudos used instead of the coalesced pseudos.  */
186*38fd1498Szrj static bitmap_head used_pseudos_bitmap;
187*38fd1498Szrj 
188*38fd1498Szrj /* Set up USED_PSEUDOS_BITMAP, and update LR_BITMAP (a BB live info
189*38fd1498Szrj    bitmap).  */
190*38fd1498Szrj static void
update_live_info(bitmap lr_bitmap)191*38fd1498Szrj update_live_info (bitmap lr_bitmap)
192*38fd1498Szrj {
193*38fd1498Szrj   unsigned int j;
194*38fd1498Szrj   bitmap_iterator bi;
195*38fd1498Szrj 
196*38fd1498Szrj   bitmap_clear (&used_pseudos_bitmap);
197*38fd1498Szrj   EXECUTE_IF_AND_IN_BITMAP (&coalesced_pseudos_bitmap, lr_bitmap,
198*38fd1498Szrj 			    FIRST_PSEUDO_REGISTER, j, bi)
199*38fd1498Szrj     bitmap_set_bit (&used_pseudos_bitmap, first_coalesced_pseudo[j]);
200*38fd1498Szrj   if (! bitmap_empty_p (&used_pseudos_bitmap))
201*38fd1498Szrj     {
202*38fd1498Szrj       bitmap_and_compl_into (lr_bitmap, &coalesced_pseudos_bitmap);
203*38fd1498Szrj       bitmap_ior_into (lr_bitmap, &used_pseudos_bitmap);
204*38fd1498Szrj     }
205*38fd1498Szrj }
206*38fd1498Szrj 
207*38fd1498Szrj /* Return true if pseudo REGNO can be potentially coalesced.  */
208*38fd1498Szrj static bool
coalescable_pseudo_p(int regno)209*38fd1498Szrj coalescable_pseudo_p (int regno)
210*38fd1498Szrj {
211*38fd1498Szrj   lra_assert (regno >= FIRST_PSEUDO_REGISTER);
212*38fd1498Szrj   return (/* We don't want to coalesce regnos with equivalences, at
213*38fd1498Szrj 	     least without updating this info.  */
214*38fd1498Szrj 	  ira_reg_equiv[regno].constant == NULL_RTX
215*38fd1498Szrj 	  && ira_reg_equiv[regno].memory == NULL_RTX
216*38fd1498Szrj 	  && ira_reg_equiv[regno].invariant == NULL_RTX);
217*38fd1498Szrj }
218*38fd1498Szrj 
219*38fd1498Szrj /* The major function for aggressive pseudo coalescing of moves only
220*38fd1498Szrj    if the both pseudos were spilled and not special reload pseudos.  */
221*38fd1498Szrj bool
lra_coalesce(void)222*38fd1498Szrj lra_coalesce (void)
223*38fd1498Szrj {
224*38fd1498Szrj   basic_block bb;
225*38fd1498Szrj   rtx_insn *mv, *insn, *next, **sorted_moves;
226*38fd1498Szrj   rtx set;
227*38fd1498Szrj   int i, mv_num, sregno, dregno;
228*38fd1498Szrj   int coalesced_moves;
229*38fd1498Szrj   int max_regno = max_reg_num ();
230*38fd1498Szrj   bitmap_head involved_insns_bitmap;
231*38fd1498Szrj 
232*38fd1498Szrj   timevar_push (TV_LRA_COALESCE);
233*38fd1498Szrj 
234*38fd1498Szrj   if (lra_dump_file != NULL)
235*38fd1498Szrj     fprintf (lra_dump_file,
236*38fd1498Szrj 	     "\n********** Pseudos coalescing #%d: **********\n\n",
237*38fd1498Szrj 	     ++lra_coalesce_iter);
238*38fd1498Szrj   first_coalesced_pseudo = XNEWVEC (int, max_regno);
239*38fd1498Szrj   next_coalesced_pseudo = XNEWVEC (int, max_regno);
240*38fd1498Szrj   for (i = 0; i < max_regno; i++)
241*38fd1498Szrj     first_coalesced_pseudo[i] = next_coalesced_pseudo[i] = i;
242*38fd1498Szrj   sorted_moves = XNEWVEC (rtx_insn *, get_max_uid ());
243*38fd1498Szrj   mv_num = 0;
244*38fd1498Szrj   /* Collect moves.  */
245*38fd1498Szrj   coalesced_moves = 0;
246*38fd1498Szrj   FOR_EACH_BB_FN (bb, cfun)
247*38fd1498Szrj     {
248*38fd1498Szrj       FOR_BB_INSNS_SAFE (bb, insn, next)
249*38fd1498Szrj 	if (INSN_P (insn)
250*38fd1498Szrj 	    && (set = single_set (insn)) != NULL_RTX
251*38fd1498Szrj 	    && REG_P (SET_DEST (set)) && REG_P (SET_SRC (set))
252*38fd1498Szrj 	    && (sregno = REGNO (SET_SRC (set))) >= FIRST_PSEUDO_REGISTER
253*38fd1498Szrj 	    && (dregno = REGNO (SET_DEST (set))) >= FIRST_PSEUDO_REGISTER
254*38fd1498Szrj 	    && mem_move_p (sregno, dregno)
255*38fd1498Szrj 	    && coalescable_pseudo_p (sregno) && coalescable_pseudo_p (dregno)
256*38fd1498Szrj 	    && ! side_effects_p (set)
257*38fd1498Szrj 	    && !(lra_intersected_live_ranges_p
258*38fd1498Szrj 		 (lra_reg_info[sregno].live_ranges,
259*38fd1498Szrj 		  lra_reg_info[dregno].live_ranges)))
260*38fd1498Szrj 	  sorted_moves[mv_num++] = insn;
261*38fd1498Szrj     }
262*38fd1498Szrj   qsort (sorted_moves, mv_num, sizeof (rtx), move_freq_compare_func);
263*38fd1498Szrj   /* Coalesced copies, most frequently executed first.	*/
264*38fd1498Szrj   bitmap_initialize (&coalesced_pseudos_bitmap, &reg_obstack);
265*38fd1498Szrj   bitmap_initialize (&involved_insns_bitmap, &reg_obstack);
266*38fd1498Szrj   for (i = 0; i < mv_num; i++)
267*38fd1498Szrj     {
268*38fd1498Szrj       mv = sorted_moves[i];
269*38fd1498Szrj       set = single_set (mv);
270*38fd1498Szrj       lra_assert (set != NULL && REG_P (SET_SRC (set))
271*38fd1498Szrj 		  && REG_P (SET_DEST (set)));
272*38fd1498Szrj       sregno = REGNO (SET_SRC (set));
273*38fd1498Szrj       dregno = REGNO (SET_DEST (set));
274*38fd1498Szrj       if (first_coalesced_pseudo[sregno] == first_coalesced_pseudo[dregno])
275*38fd1498Szrj 	{
276*38fd1498Szrj 	  coalesced_moves++;
277*38fd1498Szrj 	  if (lra_dump_file != NULL)
278*38fd1498Szrj 	    fprintf
279*38fd1498Szrj 	      (lra_dump_file, "      Coalescing move %i:r%d-r%d (freq=%d)\n",
280*38fd1498Szrj 	       INSN_UID (mv), sregno, dregno,
281*38fd1498Szrj 	       REG_FREQ_FROM_BB (BLOCK_FOR_INSN (mv)));
282*38fd1498Szrj 	  /* We updated involved_insns_bitmap when doing the merge.  */
283*38fd1498Szrj 	}
284*38fd1498Szrj       else if (!(lra_intersected_live_ranges_p
285*38fd1498Szrj 		 (lra_reg_info[first_coalesced_pseudo[sregno]].live_ranges,
286*38fd1498Szrj 		  lra_reg_info[first_coalesced_pseudo[dregno]].live_ranges)))
287*38fd1498Szrj 	{
288*38fd1498Szrj 	  coalesced_moves++;
289*38fd1498Szrj 	  if (lra_dump_file != NULL)
290*38fd1498Szrj 	    fprintf
291*38fd1498Szrj 	      (lra_dump_file,
292*38fd1498Szrj 	       "  Coalescing move %i:r%d(%d)-r%d(%d) (freq=%d)\n",
293*38fd1498Szrj 	       INSN_UID (mv), sregno, ORIGINAL_REGNO (SET_SRC (set)),
294*38fd1498Szrj 	       dregno, ORIGINAL_REGNO (SET_DEST (set)),
295*38fd1498Szrj 	       REG_FREQ_FROM_BB (BLOCK_FOR_INSN (mv)));
296*38fd1498Szrj 	  bitmap_ior_into (&involved_insns_bitmap,
297*38fd1498Szrj 			   &lra_reg_info[sregno].insn_bitmap);
298*38fd1498Szrj 	  bitmap_ior_into (&involved_insns_bitmap,
299*38fd1498Szrj 			   &lra_reg_info[dregno].insn_bitmap);
300*38fd1498Szrj 	  merge_pseudos (sregno, dregno);
301*38fd1498Szrj 	}
302*38fd1498Szrj     }
303*38fd1498Szrj   bitmap_initialize (&used_pseudos_bitmap, &reg_obstack);
304*38fd1498Szrj   FOR_EACH_BB_FN (bb, cfun)
305*38fd1498Szrj     {
306*38fd1498Szrj       update_live_info (df_get_live_in (bb));
307*38fd1498Szrj       update_live_info (df_get_live_out (bb));
308*38fd1498Szrj       FOR_BB_INSNS_SAFE (bb, insn, next)
309*38fd1498Szrj 	if (INSN_P (insn)
310*38fd1498Szrj 	    && bitmap_bit_p (&involved_insns_bitmap, INSN_UID (insn)))
311*38fd1498Szrj 	  {
312*38fd1498Szrj 	    if (! substitute_within_insn (insn))
313*38fd1498Szrj 	      continue;
314*38fd1498Szrj 	    lra_update_insn_regno_info (insn);
315*38fd1498Szrj 	    if ((set = single_set (insn)) != NULL_RTX && set_noop_p (set))
316*38fd1498Szrj 	      {
317*38fd1498Szrj 		/* Coalesced move.  */
318*38fd1498Szrj 		if (lra_dump_file != NULL)
319*38fd1498Szrj 		  fprintf (lra_dump_file, "	 Removing move %i (freq=%d)\n",
320*38fd1498Szrj 			   INSN_UID (insn),
321*38fd1498Szrj 			   REG_FREQ_FROM_BB (BLOCK_FOR_INSN (insn)));
322*38fd1498Szrj 		lra_set_insn_deleted (insn);
323*38fd1498Szrj 	      }
324*38fd1498Szrj 	  }
325*38fd1498Szrj     }
326*38fd1498Szrj   /* If we have situation after inheritance pass:
327*38fd1498Szrj 
328*38fd1498Szrj      r1 <- p1   insn originally setting p1
329*38fd1498Szrj      i1 <- r1   setting inheritance i1 from reload r1
330*38fd1498Szrj        ...
331*38fd1498Szrj      ... <- ... p2 ... dead p2
332*38fd1498Szrj      ..
333*38fd1498Szrj      p1 <- i1
334*38fd1498Szrj      r2 <- i1
335*38fd1498Szrj      ...<- ... r2 ...
336*38fd1498Szrj 
337*38fd1498Szrj      And we are coalescing p1 and p2 using p1.  In this case i1 and p1
338*38fd1498Szrj      should have different values, otherwise they can get the same
339*38fd1498Szrj      hard reg and this is wrong for insn using p2 before coalescing.
340*38fd1498Szrj      The situation even can be more complicated when new reload
341*38fd1498Szrj      pseudos occur after the inheriatnce.  So invalidate the result
342*38fd1498Szrj      pseudos.  */
343*38fd1498Szrj   for (i = 0; i < max_regno; i++)
344*38fd1498Szrj     if (first_coalesced_pseudo[i] == i
345*38fd1498Szrj 	&& first_coalesced_pseudo[i] != next_coalesced_pseudo[i])
346*38fd1498Szrj       {
347*38fd1498Szrj 	lra_set_regno_unique_value (i);
348*38fd1498Szrj 	if (lra_dump_file != NULL)
349*38fd1498Szrj 	  fprintf (lra_dump_file,
350*38fd1498Szrj 		   "	 Make unique value for coalescing result r%d\n", i);
351*38fd1498Szrj       }
352*38fd1498Szrj   bitmap_clear (&used_pseudos_bitmap);
353*38fd1498Szrj   bitmap_clear (&involved_insns_bitmap);
354*38fd1498Szrj   bitmap_clear (&coalesced_pseudos_bitmap);
355*38fd1498Szrj   if (lra_dump_file != NULL && coalesced_moves != 0)
356*38fd1498Szrj     fprintf (lra_dump_file, "Coalesced Moves = %d\n", coalesced_moves);
357*38fd1498Szrj   free (sorted_moves);
358*38fd1498Szrj   free (next_coalesced_pseudo);
359*38fd1498Szrj   free (first_coalesced_pseudo);
360*38fd1498Szrj   timevar_pop (TV_LRA_COALESCE);
361*38fd1498Szrj   return coalesced_moves != 0;
362*38fd1498Szrj }
363