1*38fd1498Szrj /* Coalesce spilled pseudos.
2*38fd1498Szrj Copyright (C) 2010-2018 Free Software Foundation, Inc.
3*38fd1498Szrj Contributed by Vladimir Makarov <vmakarov@redhat.com>.
4*38fd1498Szrj
5*38fd1498Szrj This file is part of GCC.
6*38fd1498Szrj
7*38fd1498Szrj GCC is free software; you can redistribute it and/or modify it under
8*38fd1498Szrj the terms of the GNU General Public License as published by the Free
9*38fd1498Szrj Software Foundation; either version 3, or (at your option) any later
10*38fd1498Szrj version.
11*38fd1498Szrj
12*38fd1498Szrj GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13*38fd1498Szrj WARRANTY; without even the implied warranty of MERCHANTABILITY or
14*38fd1498Szrj FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15*38fd1498Szrj for more details.
16*38fd1498Szrj
17*38fd1498Szrj You should have received a copy of the GNU General Public License
18*38fd1498Szrj along with GCC; see the file COPYING3. If not see
19*38fd1498Szrj <http://www.gnu.org/licenses/>. */
20*38fd1498Szrj
21*38fd1498Szrj
22*38fd1498Szrj /* This file contains a pass making some simple RTL code
23*38fd1498Szrj transformations by coalescing pseudos to remove some move insns.
24*38fd1498Szrj
25*38fd1498Szrj Spilling pseudos in LRA can create memory-memory moves. We should
26*38fd1498Szrj remove potential memory-memory moves before the next constraint
27*38fd1498Szrj pass because the constraint pass will generate additional insns for
28*38fd1498Szrj such moves and all these insns will be hard to remove afterwards.
29*38fd1498Szrj
30*38fd1498Szrj Here we coalesce only spilled pseudos. Coalescing non-spilled
31*38fd1498Szrj pseudos (with different hard regs) might result in spilling
32*38fd1498Szrj additional pseudos because of possible conflicts with other
33*38fd1498Szrj non-spilled pseudos and, as a consequence, in more constraint
34*38fd1498Szrj passes and even LRA infinite cycling. Trivial the same hard
35*38fd1498Szrj register moves will be removed by subsequent compiler passes.
36*38fd1498Szrj
37*38fd1498Szrj We don't coalesce special reload pseudos. It complicates LRA code
38*38fd1498Szrj a lot without visible generated code improvement.
39*38fd1498Szrj
40*38fd1498Szrj The pseudo live-ranges are used to find conflicting pseudos during
41*38fd1498Szrj coalescing.
42*38fd1498Szrj
43*38fd1498Szrj Most frequently executed moves is tried to be coalesced first. */
44*38fd1498Szrj
45*38fd1498Szrj #include "config.h"
46*38fd1498Szrj #include "system.h"
47*38fd1498Szrj #include "coretypes.h"
48*38fd1498Szrj #include "backend.h"
49*38fd1498Szrj #include "rtl.h"
50*38fd1498Szrj #include "predict.h"
51*38fd1498Szrj #include "df.h"
52*38fd1498Szrj #include "insn-config.h"
53*38fd1498Szrj #include "regs.h"
54*38fd1498Szrj #include "memmodel.h"
55*38fd1498Szrj #include "ira.h"
56*38fd1498Szrj #include "recog.h"
57*38fd1498Szrj #include "lra-int.h"
58*38fd1498Szrj
59*38fd1498Szrj /* Arrays whose elements represent the first and the next pseudo
60*38fd1498Szrj (regno) in the coalesced pseudos group to which given pseudo (its
61*38fd1498Szrj regno is the index) belongs. The next of the last pseudo in the
62*38fd1498Szrj group refers to the first pseudo in the group, in other words the
63*38fd1498Szrj group is represented by a cyclic list. */
64*38fd1498Szrj static int *first_coalesced_pseudo, *next_coalesced_pseudo;
65*38fd1498Szrj
66*38fd1498Szrj /* The function is used to sort moves according to their execution
67*38fd1498Szrj frequencies. */
68*38fd1498Szrj static int
move_freq_compare_func(const void * v1p,const void * v2p)69*38fd1498Szrj move_freq_compare_func (const void *v1p, const void *v2p)
70*38fd1498Szrj {
71*38fd1498Szrj rtx_insn *mv1 = *(rtx_insn * const *) v1p;
72*38fd1498Szrj rtx_insn *mv2 = *(rtx_insn * const *) v2p;
73*38fd1498Szrj int pri1, pri2;
74*38fd1498Szrj
75*38fd1498Szrj pri1 = REG_FREQ_FROM_BB (BLOCK_FOR_INSN (mv1));
76*38fd1498Szrj pri2 = REG_FREQ_FROM_BB (BLOCK_FOR_INSN (mv2));
77*38fd1498Szrj if (pri2 - pri1)
78*38fd1498Szrj return pri2 - pri1;
79*38fd1498Szrj
80*38fd1498Szrj /* If frequencies are equal, sort by moves, so that the results of
81*38fd1498Szrj qsort leave nothing to chance. */
82*38fd1498Szrj return (int) INSN_UID (mv1) - (int) INSN_UID (mv2);
83*38fd1498Szrj }
84*38fd1498Szrj
85*38fd1498Szrj /* Pseudos which go away after coalescing. */
86*38fd1498Szrj static bitmap_head coalesced_pseudos_bitmap;
87*38fd1498Szrj
88*38fd1498Szrj /* Merge two sets of coalesced pseudos given correspondingly by
89*38fd1498Szrj pseudos REGNO1 and REGNO2 (more accurately merging REGNO2 group
90*38fd1498Szrj into REGNO1 group). Set up COALESCED_PSEUDOS_BITMAP. */
91*38fd1498Szrj static void
merge_pseudos(int regno1,int regno2)92*38fd1498Szrj merge_pseudos (int regno1, int regno2)
93*38fd1498Szrj {
94*38fd1498Szrj int regno, first, first2, last, next;
95*38fd1498Szrj
96*38fd1498Szrj first = first_coalesced_pseudo[regno1];
97*38fd1498Szrj if ((first2 = first_coalesced_pseudo[regno2]) == first)
98*38fd1498Szrj return;
99*38fd1498Szrj for (last = regno2, regno = next_coalesced_pseudo[regno2];;
100*38fd1498Szrj regno = next_coalesced_pseudo[regno])
101*38fd1498Szrj {
102*38fd1498Szrj first_coalesced_pseudo[regno] = first;
103*38fd1498Szrj bitmap_set_bit (&coalesced_pseudos_bitmap, regno);
104*38fd1498Szrj if (regno == regno2)
105*38fd1498Szrj break;
106*38fd1498Szrj last = regno;
107*38fd1498Szrj }
108*38fd1498Szrj next = next_coalesced_pseudo[first];
109*38fd1498Szrj next_coalesced_pseudo[first] = regno2;
110*38fd1498Szrj next_coalesced_pseudo[last] = next;
111*38fd1498Szrj lra_reg_info[first].live_ranges
112*38fd1498Szrj = (lra_merge_live_ranges
113*38fd1498Szrj (lra_reg_info[first].live_ranges,
114*38fd1498Szrj lra_copy_live_range_list (lra_reg_info[first2].live_ranges)));
115*38fd1498Szrj if (partial_subreg_p (lra_reg_info[first].biggest_mode,
116*38fd1498Szrj lra_reg_info[first2].biggest_mode))
117*38fd1498Szrj lra_reg_info[first].biggest_mode = lra_reg_info[first2].biggest_mode;
118*38fd1498Szrj }
119*38fd1498Szrj
120*38fd1498Szrj /* Change pseudos in *LOC on their coalescing group
121*38fd1498Szrj representatives. */
122*38fd1498Szrj static bool
substitute(rtx * loc)123*38fd1498Szrj substitute (rtx *loc)
124*38fd1498Szrj {
125*38fd1498Szrj int i, regno;
126*38fd1498Szrj const char *fmt;
127*38fd1498Szrj enum rtx_code code;
128*38fd1498Szrj bool res;
129*38fd1498Szrj
130*38fd1498Szrj if (*loc == NULL_RTX)
131*38fd1498Szrj return false;
132*38fd1498Szrj code = GET_CODE (*loc);
133*38fd1498Szrj if (code == REG)
134*38fd1498Szrj {
135*38fd1498Szrj regno = REGNO (*loc);
136*38fd1498Szrj if (regno < FIRST_PSEUDO_REGISTER
137*38fd1498Szrj || first_coalesced_pseudo[regno] == regno)
138*38fd1498Szrj return false;
139*38fd1498Szrj *loc = regno_reg_rtx[first_coalesced_pseudo[regno]];
140*38fd1498Szrj return true;
141*38fd1498Szrj }
142*38fd1498Szrj
143*38fd1498Szrj res = false;
144*38fd1498Szrj fmt = GET_RTX_FORMAT (code);
145*38fd1498Szrj for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
146*38fd1498Szrj {
147*38fd1498Szrj if (fmt[i] == 'e')
148*38fd1498Szrj {
149*38fd1498Szrj if (substitute (&XEXP (*loc, i)))
150*38fd1498Szrj res = true;
151*38fd1498Szrj }
152*38fd1498Szrj else if (fmt[i] == 'E')
153*38fd1498Szrj {
154*38fd1498Szrj int j;
155*38fd1498Szrj
156*38fd1498Szrj for (j = XVECLEN (*loc, i) - 1; j >= 0; j--)
157*38fd1498Szrj if (substitute (&XVECEXP (*loc, i, j)))
158*38fd1498Szrj res = true;
159*38fd1498Szrj }
160*38fd1498Szrj }
161*38fd1498Szrj return res;
162*38fd1498Szrj }
163*38fd1498Szrj
164*38fd1498Szrj /* Specialize "substitute" for use on an insn. This can't change
165*38fd1498Szrj the insn ptr, just the contents of the insn. */
166*38fd1498Szrj
167*38fd1498Szrj static bool
substitute_within_insn(rtx_insn * insn)168*38fd1498Szrj substitute_within_insn (rtx_insn *insn)
169*38fd1498Szrj {
170*38fd1498Szrj rtx loc = insn;
171*38fd1498Szrj return substitute (&loc);
172*38fd1498Szrj }
173*38fd1498Szrj
174*38fd1498Szrj /* The current iteration (1, 2, ...) of the coalescing pass. */
175*38fd1498Szrj int lra_coalesce_iter;
176*38fd1498Szrj
177*38fd1498Szrj /* Return true if the move involving REGNO1 and REGNO2 is a potential
178*38fd1498Szrj memory-memory move. */
179*38fd1498Szrj static bool
mem_move_p(int regno1,int regno2)180*38fd1498Szrj mem_move_p (int regno1, int regno2)
181*38fd1498Szrj {
182*38fd1498Szrj return reg_renumber[regno1] < 0 && reg_renumber[regno2] < 0;
183*38fd1498Szrj }
184*38fd1498Szrj
185*38fd1498Szrj /* Pseudos used instead of the coalesced pseudos. */
186*38fd1498Szrj static bitmap_head used_pseudos_bitmap;
187*38fd1498Szrj
188*38fd1498Szrj /* Set up USED_PSEUDOS_BITMAP, and update LR_BITMAP (a BB live info
189*38fd1498Szrj bitmap). */
190*38fd1498Szrj static void
update_live_info(bitmap lr_bitmap)191*38fd1498Szrj update_live_info (bitmap lr_bitmap)
192*38fd1498Szrj {
193*38fd1498Szrj unsigned int j;
194*38fd1498Szrj bitmap_iterator bi;
195*38fd1498Szrj
196*38fd1498Szrj bitmap_clear (&used_pseudos_bitmap);
197*38fd1498Szrj EXECUTE_IF_AND_IN_BITMAP (&coalesced_pseudos_bitmap, lr_bitmap,
198*38fd1498Szrj FIRST_PSEUDO_REGISTER, j, bi)
199*38fd1498Szrj bitmap_set_bit (&used_pseudos_bitmap, first_coalesced_pseudo[j]);
200*38fd1498Szrj if (! bitmap_empty_p (&used_pseudos_bitmap))
201*38fd1498Szrj {
202*38fd1498Szrj bitmap_and_compl_into (lr_bitmap, &coalesced_pseudos_bitmap);
203*38fd1498Szrj bitmap_ior_into (lr_bitmap, &used_pseudos_bitmap);
204*38fd1498Szrj }
205*38fd1498Szrj }
206*38fd1498Szrj
207*38fd1498Szrj /* Return true if pseudo REGNO can be potentially coalesced. */
208*38fd1498Szrj static bool
coalescable_pseudo_p(int regno)209*38fd1498Szrj coalescable_pseudo_p (int regno)
210*38fd1498Szrj {
211*38fd1498Szrj lra_assert (regno >= FIRST_PSEUDO_REGISTER);
212*38fd1498Szrj return (/* We don't want to coalesce regnos with equivalences, at
213*38fd1498Szrj least without updating this info. */
214*38fd1498Szrj ira_reg_equiv[regno].constant == NULL_RTX
215*38fd1498Szrj && ira_reg_equiv[regno].memory == NULL_RTX
216*38fd1498Szrj && ira_reg_equiv[regno].invariant == NULL_RTX);
217*38fd1498Szrj }
218*38fd1498Szrj
219*38fd1498Szrj /* The major function for aggressive pseudo coalescing of moves only
220*38fd1498Szrj if the both pseudos were spilled and not special reload pseudos. */
221*38fd1498Szrj bool
lra_coalesce(void)222*38fd1498Szrj lra_coalesce (void)
223*38fd1498Szrj {
224*38fd1498Szrj basic_block bb;
225*38fd1498Szrj rtx_insn *mv, *insn, *next, **sorted_moves;
226*38fd1498Szrj rtx set;
227*38fd1498Szrj int i, mv_num, sregno, dregno;
228*38fd1498Szrj int coalesced_moves;
229*38fd1498Szrj int max_regno = max_reg_num ();
230*38fd1498Szrj bitmap_head involved_insns_bitmap;
231*38fd1498Szrj
232*38fd1498Szrj timevar_push (TV_LRA_COALESCE);
233*38fd1498Szrj
234*38fd1498Szrj if (lra_dump_file != NULL)
235*38fd1498Szrj fprintf (lra_dump_file,
236*38fd1498Szrj "\n********** Pseudos coalescing #%d: **********\n\n",
237*38fd1498Szrj ++lra_coalesce_iter);
238*38fd1498Szrj first_coalesced_pseudo = XNEWVEC (int, max_regno);
239*38fd1498Szrj next_coalesced_pseudo = XNEWVEC (int, max_regno);
240*38fd1498Szrj for (i = 0; i < max_regno; i++)
241*38fd1498Szrj first_coalesced_pseudo[i] = next_coalesced_pseudo[i] = i;
242*38fd1498Szrj sorted_moves = XNEWVEC (rtx_insn *, get_max_uid ());
243*38fd1498Szrj mv_num = 0;
244*38fd1498Szrj /* Collect moves. */
245*38fd1498Szrj coalesced_moves = 0;
246*38fd1498Szrj FOR_EACH_BB_FN (bb, cfun)
247*38fd1498Szrj {
248*38fd1498Szrj FOR_BB_INSNS_SAFE (bb, insn, next)
249*38fd1498Szrj if (INSN_P (insn)
250*38fd1498Szrj && (set = single_set (insn)) != NULL_RTX
251*38fd1498Szrj && REG_P (SET_DEST (set)) && REG_P (SET_SRC (set))
252*38fd1498Szrj && (sregno = REGNO (SET_SRC (set))) >= FIRST_PSEUDO_REGISTER
253*38fd1498Szrj && (dregno = REGNO (SET_DEST (set))) >= FIRST_PSEUDO_REGISTER
254*38fd1498Szrj && mem_move_p (sregno, dregno)
255*38fd1498Szrj && coalescable_pseudo_p (sregno) && coalescable_pseudo_p (dregno)
256*38fd1498Szrj && ! side_effects_p (set)
257*38fd1498Szrj && !(lra_intersected_live_ranges_p
258*38fd1498Szrj (lra_reg_info[sregno].live_ranges,
259*38fd1498Szrj lra_reg_info[dregno].live_ranges)))
260*38fd1498Szrj sorted_moves[mv_num++] = insn;
261*38fd1498Szrj }
262*38fd1498Szrj qsort (sorted_moves, mv_num, sizeof (rtx), move_freq_compare_func);
263*38fd1498Szrj /* Coalesced copies, most frequently executed first. */
264*38fd1498Szrj bitmap_initialize (&coalesced_pseudos_bitmap, ®_obstack);
265*38fd1498Szrj bitmap_initialize (&involved_insns_bitmap, ®_obstack);
266*38fd1498Szrj for (i = 0; i < mv_num; i++)
267*38fd1498Szrj {
268*38fd1498Szrj mv = sorted_moves[i];
269*38fd1498Szrj set = single_set (mv);
270*38fd1498Szrj lra_assert (set != NULL && REG_P (SET_SRC (set))
271*38fd1498Szrj && REG_P (SET_DEST (set)));
272*38fd1498Szrj sregno = REGNO (SET_SRC (set));
273*38fd1498Szrj dregno = REGNO (SET_DEST (set));
274*38fd1498Szrj if (first_coalesced_pseudo[sregno] == first_coalesced_pseudo[dregno])
275*38fd1498Szrj {
276*38fd1498Szrj coalesced_moves++;
277*38fd1498Szrj if (lra_dump_file != NULL)
278*38fd1498Szrj fprintf
279*38fd1498Szrj (lra_dump_file, " Coalescing move %i:r%d-r%d (freq=%d)\n",
280*38fd1498Szrj INSN_UID (mv), sregno, dregno,
281*38fd1498Szrj REG_FREQ_FROM_BB (BLOCK_FOR_INSN (mv)));
282*38fd1498Szrj /* We updated involved_insns_bitmap when doing the merge. */
283*38fd1498Szrj }
284*38fd1498Szrj else if (!(lra_intersected_live_ranges_p
285*38fd1498Szrj (lra_reg_info[first_coalesced_pseudo[sregno]].live_ranges,
286*38fd1498Szrj lra_reg_info[first_coalesced_pseudo[dregno]].live_ranges)))
287*38fd1498Szrj {
288*38fd1498Szrj coalesced_moves++;
289*38fd1498Szrj if (lra_dump_file != NULL)
290*38fd1498Szrj fprintf
291*38fd1498Szrj (lra_dump_file,
292*38fd1498Szrj " Coalescing move %i:r%d(%d)-r%d(%d) (freq=%d)\n",
293*38fd1498Szrj INSN_UID (mv), sregno, ORIGINAL_REGNO (SET_SRC (set)),
294*38fd1498Szrj dregno, ORIGINAL_REGNO (SET_DEST (set)),
295*38fd1498Szrj REG_FREQ_FROM_BB (BLOCK_FOR_INSN (mv)));
296*38fd1498Szrj bitmap_ior_into (&involved_insns_bitmap,
297*38fd1498Szrj &lra_reg_info[sregno].insn_bitmap);
298*38fd1498Szrj bitmap_ior_into (&involved_insns_bitmap,
299*38fd1498Szrj &lra_reg_info[dregno].insn_bitmap);
300*38fd1498Szrj merge_pseudos (sregno, dregno);
301*38fd1498Szrj }
302*38fd1498Szrj }
303*38fd1498Szrj bitmap_initialize (&used_pseudos_bitmap, ®_obstack);
304*38fd1498Szrj FOR_EACH_BB_FN (bb, cfun)
305*38fd1498Szrj {
306*38fd1498Szrj update_live_info (df_get_live_in (bb));
307*38fd1498Szrj update_live_info (df_get_live_out (bb));
308*38fd1498Szrj FOR_BB_INSNS_SAFE (bb, insn, next)
309*38fd1498Szrj if (INSN_P (insn)
310*38fd1498Szrj && bitmap_bit_p (&involved_insns_bitmap, INSN_UID (insn)))
311*38fd1498Szrj {
312*38fd1498Szrj if (! substitute_within_insn (insn))
313*38fd1498Szrj continue;
314*38fd1498Szrj lra_update_insn_regno_info (insn);
315*38fd1498Szrj if ((set = single_set (insn)) != NULL_RTX && set_noop_p (set))
316*38fd1498Szrj {
317*38fd1498Szrj /* Coalesced move. */
318*38fd1498Szrj if (lra_dump_file != NULL)
319*38fd1498Szrj fprintf (lra_dump_file, " Removing move %i (freq=%d)\n",
320*38fd1498Szrj INSN_UID (insn),
321*38fd1498Szrj REG_FREQ_FROM_BB (BLOCK_FOR_INSN (insn)));
322*38fd1498Szrj lra_set_insn_deleted (insn);
323*38fd1498Szrj }
324*38fd1498Szrj }
325*38fd1498Szrj }
326*38fd1498Szrj /* If we have situation after inheritance pass:
327*38fd1498Szrj
328*38fd1498Szrj r1 <- p1 insn originally setting p1
329*38fd1498Szrj i1 <- r1 setting inheritance i1 from reload r1
330*38fd1498Szrj ...
331*38fd1498Szrj ... <- ... p2 ... dead p2
332*38fd1498Szrj ..
333*38fd1498Szrj p1 <- i1
334*38fd1498Szrj r2 <- i1
335*38fd1498Szrj ...<- ... r2 ...
336*38fd1498Szrj
337*38fd1498Szrj And we are coalescing p1 and p2 using p1. In this case i1 and p1
338*38fd1498Szrj should have different values, otherwise they can get the same
339*38fd1498Szrj hard reg and this is wrong for insn using p2 before coalescing.
340*38fd1498Szrj The situation even can be more complicated when new reload
341*38fd1498Szrj pseudos occur after the inheriatnce. So invalidate the result
342*38fd1498Szrj pseudos. */
343*38fd1498Szrj for (i = 0; i < max_regno; i++)
344*38fd1498Szrj if (first_coalesced_pseudo[i] == i
345*38fd1498Szrj && first_coalesced_pseudo[i] != next_coalesced_pseudo[i])
346*38fd1498Szrj {
347*38fd1498Szrj lra_set_regno_unique_value (i);
348*38fd1498Szrj if (lra_dump_file != NULL)
349*38fd1498Szrj fprintf (lra_dump_file,
350*38fd1498Szrj " Make unique value for coalescing result r%d\n", i);
351*38fd1498Szrj }
352*38fd1498Szrj bitmap_clear (&used_pseudos_bitmap);
353*38fd1498Szrj bitmap_clear (&involved_insns_bitmap);
354*38fd1498Szrj bitmap_clear (&coalesced_pseudos_bitmap);
355*38fd1498Szrj if (lra_dump_file != NULL && coalesced_moves != 0)
356*38fd1498Szrj fprintf (lra_dump_file, "Coalesced Moves = %d\n", coalesced_moves);
357*38fd1498Szrj free (sorted_moves);
358*38fd1498Szrj free (next_coalesced_pseudo);
359*38fd1498Szrj free (first_coalesced_pseudo);
360*38fd1498Szrj timevar_pop (TV_LRA_COALESCE);
361*38fd1498Szrj return coalesced_moves != 0;
362*38fd1498Szrj }
363