/* RTL-based forward propagation pass for GNU compiler.
   Copyright (C) 2005-2018 Free Software Foundation, Inc.
   Contributed by Paolo Bonzini and Steven Bosscher.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "predict.h"
#include "df.h"
#include "memmodel.h"
#include "tm_p.h"
#include "insn-config.h"
#include "emit-rtl.h"
#include "recog.h"

#include "sparseset.h"
#include "cfgrtl.h"
#include "cfgcleanup.h"
#include "cfgloop.h"
#include "tree-pass.h"
#include "domwalk.h"
#include "rtl-iter.h"

/* This pass does simple forward propagation and simplification when an
   operand of an insn can only come from a single def.  This pass uses
   df.c, so it is global.  However, we only do limited analysis of
   available expressions.

   1) The pass tries to propagate the source of the def into the use,
   and checks if the result is independent of the substituted value.
   For example, the high word of a (zero_extend:DI (reg:SI M)) is always
   zero, independent of the source register.

   In particular, we propagate constants into the use site.  Sometimes
   RTL expansion did not put the constant in the same insn on purpose,
   to satisfy a predicate, and the result will fail to be recognized;
   but this happens rarely and in this case we can still create a
   REG_EQUAL note.  For multi-word operations, this

      (set (subreg:SI (reg:DI 120) 0) (const_int 0))
      (set (subreg:SI (reg:DI 120) 4) (const_int -1))
      (set (subreg:SI (reg:DI 122) 0)
           (ior:SI (subreg:SI (reg:DI 119) 0) (subreg:SI (reg:DI 120) 0)))
      (set (subreg:SI (reg:DI 122) 4)
           (ior:SI (subreg:SI (reg:DI 119) 4) (subreg:SI (reg:DI 120) 4)))

   can be simplified to the much simpler

      (set (subreg:SI (reg:DI 122) 0) (subreg:SI (reg:DI 119) 0))
      (set (subreg:SI (reg:DI 122) 4) (const_int -1))

   This particular propagation is also effective at putting together
   complex addressing modes.  We are more aggressive inside MEMs, in
   that all definitions are propagated if the use is in a MEM; if the
   result is a valid memory address we check address_cost to decide
   whether the substitution is worthwhile.

   2) The pass propagates register copies.  This is not as effective as
   the copy propagation done by CSE's canon_reg, which works by walking
   the instruction chain, but it can help the other transformations.

   We should consider removing this optimization, and instead reorder the
   RTL passes, because GCSE does this transformation too.  With some luck,
   the CSE pass at the end of rest_of_handle_gcse could also go away.

   3) The pass looks for paradoxical subregs that are actually unnecessary.
   Things like this:

      (set (reg:QI 120) (subreg:QI (reg:SI 118) 0))
      (set (reg:QI 121) (subreg:QI (reg:SI 119) 0))
      (set (reg:SI 122) (plus:SI (subreg:SI (reg:QI 120) 0)
                                 (subreg:SI (reg:QI 121) 0)))

   are very common on machines that can only do word-sized operations.
   For each use of a paradoxical subreg (subreg:WIDER (reg:NARROW N) 0),
   if it has a single def and it is (subreg:NARROW (reg:WIDE M) 0),
   we can replace the paradoxical subreg with simply (reg:WIDE M).  The
   above will simplify this to

      (set (reg:QI 120) (subreg:QI (reg:SI 118) 0))
      (set (reg:QI 121) (subreg:QI (reg:SI 119) 0))
      (set (reg:SI 122) (plus:SI (reg:SI 118) (reg:SI 119)))

   where the first two insns are now dead.

   We used to use reaching definitions to find which uses have a
   single reaching definition (sounds obvious...), but this is too
   complex a problem in nasty testcases like PR33928.  Now we use the
   multiple definitions problem in df-problems.c.  The similarity
   between that problem and SSA form creation is taken further, in
   that fwprop does a dominator walk to create its chains; however,
   instead of creating a PHI function where multiple definitions meet
   we just punt and record only singleton use-def chains, which is
   all that is needed by fwprop.  */


static int num_changes;

static vec<df_ref> use_def_ref;
static vec<df_ref> reg_defs;
static vec<df_ref> reg_defs_stack;

/* The maximum number of propagations that are still allowed.  If we do
   more propagations than we originally had uses, we must have ended up
   in a propagation loop, as in PR79405.  Until the algorithm fwprop
   uses provably cannot get into such loops, we need a workaround like
   this.  */
static int propagations_left;

/* The MD bitmaps are trimmed to include only live registers to cut
   memory usage on testcases like insn-recog.c.  Track live registers
   in the basic block and do not perform forward propagation if the
   destination is a dead pseudo occurring in a note.  */
static bitmap local_md;
static bitmap local_lr;

/* Return the only def in USE's use-def chain, or NULL if there is
   more than one def in the chain.  */

static inline df_ref
get_def_for_use (df_ref use)
{
  return use_def_ref[DF_REF_ID (use)];
}


/* Update the reg_defs vector with non-partial definitions in DEF_REC.
   TOP_FLAG says which artificial defs should be used, when DEF_REC
   is an artificial def vector.  LOCAL_MD is modified as after a
   df_md_simulate_* function; we do more or less the same processing
   done there, so we do not use those functions.  */

#define DF_MD_GEN_FLAGS \
        (DF_REF_PARTIAL | DF_REF_CONDITIONAL | DF_REF_MAY_CLOBBER)

static void
process_defs (df_ref def, int top_flag)
{
  for (; def; def = DF_REF_NEXT_LOC (def))
    {
      df_ref curr_def = reg_defs[DF_REF_REGNO (def)];
      unsigned int dregno;

      if ((DF_REF_FLAGS (def) & DF_REF_AT_TOP) != top_flag)
        continue;

      dregno = DF_REF_REGNO (def);
      if (curr_def)
        reg_defs_stack.safe_push (curr_def);
      else
        {
          /* Do not store anything if "transitioning" from NULL to NULL.  But
             otherwise, push a special entry on the stack to tell the
             leave_block callback that the entry in reg_defs was NULL.  */
          if (DF_REF_FLAGS (def) & DF_MD_GEN_FLAGS)
            ;
          else
            reg_defs_stack.safe_push (def);
        }

      if (DF_REF_FLAGS (def) & DF_MD_GEN_FLAGS)
        {
          bitmap_set_bit (local_md, dregno);
          reg_defs[dregno] = NULL;
        }
      else
        {
          bitmap_clear_bit (local_md, dregno);
          reg_defs[dregno] = def;
        }
    }
}
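
/* An illustrative walk-through of the stack discipline above (block and
   register numbers are hypothetical, not taken from a real dump).
   Suppose reg 100 has no active definition on entry, is fully defined
   by DEF1 in block B1 and by DEF2 in a block B2 dominated by B1:

     enter B1:  push NULL marker; for DEF1, reg_defs[100] was NULL, so
                push DEF1 itself as the "was NULL" entry; reg_defs[100] = DEF1
     enter B2:  push NULL marker; for DEF2, push the previous value DEF1;
                reg_defs[100] = DEF2
     leave B2:  pop DEF1, which differs from reg_defs[100] (== DEF2),
                so restore reg_defs[100] = DEF1; pop the marker
     leave B1:  pop DEF1, which equals reg_defs[100], i.e. it was the
                "was NULL" entry, so restore reg_defs[100] = NULL  */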


/* Fill the use_def_ref vector with values for the uses in USE_REC,
   taking reaching definitions info from LOCAL_MD and REG_DEFS.
   TOP_FLAG says which artificial uses should be used, when USE_REC
   is an artificial use vector.  */

static void
process_uses (df_ref use, int top_flag)
{
  for (; use; use = DF_REF_NEXT_LOC (use))
    if ((DF_REF_FLAGS (use) & DF_REF_AT_TOP) == top_flag)
      {
        unsigned int uregno = DF_REF_REGNO (use);
        if (reg_defs[uregno]
            && !bitmap_bit_p (local_md, uregno)
            && bitmap_bit_p (local_lr, uregno))
          use_def_ref[DF_REF_ID (use)] = reg_defs[uregno];
      }
}

class single_def_use_dom_walker : public dom_walker
{
public:
  single_def_use_dom_walker (cdi_direction direction)
    : dom_walker (direction) {}
  virtual edge before_dom_children (basic_block);
  virtual void after_dom_children (basic_block);
};

edge
single_def_use_dom_walker::before_dom_children (basic_block bb)
{
  int bb_index = bb->index;
  struct df_md_bb_info *md_bb_info = df_md_get_bb_info (bb_index);
  struct df_lr_bb_info *lr_bb_info = df_lr_get_bb_info (bb_index);
  rtx_insn *insn;

  bitmap_copy (local_md, &md_bb_info->in);
  bitmap_copy (local_lr, &lr_bb_info->in);

  /* Push a marker for the leave_block callback.  */
  reg_defs_stack.safe_push (NULL);

  process_uses (df_get_artificial_uses (bb_index), DF_REF_AT_TOP);
  process_defs (df_get_artificial_defs (bb_index), DF_REF_AT_TOP);

  /* We don't call df_simulate_initialize_forwards, as it may overestimate
     the live registers if there are unused artificial defs.  We prefer
     liveness to be underestimated.  */

  FOR_BB_INSNS (bb, insn)
    if (INSN_P (insn))
      {
        unsigned int uid = INSN_UID (insn);
        process_uses (DF_INSN_UID_USES (uid), 0);
        process_uses (DF_INSN_UID_EQ_USES (uid), 0);
        process_defs (DF_INSN_UID_DEFS (uid), 0);
        df_simulate_one_insn_forwards (bb, insn, local_lr);
      }

  process_uses (df_get_artificial_uses (bb_index), 0);
  process_defs (df_get_artificial_defs (bb_index), 0);

  return NULL;
}

/* Pop the definitions created in this basic block when leaving its
   dominated parts.  */

void
single_def_use_dom_walker::after_dom_children (basic_block bb ATTRIBUTE_UNUSED)
{
  df_ref saved_def;
  while ((saved_def = reg_defs_stack.pop ()) != NULL)
    {
      unsigned int dregno = DF_REF_REGNO (saved_def);

      /* See also process_defs.  */
      if (saved_def == reg_defs[dregno])
        reg_defs[dregno] = NULL;
      else
        reg_defs[dregno] = saved_def;
    }
}


/* Build a vector holding the reaching definitions of uses reached by a
   single dominating definition.  */

static void
build_single_def_use_links (void)
{
  /* We use the multiple definitions problem to compute our restricted
     use-def chains.  */
  df_set_flags (DF_EQ_NOTES);
  df_md_add_problem ();
  df_note_add_problem ();
  df_analyze ();
  df_maybe_reorganize_use_refs (DF_REF_ORDER_BY_INSN_WITH_NOTES);

  use_def_ref.create (DF_USES_TABLE_SIZE ());
  use_def_ref.safe_grow_cleared (DF_USES_TABLE_SIZE ());

  reg_defs.create (max_reg_num ());
  reg_defs.safe_grow_cleared (max_reg_num ());

  reg_defs_stack.create (n_basic_blocks_for_fn (cfun) * 10);
  local_md = BITMAP_ALLOC (NULL);
  local_lr = BITMAP_ALLOC (NULL);

  /* Walk the dominator tree looking for single reaching definitions
     dominating the uses.  This is similar to how SSA form is built.  */
  single_def_use_dom_walker (CDI_DOMINATORS)
    .walk (cfun->cfg->x_entry_block_ptr);

  BITMAP_FREE (local_lr);
  BITMAP_FREE (local_md);
  reg_defs.release ();
  reg_defs_stack.release ();
}


/* Do not try to replace constant addresses or addresses of local and
   argument slots.  These MEM expressions are made only once and inserted
   in many instructions, as well as being used to control symbol table
   output.  It is not safe to clobber them.

   There are some uncommon cases where the address is already in a register
   for some reason, but we cannot take advantage of that because we have
   no easy way to unshare the MEM.  In addition, looking up all stack
   addresses is costly.  */

static bool
can_simplify_addr (rtx addr)
{
  rtx reg;

  if (CONSTANT_ADDRESS_P (addr))
    return false;

  if (GET_CODE (addr) == PLUS)
    reg = XEXP (addr, 0);
  else
    reg = addr;

  return (!REG_P (reg)
          || (REGNO (reg) != FRAME_POINTER_REGNUM
              && REGNO (reg) != HARD_FRAME_POINTER_REGNUM
              && REGNO (reg) != ARG_POINTER_REGNUM));
}

/* Returns a canonical version of X for the address, from the point of
   view that all multiplications are represented as MULT instead of
   the multiply by a power of 2 being represented as ASHIFT.

   Every ASHIFT we find has been made by simplify_gen_binary and was not
   there before, so it is not shared.  So we can do this in place.  */

static void
canonicalize_address (rtx x)
{
  for (;;)
    switch (GET_CODE (x))
      {
      case ASHIFT:
        if (CONST_INT_P (XEXP (x, 1))
            && INTVAL (XEXP (x, 1)) < GET_MODE_UNIT_BITSIZE (GET_MODE (x))
            && INTVAL (XEXP (x, 1)) >= 0)
          {
            HOST_WIDE_INT shift = INTVAL (XEXP (x, 1));
            PUT_CODE (x, MULT);
            XEXP (x, 1) = gen_int_mode (HOST_WIDE_INT_1 << shift,
                                        GET_MODE (x));
          }

        x = XEXP (x, 0);
        break;

      case PLUS:
        if (GET_CODE (XEXP (x, 0)) == PLUS
            || GET_CODE (XEXP (x, 0)) == ASHIFT
            || GET_CODE (XEXP (x, 0)) == CONST)
          canonicalize_address (XEXP (x, 0));

        x = XEXP (x, 1);
        break;

      case CONST:
        x = XEXP (x, 0);
        break;

      default:
        return;
      }
}
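
/* For illustration (hypothetical RTL, not taken from a real dump):
   canonicalize_address rewrites a scaled index such as

     (plus:SI (ashift:SI (reg:SI 100) (const_int 2)) (reg:SI 101))

   in place into

     (plus:SI (mult:SI (reg:SI 100) (const_int 4)) (reg:SI 101))

   which is the canonical MULT form for addresses before they are
   handed to memory_address_addr_space_p and address_cost.  */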

/* OLD is a memory address.  Return whether it is good to use NEW instead,
   for a memory access in the given MODE.  */

static bool
should_replace_address (rtx old_rtx, rtx new_rtx, machine_mode mode,
                        addr_space_t as, bool speed)
{
  int gain;

  if (rtx_equal_p (old_rtx, new_rtx)
      || !memory_address_addr_space_p (mode, new_rtx, as))
    return false;

  /* Copy propagation is always ok.  */
  if (REG_P (old_rtx) && REG_P (new_rtx))
    return true;

  /* Prefer the new address if it is less expensive.  */
  gain = (address_cost (old_rtx, mode, as, speed)
          - address_cost (new_rtx, mode, as, speed));

  /* If the addresses have equivalent cost, prefer the new address
     if it has the highest `set_src_cost'.  That has the potential of
     eliminating the most insns without additional costs, and it
     is the same that cse.c used to do.  */
  if (gain == 0)
    gain = (set_src_cost (new_rtx, VOIDmode, speed)
            - set_src_cost (old_rtx, VOIDmode, speed));

  return (gain > 0);
}
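
/* A sketch of how the heuristic above plays out; the costs are made up,
   since address_cost and set_src_cost are entirely target-dependent.
   Replacing the address in (mem:SI (reg:SI 110)) with
   (plus:SI (reg:SI 111) (const_int 8)) is accepted only if the PLUS is
   a valid SImode address in the given address space and its
   address_cost is no higher; on a tie, the candidate with the higher
   set_src_cost wins, since that is the form more likely to let the
   defining insn be deleted.  */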


/* Flags for the last parameter of propagate_rtx_1.  */

enum {
  /* If PR_CAN_APPEAR is true, propagate_rtx_1 always returns true;
     if it is false, propagate_rtx_1 returns false if, for at least
     one occurrence OLD, it failed to collapse the result to a constant.
     For example, (mult:M (reg:M A) (minus:M (reg:M B) (reg:M A))) may
     collapse to zero if replacing (reg:M B) with (reg:M A).

     PR_CAN_APPEAR is disregarded inside MEMs: in that case,
     propagate_rtx_1 just tries to make cheaper and valid memory
     addresses.  */
  PR_CAN_APPEAR = 1,

  /* If PR_HANDLE_MEM is not set, propagate_rtx_1 won't attempt any replacement
     outside memory addresses.  This is needed because propagate_rtx_1 does
     not do any analysis on memory; thus it is very conservative and in general
     it will fail if non-read-only MEMs are found in the source expression.

     PR_HANDLE_MEM is set when the source of the propagation was not
     another MEM.  Then, it is safe not to treat non-read-only MEMs as
     ``opaque'' objects.  */
  PR_HANDLE_MEM = 2,

  /* Set when costs should be optimized for speed.  */
  PR_OPTIMIZE_FOR_SPEED = 4
};
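
/* A brief sketch of how these flags are combined by propagate_rtx below
   (see that function for the authoritative logic): propagating one
   pseudo for another gets PR_CAN_APPEAR | PR_HANDLE_MEM; propagating an
   expression that contains a non-read-only MEM keeps PR_CAN_APPEAR off
   and also drops PR_HANDLE_MEM, so that other MEMs in the use are
   treated as opaque; PR_OPTIMIZE_FOR_SPEED simply mirrors
   optimize_bb_for_speed_p for the use's basic block.  */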


/* Replace all occurrences of OLD in *PX with NEW and try to simplify the
   resulting expression.  Replace *PX with a new RTL expression if an
   occurrence of OLD was found.

   This is only a wrapper around simplify-rtx.c: do not add any pattern
   matching code here.  (The sole exception is the handling of LO_SUM, but
   that is because there is no simplify_gen_* function for LO_SUM).  */

static bool
propagate_rtx_1 (rtx *px, rtx old_rtx, rtx new_rtx, int flags)
{
  rtx x = *px, tem = NULL_RTX, op0, op1, op2;
  enum rtx_code code = GET_CODE (x);
  machine_mode mode = GET_MODE (x);
  machine_mode op_mode;
  bool can_appear = (flags & PR_CAN_APPEAR) != 0;
  bool valid_ops = true;

  if (!(flags & PR_HANDLE_MEM) && MEM_P (x) && !MEM_READONLY_P (x))
    {
      /* If unsafe, change MEMs to CLOBBERs or SCRATCHes (to preserve whether
         they have side effects or not).  */
      *px = (side_effects_p (x)
             ? gen_rtx_CLOBBER (GET_MODE (x), const0_rtx)
             : gen_rtx_SCRATCH (GET_MODE (x)));
      return false;
    }

  /* If X is OLD_RTX, return NEW_RTX.  But not if replacing only within an
     address, and we are *not* inside one.  */
  if (x == old_rtx)
    {
      *px = new_rtx;
      return can_appear;
    }

  /* If this is an expression, try recursive substitution.  */
  switch (GET_RTX_CLASS (code))
    {
    case RTX_UNARY:
      op0 = XEXP (x, 0);
      op_mode = GET_MODE (op0);
      valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags);
      if (op0 == XEXP (x, 0))
        return true;
      tem = simplify_gen_unary (code, mode, op0, op_mode);
      break;

    case RTX_BIN_ARITH:
    case RTX_COMM_ARITH:
      op0 = XEXP (x, 0);
      op1 = XEXP (x, 1);
      valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags);
      valid_ops &= propagate_rtx_1 (&op1, old_rtx, new_rtx, flags);
      if (op0 == XEXP (x, 0) && op1 == XEXP (x, 1))
        return true;
      tem = simplify_gen_binary (code, mode, op0, op1);
      break;

    case RTX_COMPARE:
    case RTX_COMM_COMPARE:
      op0 = XEXP (x, 0);
      op1 = XEXP (x, 1);
      op_mode = GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
      valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags);
      valid_ops &= propagate_rtx_1 (&op1, old_rtx, new_rtx, flags);
      if (op0 == XEXP (x, 0) && op1 == XEXP (x, 1))
        return true;
      tem = simplify_gen_relational (code, mode, op_mode, op0, op1);
      break;

    case RTX_TERNARY:
    case RTX_BITFIELD_OPS:
      op0 = XEXP (x, 0);
      op1 = XEXP (x, 1);
      op2 = XEXP (x, 2);
      op_mode = GET_MODE (op0);
      valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags);
      valid_ops &= propagate_rtx_1 (&op1, old_rtx, new_rtx, flags);
      valid_ops &= propagate_rtx_1 (&op2, old_rtx, new_rtx, flags);
      if (op0 == XEXP (x, 0) && op1 == XEXP (x, 1) && op2 == XEXP (x, 2))
        return true;
      if (op_mode == VOIDmode)
        op_mode = GET_MODE (op0);
      tem = simplify_gen_ternary (code, mode, op_mode, op0, op1, op2);
      break;

    case RTX_EXTRA:
      /* The only case we try to handle is a SUBREG.  */
      if (code == SUBREG)
        {
          op0 = XEXP (x, 0);
          valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags);
          if (op0 == XEXP (x, 0))
            return true;
          tem = simplify_gen_subreg (mode, op0, GET_MODE (SUBREG_REG (x)),
                                     SUBREG_BYTE (x));
        }
      break;

    case RTX_OBJ:
      if (code == MEM && x != new_rtx)
        {
          rtx new_op0;
          op0 = XEXP (x, 0);

          /* There are some addresses that we cannot work on.  */
          if (!can_simplify_addr (op0))
            return true;

          op0 = new_op0 = targetm.delegitimize_address (op0);
          valid_ops &= propagate_rtx_1 (&new_op0, old_rtx, new_rtx,
                                        flags | PR_CAN_APPEAR);

          /* Dismiss transformations that we do not want to carry out.  */
          if (!valid_ops
              || new_op0 == op0
              || !(GET_MODE (new_op0) == GET_MODE (op0)
                   || GET_MODE (new_op0) == VOIDmode))
            return true;

          canonicalize_address (new_op0);

          /* Copy propagations are always ok.  Otherwise check the costs.  */
          if (!(REG_P (old_rtx) && REG_P (new_rtx))
              && !should_replace_address (op0, new_op0, GET_MODE (x),
                                          MEM_ADDR_SPACE (x),
                                          flags & PR_OPTIMIZE_FOR_SPEED))
            return true;

          tem = replace_equiv_address_nv (x, new_op0);
        }

      else if (code == LO_SUM)
        {
          op0 = XEXP (x, 0);
          op1 = XEXP (x, 1);

          /* The only simplification we do attempts to remove references to op0
             or make it constant -- in both cases, op0's invalidity will not
             make the result invalid.  */
          propagate_rtx_1 (&op0, old_rtx, new_rtx, flags | PR_CAN_APPEAR);
          valid_ops &= propagate_rtx_1 (&op1, old_rtx, new_rtx, flags);
          if (op0 == XEXP (x, 0) && op1 == XEXP (x, 1))
            return true;

          /* (lo_sum (high x) x) -> x */
          if (GET_CODE (op0) == HIGH && rtx_equal_p (XEXP (op0, 0), op1))
            tem = op1;
          else
            tem = gen_rtx_LO_SUM (mode, op0, op1);

          /* OP1 is likely not a legitimate address, otherwise there would
             have been no LO_SUM.  We want it to disappear if it is invalid,
             so return false in that case.  */
          return memory_address_p (mode, tem);
        }

      else if (code == REG)
        {
          if (rtx_equal_p (x, old_rtx))
            {
              *px = new_rtx;
              return can_appear;
            }
        }
      break;

    default:
      break;
    }

  /* No change, no trouble.  */
  if (tem == NULL_RTX)
    return true;

  *px = tem;

  /* Allow replacements that simplify operations on a vector or complex
     value to a component.  The most prominent case is
     (subreg ([vec_]concat ...)).  */
  if (REG_P (tem) && !HARD_REGISTER_P (tem)
      && (VECTOR_MODE_P (GET_MODE (new_rtx))
          || COMPLEX_MODE_P (GET_MODE (new_rtx)))
      && GET_MODE (tem) == GET_MODE_INNER (GET_MODE (new_rtx)))
    return true;

  /* The replacement we made so far is valid, if all of the recursive
     replacements were valid, or we could simplify everything to
     a constant.  */
  return valid_ops || can_appear || CONSTANT_P (tem);
}


/* Return true if X contains a non-constant mem.  */

static bool
varying_mem_p (const_rtx x)
{
  subrtx_iterator::array_type array;
  FOR_EACH_SUBRTX (iter, array, x, NONCONST)
    if (MEM_P (*iter) && !MEM_READONLY_P (*iter))
      return true;
  return false;
}


/* Replace all occurrences of OLD in X with NEW and try to simplify the
   resulting expression (in mode MODE).  Return a new expression if it is
   a constant, otherwise X.

   Simplifications where occurrences of NEW collapse to a constant are always
   accepted.  All simplifications are accepted if NEW is a pseudo too.
   Otherwise, we accept simplifications that have a lower or equal cost.  */

static rtx
propagate_rtx (rtx x, machine_mode mode, rtx old_rtx, rtx new_rtx,
               bool speed)
{
  rtx tem;
  bool collapsed;
  int flags;

  if (REG_P (new_rtx) && REGNO (new_rtx) < FIRST_PSEUDO_REGISTER)
    return NULL_RTX;

  flags = 0;
  if (REG_P (new_rtx)
      || CONSTANT_P (new_rtx)
      || (GET_CODE (new_rtx) == SUBREG
          && REG_P (SUBREG_REG (new_rtx))
          && !paradoxical_subreg_p (mode, GET_MODE (SUBREG_REG (new_rtx)))))
    flags |= PR_CAN_APPEAR;
  if (!varying_mem_p (new_rtx))
    flags |= PR_HANDLE_MEM;

  if (speed)
    flags |= PR_OPTIMIZE_FOR_SPEED;

  tem = x;
  collapsed = propagate_rtx_1 (&tem, old_rtx, copy_rtx (new_rtx), flags);
  if (tem == x || !collapsed)
    return NULL_RTX;

  /* gen_lowpart_common will not be able to process VOIDmode entities other
     than CONST_INTs.  */
  if (GET_MODE (tem) == VOIDmode && !CONST_INT_P (tem))
    return NULL_RTX;

  if (GET_MODE (tem) == VOIDmode)
    tem = rtl_hooks.gen_lowpart_no_emit (mode, tem);
  else
    gcc_assert (GET_MODE (tem) == mode);

  return tem;
}



/* Return true if the register from reference REF is killed
   between FROM and (but not including) TO.  */

static bool
local_ref_killed_between_p (df_ref ref, rtx_insn *from, rtx_insn *to)
{
  rtx_insn *insn;

  for (insn = from; insn != to; insn = NEXT_INSN (insn))
    {
      df_ref def;
      if (!INSN_P (insn))
        continue;

      FOR_EACH_INSN_DEF (def, insn)
        if (DF_REF_REGNO (ref) == DF_REF_REGNO (def))
          return true;
    }
  return false;
}


/* Check if the given DEF is available in INSN.  This would require full
   computation of available expressions; we check only restricted conditions:
   - if DEF is the sole definition of its register, go ahead;
   - in the same basic block, we check for no definitions killing the
     definition of DEF_INSN;
   - if USE's basic block has DEF's basic block as the sole predecessor,
     we check if the definition is killed after DEF_INSN or before
     TARGET_INSN, in their respective basic blocks.  */
static bool
use_killed_between (df_ref use, rtx_insn *def_insn, rtx_insn *target_insn)
{
  basic_block def_bb = BLOCK_FOR_INSN (def_insn);
  basic_block target_bb = BLOCK_FOR_INSN (target_insn);
  int regno;
  df_ref def;

  /* We used to have a def reaching a use that is _before_ the def,
     with the def not dominating the use even though the use and def
     are in the same basic block, when a register may be used
     uninitialized in a loop.  This should not happen anymore since
     we do not use reaching definitions, but still we test for such
     cases and assume that DEF is not available.  */
  if (def_bb == target_bb
      ? DF_INSN_LUID (def_insn) >= DF_INSN_LUID (target_insn)
      : !dominated_by_p (CDI_DOMINATORS, target_bb, def_bb))
    return true;

  /* Check if the reg in USE has only one definition.  We already
     know that this definition reaches use, or we wouldn't be here.
     However, this is invalid for hard registers because if they are
     live at the beginning of the function it does not mean that we
     have an uninitialized access.  */
  regno = DF_REF_REGNO (use);
  def = DF_REG_DEF_CHAIN (regno);
  if (def
      && DF_REF_NEXT_REG (def) == NULL
      && regno >= FIRST_PSEUDO_REGISTER)
    return false;

  /* Check locally if we are in the same basic block.  */
  if (def_bb == target_bb)
    return local_ref_killed_between_p (use, def_insn, target_insn);

  /* Finally, if DEF_BB is the sole predecessor of TARGET_BB.  */
  if (single_pred_p (target_bb)
      && single_pred (target_bb) == def_bb)
    {
      df_ref x;

      /* See if USE is killed between DEF_INSN and the last insn in the
         basic block containing DEF_INSN.  */
      x = df_bb_regno_last_def_find (def_bb, regno);
      if (x && DF_INSN_LUID (DF_REF_INSN (x)) >= DF_INSN_LUID (def_insn))
        return true;

      /* See if USE is killed between TARGET_INSN and the first insn in the
         basic block containing TARGET_INSN.  */
      x = df_bb_regno_first_def_find (target_bb, regno);
      if (x && DF_INSN_LUID (DF_REF_INSN (x)) < DF_INSN_LUID (target_insn))
        return true;

      return false;
    }

  /* Otherwise assume the worst case.  */
  return true;
}
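
/* A worked example of the single-predecessor case above (insn and
   register numbers are hypothetical):

     bb 2:  insn A:  (set (reg 100) (reg 101))        <- DEF_INSN
            insn B:  (set (reg 101) (const_int 0))
     bb 3:  insn C:  (... (reg 100) ...)              <- TARGET_INSN

   Propagating (reg 101) from A into C must be rejected: although bb 2
   is the sole predecessor of bb 3, df_bb_regno_last_def_find finds the
   redefinition of reg 101 in insn B, whose luid is greater than A's,
   so use_killed_between returns true for the use of reg 101 in A.  */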


/* Check if all uses in DEF_INSN can be used in TARGET_INSN.  This
   would require full computation of available expressions;
   we check only restricted conditions, see use_killed_between.  */
static bool
all_uses_available_at (rtx_insn *def_insn, rtx_insn *target_insn)
{
  df_ref use;
  struct df_insn_info *insn_info = DF_INSN_INFO_GET (def_insn);
  rtx def_set = single_set (def_insn);
  rtx_insn *next;

  gcc_assert (def_set);

  /* If target_insn comes right after def_insn, which is very common
     for addresses, we can use a quicker test.  Ignore debug insns
     other than target insns for this.  */
  next = NEXT_INSN (def_insn);
  while (next && next != target_insn && DEBUG_INSN_P (next))
    next = NEXT_INSN (next);
  if (next == target_insn && REG_P (SET_DEST (def_set)))
    {
      rtx def_reg = SET_DEST (def_set);

      /* If the insn uses the reg that it defines, the substitution is
         invalid.  */
      FOR_EACH_INSN_INFO_USE (use, insn_info)
        if (rtx_equal_p (DF_REF_REG (use), def_reg))
          return false;
      FOR_EACH_INSN_INFO_EQ_USE (use, insn_info)
        if (rtx_equal_p (DF_REF_REG (use), def_reg))
          return false;
    }
  else
    {
      rtx def_reg = REG_P (SET_DEST (def_set)) ? SET_DEST (def_set) : NULL_RTX;

      /* Look at all the uses of DEF_INSN, and see if they are not
         killed between DEF_INSN and TARGET_INSN.  */
      FOR_EACH_INSN_INFO_USE (use, insn_info)
        {
          if (def_reg && rtx_equal_p (DF_REF_REG (use), def_reg))
            return false;
          if (use_killed_between (use, def_insn, target_insn))
            return false;
        }
      FOR_EACH_INSN_INFO_EQ_USE (use, insn_info)
        {
          if (def_reg && rtx_equal_p (DF_REF_REG (use), def_reg))
            return false;
          if (use_killed_between (use, def_insn, target_insn))
            return false;
        }
    }

  return true;
}


static df_ref *active_defs;
static sparseset active_defs_check;

/* Fill the ACTIVE_DEFS array with the use->def link for the registers
   mentioned in USE_REC.  Register the valid entries in ACTIVE_DEFS_CHECK
   too, for checking purposes.  */

static void
register_active_defs (df_ref use)
{
  for (; use; use = DF_REF_NEXT_LOC (use))
    {
      df_ref def = get_def_for_use (use);
      int regno = DF_REF_REGNO (use);

      if (flag_checking)
        sparseset_set_bit (active_defs_check, regno);
      active_defs[regno] = def;
    }
}


/* Build the use->def links that we use to update the dataflow info
   for new uses.  Note that building the links is very cheap and if
   it were done earlier, they could be used to rule out invalid
   propagations (in addition to what is done in all_uses_available_at).
   I'm not doing this yet, though.  */

static void
update_df_init (rtx_insn *def_insn, rtx_insn *insn)
{
  if (flag_checking)
    sparseset_clear (active_defs_check);
  register_active_defs (DF_INSN_USES (def_insn));
  register_active_defs (DF_INSN_USES (insn));
  register_active_defs (DF_INSN_EQ_USES (insn));
}


/* Update the USE_DEF_REF array for the given use, using the active definitions
   in the ACTIVE_DEFS array to match pseudos to their def.  */

static inline void
update_uses (df_ref use)
{
  for (; use; use = DF_REF_NEXT_LOC (use))
    {
      int regno = DF_REF_REGNO (use);

      /* Set up the use-def chain.  */
      if (DF_REF_ID (use) >= (int) use_def_ref.length ())
        use_def_ref.safe_grow_cleared (DF_REF_ID (use) + 1);

      if (flag_checking)
        gcc_assert (sparseset_bit_p (active_defs_check, regno));
      use_def_ref[DF_REF_ID (use)] = active_defs[regno];
    }
}


/* Update the USE_DEF_REF array for the uses in INSN.  If NOTE is
   non-null, only create and update the uses in that note.  */

static void
update_df (rtx_insn *insn, rtx note)
{
  struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);

  if (note)
    {
      df_uses_create (&XEXP (note, 0), insn, DF_REF_IN_NOTE);
      df_notes_rescan (insn);
    }
  else
    {
      df_uses_create (&PATTERN (insn), insn, 0);
      df_insn_rescan (insn);
      update_uses (DF_INSN_INFO_USES (insn_info));
    }

  update_uses (DF_INSN_INFO_EQ_USES (insn_info));
}


/* Try substituting NEW into LOC, which originated from forward propagation
   of USE's value from DEF_INSN.  SET_REG_EQUAL says whether we are
   substituting the whole SET_SRC, so we can set a REG_EQUAL note if the
   new insn is not recognized.  Return whether the substitution was
   performed.  */

static bool
try_fwprop_subst (df_ref use, rtx *loc, rtx new_rtx, rtx_insn *def_insn,
                  bool set_reg_equal)
{
  rtx_insn *insn = DF_REF_INSN (use);
  rtx set = single_set (insn);
  rtx note = NULL_RTX;
  bool speed = optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn));
  int old_cost = 0;
  bool ok;

  update_df_init (def_insn, insn);

  /* forward_propagate_subreg may be operating on an instruction with
     multiple sets.  If so, assume the cost of the new instruction is
     not greater than the old one.  */
  if (set)
    old_cost = set_src_cost (SET_SRC (set), GET_MODE (SET_DEST (set)), speed);
  if (dump_file)
    {
      fprintf (dump_file, "\nIn insn %d, replacing\n ", INSN_UID (insn));
      print_inline_rtx (dump_file, *loc, 2);
      fprintf (dump_file, "\n with ");
      print_inline_rtx (dump_file, new_rtx, 2);
      fprintf (dump_file, "\n");
    }

  validate_unshare_change (insn, loc, new_rtx, true);
  if (!verify_changes (0))
    {
      if (dump_file)
        fprintf (dump_file, "Changes to insn %d not recognized\n",
                 INSN_UID (insn));
      ok = false;
    }

  else if (DF_REF_TYPE (use) == DF_REF_REG_USE
           && set
           && (set_src_cost (SET_SRC (set), GET_MODE (SET_DEST (set)), speed)
               > old_cost))
    {
      if (dump_file)
        fprintf (dump_file, "Changes to insn %d not profitable\n",
                 INSN_UID (insn));
      ok = false;
    }

  else
    {
      if (dump_file)
        fprintf (dump_file, "Changed insn %d\n", INSN_UID (insn));
      ok = true;
    }

  if (ok)
    {
      confirm_change_group ();
      num_changes++;
    }
  else
    {
      cancel_changes (0);

      /* Can also record a simplified value in a REG_EQUAL note,
         making a new one if one does not already exist.  */
      if (set_reg_equal)
        {
          /* If there are any paradoxical SUBREGs, don't add REG_EQUAL note,
             because the bits in there can be anything and so might not
             match the REG_EQUAL note content.  See PR70574.  */
          subrtx_var_iterator::array_type array;
          FOR_EACH_SUBRTX_VAR (iter, array, *loc, NONCONST)
            {
              rtx x = *iter;
              if (SUBREG_P (x) && paradoxical_subreg_p (x))
                {
                  set_reg_equal = false;
                  break;
                }
            }

          if (set_reg_equal)
            {
              if (dump_file)
                fprintf (dump_file, "  Setting REG_EQUAL note\n");

              note = set_unique_reg_note (insn, REG_EQUAL, copy_rtx (new_rtx));
            }
        }
    }

  if ((ok || note) && !CONSTANT_P (new_rtx))
    update_df (insn, note);

  return ok;
}

/* For the given single_set INSN, containing SRC known to be a
   ZERO_EXTEND or SIGN_EXTEND of a register, return true if INSN
   is redundant due to the register being set by a LOAD_EXTEND_OP
   load from memory.  */

static bool
free_load_extend (rtx src, rtx_insn *insn)
{
  rtx reg;
  df_ref def, use;

  reg = XEXP (src, 0);
  if (load_extend_op (GET_MODE (reg)) != GET_CODE (src))
    return false;

  FOR_EACH_INSN_USE (use, insn)
    if (!DF_REF_IS_ARTIFICIAL (use)
        && DF_REF_TYPE (use) == DF_REF_REG_USE
        && DF_REF_REG (use) == reg)
      break;
  if (!use)
    return false;

  def = get_def_for_use (use);
  if (!def)
    return false;

  if (DF_REF_IS_ARTIFICIAL (def))
    return false;

  if (NONJUMP_INSN_P (DF_REF_INSN (def)))
    {
      rtx patt = PATTERN (DF_REF_INSN (def));

      if (GET_CODE (patt) == SET
          && GET_CODE (SET_SRC (patt)) == MEM
          && rtx_equal_p (SET_DEST (patt), reg))
        return true;
    }
  return false;
}
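
/* Example of the situation free_load_extend detects, assuming a target
   whose LOAD_EXTEND_OP (SImode) is ZERO_EXTEND (the insns and register
   numbers are made up):

     insn 1:  (set (reg:SI 100) (mem:SI ...))
     insn 2:  (set (reg:DI 101) (zero_extend:DI (reg:SI 100)))

   The extension in insn 2 comes for free with the load, so
   forward_propagate_subreg below does not rewrite lowpart subregs of
   (reg:DI 101) back to (reg:SI 100); doing so would add uses of reg 100
   and keep insn 2 from being removed.  */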

/* If USE is a subreg, see if it can be replaced by a pseudo.  */

static bool
forward_propagate_subreg (df_ref use, rtx_insn *def_insn, rtx def_set)
{
  rtx use_reg = DF_REF_REG (use);
  rtx_insn *use_insn;
  rtx src;
  scalar_int_mode int_use_mode, src_mode;

  /* Only consider subregs... */
  machine_mode use_mode = GET_MODE (use_reg);
  if (GET_CODE (use_reg) != SUBREG
      || !REG_P (SET_DEST (def_set)))
    return false;

  if (paradoxical_subreg_p (use_reg))
    {
      /* If this is a paradoxical SUBREG, we have no idea what value the
         extra bits would have.  However, if the operand is equivalent to
         a SUBREG whose operand is the same as our mode, and all the modes
         are within a word, we can just use the inner operand because
         these SUBREGs just say how to treat the register.  */
      use_insn = DF_REF_INSN (use);
      src = SET_SRC (def_set);
      if (GET_CODE (src) == SUBREG
          && REG_P (SUBREG_REG (src))
          && REGNO (SUBREG_REG (src)) >= FIRST_PSEUDO_REGISTER
          && GET_MODE (SUBREG_REG (src)) == use_mode
          && subreg_lowpart_p (src)
          && all_uses_available_at (def_insn, use_insn))
        return try_fwprop_subst (use, DF_REF_LOC (use), SUBREG_REG (src),
                                 def_insn, false);
    }

  /* If this is a SUBREG of a ZERO_EXTEND or SIGN_EXTEND, and the SUBREG
     is the low part of the reg being extended then just use the inner
     operand.  Don't do this if the ZERO_EXTEND or SIGN_EXTEND insn will
     be removed due to it matching a LOAD_EXTEND_OP load from memory,
     or due to the operation being a no-op when applied to registers.
     For example, if we have:

     A: (set (reg:DI X) (sign_extend:DI (reg:SI Y)))
     B: (... (subreg:SI (reg:DI X)) ...)

     and mode_rep_extended says that Y is already sign-extended,
     the backend will typically allow A to be combined with the
     definition of Y or, failing that, allow A to be deleted after
     reload through register tying.  Introducing more uses of Y
     prevents both optimisations.  */
  else if (is_a <scalar_int_mode> (use_mode, &int_use_mode)
           && subreg_lowpart_p (use_reg))
    {
      use_insn = DF_REF_INSN (use);
      src = SET_SRC (def_set);
      if ((GET_CODE (src) == ZERO_EXTEND
           || GET_CODE (src) == SIGN_EXTEND)
          && is_a <scalar_int_mode> (GET_MODE (src), &src_mode)
          && REG_P (XEXP (src, 0))
          && REGNO (XEXP (src, 0)) >= FIRST_PSEUDO_REGISTER
          && GET_MODE (XEXP (src, 0)) == use_mode
          && !free_load_extend (src, def_insn)
          && (targetm.mode_rep_extended (int_use_mode, src_mode)
              != (int) GET_CODE (src))
          && all_uses_available_at (def_insn, use_insn))
        return try_fwprop_subst (use, DF_REF_LOC (use), XEXP (src, 0),
                                 def_insn, false);
    }

  return false;
}

/* Try to replace USE with SRC (defined in DEF_INSN) in __asm.  */

static bool
forward_propagate_asm (df_ref use, rtx_insn *def_insn, rtx def_set, rtx reg)
{
  rtx_insn *use_insn = DF_REF_INSN (use);
  rtx src, use_pat, asm_operands, new_rtx, *loc;
  int speed_p, i;
  df_ref uses;

  gcc_assert ((DF_REF_FLAGS (use) & DF_REF_IN_NOTE) == 0);

  src = SET_SRC (def_set);
  use_pat = PATTERN (use_insn);

  /* In __asm don't replace if src might need more registers than
     reg, as that could increase register pressure on the __asm.  */
  uses = DF_INSN_USES (def_insn);
  if (uses && DF_REF_NEXT_LOC (uses))
    return false;

  update_df_init (def_insn, use_insn);
  speed_p = optimize_bb_for_speed_p (BLOCK_FOR_INSN (use_insn));
  asm_operands = NULL_RTX;
  switch (GET_CODE (use_pat))
    {
    case ASM_OPERANDS:
      asm_operands = use_pat;
      break;
    case SET:
      if (MEM_P (SET_DEST (use_pat)))
        {
          loc = &SET_DEST (use_pat);
          new_rtx = propagate_rtx (*loc, GET_MODE (*loc), reg, src, speed_p);
          if (new_rtx)
            validate_unshare_change (use_insn, loc, new_rtx, true);
        }
      asm_operands = SET_SRC (use_pat);
      break;
    case PARALLEL:
      for (i = 0; i < XVECLEN (use_pat, 0); i++)
        if (GET_CODE (XVECEXP (use_pat, 0, i)) == SET)
          {
            if (MEM_P (SET_DEST (XVECEXP (use_pat, 0, i))))
              {
                loc = &SET_DEST (XVECEXP (use_pat, 0, i));
                new_rtx = propagate_rtx (*loc, GET_MODE (*loc), reg,
                                         src, speed_p);
                if (new_rtx)
                  validate_unshare_change (use_insn, loc, new_rtx, true);
              }
            asm_operands = SET_SRC (XVECEXP (use_pat, 0, i));
          }
        else if (GET_CODE (XVECEXP (use_pat, 0, i)) == ASM_OPERANDS)
          asm_operands = XVECEXP (use_pat, 0, i);
      break;
    default:
      gcc_unreachable ();
    }

  gcc_assert (asm_operands && GET_CODE (asm_operands) == ASM_OPERANDS);
  for (i = 0; i < ASM_OPERANDS_INPUT_LENGTH (asm_operands); i++)
    {
      loc = &ASM_OPERANDS_INPUT (asm_operands, i);
      new_rtx = propagate_rtx (*loc, GET_MODE (*loc), reg, src, speed_p);
      if (new_rtx)
        validate_unshare_change (use_insn, loc, new_rtx, true);
    }

  if (num_changes_pending () == 0 || !apply_change_group ())
    return false;

  update_df (use_insn, NULL);
  num_changes++;
  return true;
}
1238*38fd1498Szrj
1239*38fd1498Szrj /* Try to replace USE with SRC (defined in DEF_INSN) and simplify the
1240*38fd1498Szrj result. */
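/* For illustration (hypothetical pseudo register numbers), given the def

     (set (reg:SI 100) (const_int 4))

   and the use

     (set (reg:SI 101) (plus:SI (reg:SI 100) (const_int 8)))

   propagating and simplifying would produce

     (set (reg:SI 101) (const_int 12))

   which is kept only if the resulting insn is recognized; failing that,
   the value may be recorded in a REG_EQUAL note.  */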
1241*38fd1498Szrj
1242*38fd1498Szrj static bool
1243*38fd1498Szrj forward_propagate_and_simplify (df_ref use, rtx_insn *def_insn, rtx def_set)
1244*38fd1498Szrj {
1245*38fd1498Szrj rtx_insn *use_insn = DF_REF_INSN (use);
1246*38fd1498Szrj rtx use_set = single_set (use_insn);
1247*38fd1498Szrj rtx src, reg, new_rtx, *loc;
1248*38fd1498Szrj bool set_reg_equal;
1249*38fd1498Szrj machine_mode mode;
1250*38fd1498Szrj int asm_use = -1;
1251*38fd1498Szrj
1252*38fd1498Szrj if (INSN_CODE (use_insn) < 0)
1253*38fd1498Szrj asm_use = asm_noperands (PATTERN (use_insn));
1254*38fd1498Szrj
1255*38fd1498Szrj if (!use_set && asm_use < 0 && !DEBUG_INSN_P (use_insn))
1256*38fd1498Szrj return false;
1257*38fd1498Szrj
1258*38fd1498Szrj /* Do not propagate into PC, CC0, etc. */
1259*38fd1498Szrj if (use_set && GET_MODE (SET_DEST (use_set)) == VOIDmode)
1260*38fd1498Szrj return false;
1261*38fd1498Szrj
1262*38fd1498Szrj /* If the def and the use are both subregs, check that they match. */
1263*38fd1498Szrj reg = DF_REF_REG (use);
1264*38fd1498Szrj if (GET_CODE (reg) == SUBREG && GET_CODE (SET_DEST (def_set)) == SUBREG)
1265*38fd1498Szrj {
1266*38fd1498Szrj if (maybe_ne (SUBREG_BYTE (SET_DEST (def_set)), SUBREG_BYTE (reg)))
1267*38fd1498Szrj return false;
1268*38fd1498Szrj }
1269*38fd1498Szrj /* Check if the def had a subreg, but the use has the whole reg. */
1270*38fd1498Szrj else if (REG_P (reg) && GET_CODE (SET_DEST (def_set)) == SUBREG)
1271*38fd1498Szrj return false;
1272*38fd1498Szrj /* Check if the use has a subreg, but the def had the whole reg. Unlike the
1273*38fd1498Szrj previous case, the optimization is possible and often useful. */
1274*38fd1498Szrj else if (GET_CODE (reg) == SUBREG && REG_P (SET_DEST (def_set)))
1275*38fd1498Szrj reg = SUBREG_REG (reg);
1276*38fd1498Szrj
1277*38fd1498Szrj /* Make sure that we can treat REG as having the same mode as the
1278*38fd1498Szrj source of DEF_SET. */
1279*38fd1498Szrj if (GET_MODE (SET_DEST (def_set)) != GET_MODE (reg))
1280*38fd1498Szrj return false;
1281*38fd1498Szrj
1282*38fd1498Szrj /* Check if the substitution is valid (last, because it's the most
1283*38fd1498Szrj expensive check!). */
1284*38fd1498Szrj src = SET_SRC (def_set);
1285*38fd1498Szrj if (!CONSTANT_P (src) && !all_uses_available_at (def_insn, use_insn))
1286*38fd1498Szrj return false;
1287*38fd1498Szrj
1288*38fd1498Szrj /* Check if the def is loading something from the constant pool; in this
1289*38fd1498Szrj case we would undo optimizations such as compress_float_constant.
1290*38fd1498Szrj Still, we can set a REG_EQUAL note. */
1291*38fd1498Szrj if (MEM_P (src) && MEM_READONLY_P (src))
1292*38fd1498Szrj {
1293*38fd1498Szrj rtx x = avoid_constant_pool_reference (src);
1294*38fd1498Szrj if (x != src && use_set)
1295*38fd1498Szrj {
1296*38fd1498Szrj rtx note = find_reg_note (use_insn, REG_EQUAL, NULL_RTX);
1297*38fd1498Szrj rtx old_rtx = note ? XEXP (note, 0) : SET_SRC (use_set);
1298*38fd1498Szrj rtx new_rtx = simplify_replace_rtx (old_rtx, src, x);
1299*38fd1498Szrj if (old_rtx != new_rtx)
1300*38fd1498Szrj set_unique_reg_note (use_insn, REG_EQUAL, copy_rtx (new_rtx));
1301*38fd1498Szrj }
1302*38fd1498Szrj return false;
1303*38fd1498Szrj }
1304*38fd1498Szrj
1305*38fd1498Szrj if (asm_use >= 0)
1306*38fd1498Szrj return forward_propagate_asm (use, def_insn, def_set, reg);
1307*38fd1498Szrj
1308*38fd1498Szrj /* Else try simplifying. */
1309*38fd1498Szrj
1310*38fd1498Szrj if (DF_REF_TYPE (use) == DF_REF_REG_MEM_STORE)
1311*38fd1498Szrj {
1312*38fd1498Szrj loc = &SET_DEST (use_set);
1313*38fd1498Szrj set_reg_equal = false;
1314*38fd1498Szrj }
1315*38fd1498Szrj else if (!use_set)
1316*38fd1498Szrj {
1317*38fd1498Szrj loc = &INSN_VAR_LOCATION_LOC (use_insn);
1318*38fd1498Szrj set_reg_equal = false;
1319*38fd1498Szrj }
1320*38fd1498Szrj else
1321*38fd1498Szrj {
1322*38fd1498Szrj rtx note = find_reg_note (use_insn, REG_EQUAL, NULL_RTX);
1323*38fd1498Szrj if (DF_REF_FLAGS (use) & DF_REF_IN_NOTE)
1324*38fd1498Szrj loc = &XEXP (note, 0);
1325*38fd1498Szrj else
1326*38fd1498Szrj loc = &SET_SRC (use_set);
1327*38fd1498Szrj
1328*38fd1498Szrj /* Do not replace an existing REG_EQUAL note if the insn is not
1329*38fd1498Szrj recognized. Either we're already replacing in the note, or we'll
1330*38fd1498Szrj separately try plugging the definition in the note and simplifying.
1331*38fd1498Szrj And only install a REG_EQUAL note when the destination is a REG
1332*38fd1498Szrj that isn't mentioned in USE_SET, as the note would be invalid
1333*38fd1498Szrj otherwise. We also don't want to install a note if we are merely
1334*38fd1498Szrj propagating a pseudo since verifying that this pseudo isn't dead
1335*38fd1498Szrj is a pain; moreover such a note won't help anything.
1336*38fd1498Szrj If the use is a paradoxical subreg, make sure we don't add a
1337*38fd1498Szrj REG_EQUAL note for it, because it is not equivalent, it is one
1338*38fd1498Szrj possible value for it, but we can't rely on it holding that value.
1339*38fd1498Szrj See PR70574. */
1340*38fd1498Szrj set_reg_equal = (note == NULL_RTX
1341*38fd1498Szrj && REG_P (SET_DEST (use_set))
1342*38fd1498Szrj && !REG_P (src)
1343*38fd1498Szrj && !(GET_CODE (src) == SUBREG
1344*38fd1498Szrj && REG_P (SUBREG_REG (src)))
1345*38fd1498Szrj && !reg_mentioned_p (SET_DEST (use_set),
1346*38fd1498Szrj SET_SRC (use_set))
1347*38fd1498Szrj && !paradoxical_subreg_p (DF_REF_REG (use)));
1348*38fd1498Szrj }
1349*38fd1498Szrj
1350*38fd1498Szrj if (GET_MODE (*loc) == VOIDmode)
1351*38fd1498Szrj mode = GET_MODE (SET_DEST (use_set));
1352*38fd1498Szrj else
1353*38fd1498Szrj mode = GET_MODE (*loc);
1354*38fd1498Szrj
1355*38fd1498Szrj new_rtx = propagate_rtx (*loc, mode, reg, src,
1356*38fd1498Szrj optimize_bb_for_speed_p (BLOCK_FOR_INSN (use_insn)));
1357*38fd1498Szrj
1358*38fd1498Szrj if (!new_rtx)
1359*38fd1498Szrj return false;
1360*38fd1498Szrj
1361*38fd1498Szrj return try_fwprop_subst (use, loc, new_rtx, def_insn, set_reg_equal);
1362*38fd1498Szrj }
1363*38fd1498Szrj
1364*38fd1498Szrj
1365*38fd1498Szrj /* Given a use USE of an insn, if it has a single reaching
1366*38fd1498Szrj definition, try to forward propagate the definition into the use's insn.
1367*38fd1498Szrj Return true if cfg cleanup will be needed. */
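/* For example (pseudo register numbers are illustrative), in

     (set (reg:SI 100) (const_int 0))
     ...
     (set (reg:SI 101) (ior:SI (reg:SI 100) (reg:SI 102)))

   the use of (reg:SI 100) is reached by a single definition, so the
   constant may be propagated into the IOR and simplified away; if the
   register were also set on another path reaching the use, nothing is
   attempted.  */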
1368*38fd1498Szrj
1369*38fd1498Szrj static bool
1370*38fd1498Szrj forward_propagate_into (df_ref use)
1371*38fd1498Szrj {
1372*38fd1498Szrj df_ref def;
1373*38fd1498Szrj rtx_insn *def_insn, *use_insn;
1374*38fd1498Szrj rtx def_set;
1375*38fd1498Szrj rtx parent;
1376*38fd1498Szrj
1377*38fd1498Szrj if (DF_REF_FLAGS (use) & DF_REF_READ_WRITE)
1378*38fd1498Szrj return false;
1379*38fd1498Szrj if (DF_REF_IS_ARTIFICIAL (use))
1380*38fd1498Szrj return false;
1381*38fd1498Szrj
1382*38fd1498Szrj /* Only consider uses that have a single definition. */
1383*38fd1498Szrj def = get_def_for_use (use);
1384*38fd1498Szrj if (!def)
1385*38fd1498Szrj return false;
1386*38fd1498Szrj if (DF_REF_FLAGS (def) & DF_REF_READ_WRITE)
1387*38fd1498Szrj return false;
1388*38fd1498Szrj if (DF_REF_IS_ARTIFICIAL (def))
1389*38fd1498Szrj return false;
1390*38fd1498Szrj
1391*38fd1498Szrj /* Do not propagate loop invariant definitions inside the loop. */
1392*38fd1498Szrj if (DF_REF_BB (def)->loop_father != DF_REF_BB (use)->loop_father)
1393*38fd1498Szrj return false;
1394*38fd1498Szrj
1395*38fd1498Szrj /* Check if the use is still present in the insn! */
1396*38fd1498Szrj use_insn = DF_REF_INSN (use);
1397*38fd1498Szrj if (DF_REF_FLAGS (use) & DF_REF_IN_NOTE)
1398*38fd1498Szrj parent = find_reg_note (use_insn, REG_EQUAL, NULL_RTX);
1399*38fd1498Szrj else
1400*38fd1498Szrj parent = PATTERN (use_insn);
1401*38fd1498Szrj
1402*38fd1498Szrj if (!reg_mentioned_p (DF_REF_REG (use), parent))
1403*38fd1498Szrj return false;
1404*38fd1498Szrj
1405*38fd1498Szrj def_insn = DF_REF_INSN (def);
1406*38fd1498Szrj if (multiple_sets (def_insn))
1407*38fd1498Szrj return false;
1408*38fd1498Szrj def_set = single_set (def_insn);
1409*38fd1498Szrj if (!def_set)
1410*38fd1498Szrj return false;
1411*38fd1498Szrj
1412*38fd1498Szrj /* Only try one kind of propagation. If two are possible, we'll
1413*38fd1498Szrj do it on the following iterations. */
1414*38fd1498Szrj if (forward_propagate_and_simplify (use, def_insn, def_set)
1415*38fd1498Szrj || forward_propagate_subreg (use, def_insn, def_set))
1416*38fd1498Szrj {
1417*38fd1498Szrj propagations_left--;
1418*38fd1498Szrj
1419*38fd1498Szrj if (cfun->can_throw_non_call_exceptions
1420*38fd1498Szrj && find_reg_note (use_insn, REG_EH_REGION, NULL_RTX)
1421*38fd1498Szrj && purge_dead_edges (DF_REF_BB (use)))
1422*38fd1498Szrj return true;
1423*38fd1498Szrj }
1424*38fd1498Szrj return false;
1425*38fd1498Szrj }
1426*38fd1498Szrj
1427*38fd1498Szrj
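/* Set up state for one run of the pass: compute dominators, initialize
   the loop optimizer without modifying the CFG, build the single
   def-use links, allocate the active_defs array (plus its checking
   sparseset when flag_checking is on), and set the budget of
   propagations to attempt.  */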
1428*38fd1498Szrj static void
1429*38fd1498Szrj fwprop_init (void)
1430*38fd1498Szrj {
1431*38fd1498Szrj num_changes = 0;
1432*38fd1498Szrj calculate_dominance_info (CDI_DOMINATORS);
1433*38fd1498Szrj
1434*38fd1498Szrj /* We do not always want to propagate into loops, so we have to find
1435*38fd1498Szrj loops and be careful about them. Avoid CFG modifications so that
1436*38fd1498Szrj we don't have to update dominance information afterwards for
1437*38fd1498Szrj build_single_def_use_links. */
1438*38fd1498Szrj loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
1439*38fd1498Szrj
1440*38fd1498Szrj build_single_def_use_links ();
1441*38fd1498Szrj df_set_flags (DF_DEFER_INSN_RESCAN);
1442*38fd1498Szrj
1443*38fd1498Szrj active_defs = XNEWVEC (df_ref, max_reg_num ());
1444*38fd1498Szrj if (flag_checking)
1445*38fd1498Szrj active_defs_check = sparseset_alloc (max_reg_num ());
1446*38fd1498Szrj
1447*38fd1498Szrj propagations_left = DF_USES_TABLE_SIZE ();
1448*38fd1498Szrj }
1449*38fd1498Szrj
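/* Release the state built by fwprop_init, clean up the CFG, delete
   insns that became trivially dead, and report the number of
   successful propagations in the dump file.  */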
1450*38fd1498Szrj static void
1451*38fd1498Szrj fwprop_done (void)
1452*38fd1498Szrj {
1453*38fd1498Szrj loop_optimizer_finalize ();
1454*38fd1498Szrj
1455*38fd1498Szrj use_def_ref.release ();
1456*38fd1498Szrj free (active_defs);
1457*38fd1498Szrj if (flag_checking)
1458*38fd1498Szrj sparseset_free (active_defs_check);
1459*38fd1498Szrj
1460*38fd1498Szrj free_dominance_info (CDI_DOMINATORS);
1461*38fd1498Szrj cleanup_cfg (0);
1462*38fd1498Szrj delete_trivially_dead_insns (get_insns (), max_reg_num ());
1463*38fd1498Szrj
1464*38fd1498Szrj if (dump_file)
1465*38fd1498Szrj fprintf (dump_file,
1466*38fd1498Szrj "\nNumber of successful forward propagations: %d\n\n",
1467*38fd1498Szrj num_changes);
1468*38fd1498Szrj }
1469*38fd1498Szrj
1470*38fd1498Szrj
1471*38fd1498Szrj /* Main entry point. */
1472*38fd1498Szrj
1473*38fd1498Szrj static bool
1474*38fd1498Szrj gate_fwprop (void)
1475*38fd1498Szrj {
1476*38fd1498Szrj return optimize > 0 && flag_forward_propagate;
1477*38fd1498Szrj }
1478*38fd1498Szrj
1479*38fd1498Szrj static unsigned int
1480*38fd1498Szrj fwprop (void)
1481*38fd1498Szrj {
1482*38fd1498Szrj unsigned i;
1483*38fd1498Szrj
1484*38fd1498Szrj fwprop_init ();
1485*38fd1498Szrj
1486*38fd1498Szrj /* Go through all the uses. df_uses_create will create new ones at the
1487*38fd1498Szrj end, and we'll go through them as well.
1488*38fd1498Szrj
1489*38fd1498Szrj Do not forward propagate addresses into loops until after unrolling.
1490*38fd1498Szrj CSE did so because it was able to fix its own mess, but we cannot. */
1491*38fd1498Szrj
1492*38fd1498Szrj for (i = 0; i < DF_USES_TABLE_SIZE (); i++)
1493*38fd1498Szrj {
1494*38fd1498Szrj if (!propagations_left)
1495*38fd1498Szrj break;
1496*38fd1498Szrj
1497*38fd1498Szrj df_ref use = DF_USES_GET (i);
1498*38fd1498Szrj if (use)
1499*38fd1498Szrj if (DF_REF_TYPE (use) == DF_REF_REG_USE
1500*38fd1498Szrj || DF_REF_BB (use)->loop_father == NULL
1501*38fd1498Szrj /* The outermost loop is not really a loop. */
1502*38fd1498Szrj || loop_outer (DF_REF_BB (use)->loop_father) == NULL)
1503*38fd1498Szrj forward_propagate_into (use);
1504*38fd1498Szrj }
1505*38fd1498Szrj
1506*38fd1498Szrj fwprop_done ();
1507*38fd1498Szrj return 0;
1508*38fd1498Szrj }
1509*38fd1498Szrj
1510*38fd1498Szrj namespace {
1511*38fd1498Szrj
1512*38fd1498Szrj const pass_data pass_data_rtl_fwprop =
1513*38fd1498Szrj {
1514*38fd1498Szrj RTL_PASS, /* type */
1515*38fd1498Szrj "fwprop1", /* name */
1516*38fd1498Szrj OPTGROUP_NONE, /* optinfo_flags */
1517*38fd1498Szrj TV_FWPROP, /* tv_id */
1518*38fd1498Szrj 0, /* properties_required */
1519*38fd1498Szrj 0, /* properties_provided */
1520*38fd1498Szrj 0, /* properties_destroyed */
1521*38fd1498Szrj 0, /* todo_flags_start */
1522*38fd1498Szrj TODO_df_finish, /* todo_flags_finish */
1523*38fd1498Szrj };
1524*38fd1498Szrj
1525*38fd1498Szrj class pass_rtl_fwprop : public rtl_opt_pass
1526*38fd1498Szrj {
1527*38fd1498Szrj public:
1528*38fd1498Szrj pass_rtl_fwprop (gcc::context *ctxt)
1529*38fd1498Szrj : rtl_opt_pass (pass_data_rtl_fwprop, ctxt)
1530*38fd1498Szrj {}
1531*38fd1498Szrj
1532*38fd1498Szrj /* opt_pass methods: */
1533*38fd1498Szrj virtual bool gate (function *) { return gate_fwprop (); }
1534*38fd1498Szrj virtual unsigned int execute (function *) { return fwprop (); }
1535*38fd1498Szrj
1536*38fd1498Szrj }; // class pass_rtl_fwprop
1537*38fd1498Szrj
1538*38fd1498Szrj } // anon namespace
1539*38fd1498Szrj
1540*38fd1498Szrj rtl_opt_pass *
1541*38fd1498Szrj make_pass_rtl_fwprop (gcc::context *ctxt)
1542*38fd1498Szrj {
1543*38fd1498Szrj return new pass_rtl_fwprop (ctxt);
1544*38fd1498Szrj }
1545*38fd1498Szrj
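/* Address variant of the pass: only consider uses that occur inside
   memory references (not plain register uses), and only inside real
   loops, complementing the conditions used by fwprop above.  */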
1546*38fd1498Szrj static unsigned int
1547*38fd1498Szrj fwprop_addr (void)
1548*38fd1498Szrj {
1549*38fd1498Szrj unsigned i;
1550*38fd1498Szrj
1551*38fd1498Szrj fwprop_init ();
1552*38fd1498Szrj
1553*38fd1498Szrj /* Go through all the uses. df_uses_create will create new ones at the
1554*38fd1498Szrj end, and we'll go through them as well. */
1555*38fd1498Szrj for (i = 0; i < DF_USES_TABLE_SIZE (); i++)
1556*38fd1498Szrj {
1557*38fd1498Szrj if (!propagations_left)
1558*38fd1498Szrj break;
1559*38fd1498Szrj
1560*38fd1498Szrj df_ref use = DF_USES_GET (i);
1561*38fd1498Szrj if (use)
1562*38fd1498Szrj if (DF_REF_TYPE (use) != DF_REF_REG_USE
1563*38fd1498Szrj && DF_REF_BB (use)->loop_father != NULL
1564*38fd1498Szrj /* The outermost loop is not really a loop. */
1565*38fd1498Szrj && loop_outer (DF_REF_BB (use)->loop_father) != NULL)
1566*38fd1498Szrj forward_propagate_into (use);
1567*38fd1498Szrj }
1568*38fd1498Szrj
1569*38fd1498Szrj fwprop_done ();
1570*38fd1498Szrj return 0;
1571*38fd1498Szrj }
1572*38fd1498Szrj
1573*38fd1498Szrj namespace {
1574*38fd1498Szrj
1575*38fd1498Szrj const pass_data pass_data_rtl_fwprop_addr =
1576*38fd1498Szrj {
1577*38fd1498Szrj RTL_PASS, /* type */
1578*38fd1498Szrj "fwprop2", /* name */
1579*38fd1498Szrj OPTGROUP_NONE, /* optinfo_flags */
1580*38fd1498Szrj TV_FWPROP, /* tv_id */
1581*38fd1498Szrj 0, /* properties_required */
1582*38fd1498Szrj 0, /* properties_provided */
1583*38fd1498Szrj 0, /* properties_destroyed */
1584*38fd1498Szrj 0, /* todo_flags_start */
1585*38fd1498Szrj TODO_df_finish, /* todo_flags_finish */
1586*38fd1498Szrj };
1587*38fd1498Szrj
1588*38fd1498Szrj class pass_rtl_fwprop_addr : public rtl_opt_pass
1589*38fd1498Szrj {
1590*38fd1498Szrj public:
1591*38fd1498Szrj pass_rtl_fwprop_addr (gcc::context *ctxt)
1592*38fd1498Szrj : rtl_opt_pass (pass_data_rtl_fwprop_addr, ctxt)
1593*38fd1498Szrj {}
1594*38fd1498Szrj
1595*38fd1498Szrj /* opt_pass methods: */
1596*38fd1498Szrj virtual bool gate (function *) { return gate_fwprop (); }
1597*38fd1498Szrj virtual unsigned int execute (function *) { return fwprop_addr (); }
1598*38fd1498Szrj
1599*38fd1498Szrj }; // class pass_rtl_fwprop_addr
1600*38fd1498Szrj
1601*38fd1498Szrj } // anon namespace
1602*38fd1498Szrj
1603*38fd1498Szrj rtl_opt_pass *
1604*38fd1498Szrj make_pass_rtl_fwprop_addr (gcc::context *ctxt)
1605*38fd1498Szrj {
1606*38fd1498Szrj return new pass_rtl_fwprop_addr (ctxt);
1607*38fd1498Szrj }
1608