/* Post reload partially redundant load elimination
   Copyright (C) 2004-2018 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "predict.h"
#include "df.h"
#include "memmodel.h"
#include "tm_p.h"
#include "insn-config.h"
#include "emit-rtl.h"
#include "recog.h"

#include "cfgrtl.h"
#include "profile.h"
#include "expr.h"
#include "params.h"
#include "tree-pass.h"
#include "dbgcnt.h"
#include "gcse-common.h"

/* The following code implements gcse after reload.  The purpose of this
   pass is to clean up redundant loads generated by reload and by other
   optimizations that run after gcse.  It searches for simple inter-block
   redundancies and tries to eliminate them by adding moves and loads
   in cold places.

   We perform partially redundant load elimination: we look for fully
   or partially redundant loads fed by one or more loads/stores in
   predecessor BBs, and try adding loads to make them fully redundant.
   We also check whether it's worth adding those loads in order to be
   able to delete the redundant load.

   Algorithm:
   1. Build the available expressions hash table:
      For each load/store instruction, if the loaded/stored memory didn't
      change until the end of the basic block, add this memory expression
      to the hash table.
   2. Perform redundancy elimination:
      For each load instruction do the following:
	 Perform partial redundancy elimination: check whether it's worth
	 adding loads to make the load fully redundant.  If so, add the
	 loads and register copies and delete the original load.
   3. Delete instructions made redundant in step 2.

   Future enhancement:
     If the loaded register is used/defined between the load and some store,
     look for some other free register between the load and all its stores,
     and replace the load with a copy from this register to the loaded
     register.
*/
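
/* As a concrete illustration (block and register numbers invented for
   the example), consider a block with two predecessors where only one
   of them computes the loaded value:

       bb 1: r3 = [r1]            bb 2: (no load from [r1])
              \                      /
               v                    v
              bb 3: r3 = [r1]   <- partially redundant

   The load in bb 3 is redundant only on the path through bb 1.  If the
   heuristics in eliminate_partially_redundant_load deem it profitable,
   the pass inserts a copy of the load on the edge bb 2 -> bb 3 (and a
   register move on the edge bb 1 -> bb 3 when the available value lives
   in a register other than r3), making the load in bb 3 fully redundant
   so that it can be deleted in step 3.  */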


/* Keep statistics of this pass.  */
static struct
{
  int moves_inserted;
  int copies_inserted;
  int insns_deleted;
} stats;

/* We need to keep a hash table of expressions.  The table entries are of
   type 'struct expr', and for each expression there is a singly linked
   list of occurrences.  */

/* Expression elements in the hash table.  */
struct expr
{
  /* The expression (SET_SRC for expressions, PATTERN for assignments).  */
  rtx expr;

  /* The cached hash value for this entry.  */
  hashval_t hash;

  /* Index in the transparent bitmaps.  */
  unsigned int bitmap_index;

  /* List of available occurrences in basic blocks in the function.  */
  struct occr *avail_occr;
};

/* Hashtable helpers.  */

struct expr_hasher : nofree_ptr_hash <expr>
{
  static inline hashval_t hash (const expr *);
  static inline bool equal (const expr *, const expr *);
};


/* Hash expression X.
   DO_NOT_RECORD_P is set nonzero if a volatile operand is found or if
   the expression contains something we don't want to insert in the
   table.  */

static hashval_t
hash_expr (rtx x, int *do_not_record_p)
{
  *do_not_record_p = 0;
  return hash_rtx (x, GET_MODE (x), do_not_record_p,
		   NULL, /*have_reg_qty=*/false);
}

/* Callback for hashtab.
   Return the hash value for expression EXP.  We don't actually hash
   here, we just return the cached hash value.  */

inline hashval_t
expr_hasher::hash (const expr *exp)
{
  return exp->hash;
}

/* Callback for hashtab.
   Return nonzero if EXP1 is equivalent to EXP2.  */

inline bool
expr_hasher::equal (const expr *exp1, const expr *exp2)
{
  int equiv_p = exp_equiv_p (exp1->expr, exp2->expr, 0, true);

  gcc_assert (!equiv_p || exp1->hash == exp2->hash);
  return equiv_p;
}

/* The table itself.  */
static hash_table<expr_hasher> *expr_table;


static struct obstack expr_obstack;

/* Occurrence of an expression.
   There is at most one occurrence per basic block.  If a pattern appears
   more than once, the last appearance is used.  */

struct occr
{
  /* Next occurrence of this expression.  */
  struct occr *next;
  /* The insn that computes the expression.  */
  rtx_insn *insn;
  /* Nonzero if this [anticipatable] occurrence has been deleted.  */
  char deleted_p;
};
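
/* An illustrative sketch (expression and blocks invented for the
   example) of how the two structures above relate: an expression such
   as (mem:SI (reg:SI 1)) gets a single 'struct expr' entry in the hash
   table, and its avail_occr field chains at most one 'struct occr' per
   basic block in which the expression is available at the block's end:

       expr "(mem:SI (reg:SI 1))"
         -> occr (insn in bb 2) -> occr (insn in bb 7) -> NULL  */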

static struct obstack occr_obstack;

/* The following structure holds information about the occurrences of
   the redundant instructions.  */
struct unoccr
{
  struct unoccr *next;
  edge pred;
  rtx_insn *insn;
};

static struct obstack unoccr_obstack;

/* Array in which element I is the CUID of the insn that last set hard
   register I, since the start of the current basic block.

   This array is used during the building of the hash table (step 1) to
   determine if a reg is killed before the end of a basic block.

   It is also used when eliminating partial redundancies (step 2) to see
   if a reg was modified since the start of a basic block.  */
static int *reg_avail_info;
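
/* For example, reg_avail_info[5] == 3 would mean that hard register 5
   was last set by the insn with CUID 3 in the current basic block,
   while a value of 0 means the register has not been set since the
   start of the block (CUIDs of real insns start at 1, see alloc_mem).  */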

/* A list of insns that may modify memory within the current basic block.  */
struct modifies_mem
{
  rtx_insn *insn;
  struct modifies_mem *next;
};
static struct modifies_mem *modifies_mem_list;

/* The modifies_mem structs also go on an obstack, only this obstack is
   freed each time after completing the analysis or transformations on
   a basic block.  So we allocate a dummy modifies_mem_obstack_bottom
   object on the obstack to keep track of the bottom of the obstack.  */
static struct obstack modifies_mem_obstack;
static struct modifies_mem *modifies_mem_obstack_bottom;

/* Mapping of insn UIDs to CUIDs.
   CUIDs are like UIDs except they increase monotonically in each basic
   block, have no gaps, and only apply to real insns.  */
static int *uid_cuid;
#define INSN_CUID(INSN) (uid_cuid[INSN_UID (INSN)])
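
/* For example, if the first insns of a function are a note with UID 10
   followed by real insns with UIDs 11 and 14, then
   uid_cuid[10] == uid_cuid[11] == 1 and uid_cuid[14] == 2; a non-insn
   simply shares the CUID of the next real insn (see the mapping loop
   in alloc_mem below).  */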

/* Bitmap of blocks which have memory stores.  */
static bitmap modify_mem_list_set;

/* Bitmap of blocks which have calls.  */
static bitmap blocks_with_calls;

/* Vector indexed by block # with a list of all the insns that
   modify memory within the block.  */
static vec<rtx_insn *> *modify_mem_list;

/* Vector indexed by block # with a canonicalized list of insns
   that modify memory in the block.  */
static vec<modify_pair> *canon_modify_mem_list;

/* Vector of simple bitmaps indexed by block number.  Each component sbitmap
   indicates which expressions are transparent through the block.  */
static sbitmap *transp;


/* Helpers for memory allocation/freeing.  */
static void alloc_mem (void);
static void free_mem (void);

/* Support for hash table construction and transformations.  */
static bool oprs_unchanged_p (rtx, rtx_insn *, bool);
static void record_last_reg_set_info (rtx_insn *, rtx);
static void record_last_reg_set_info_regno (rtx_insn *, int);
static void record_last_mem_set_info (rtx_insn *);
static void record_last_set_info (rtx, const_rtx, void *);
static void record_opr_changes (rtx_insn *);

static void find_mem_conflicts (rtx, const_rtx, void *);
static int load_killed_in_block_p (int, rtx, bool);
static void reset_opr_set_tables (void);

/* Hash table support.  */
static hashval_t hash_expr (rtx, int *);
static void insert_expr_in_table (rtx, rtx_insn *);
static struct expr *lookup_expr_in_table (rtx);
static void dump_hash_table (FILE *);

/* Helpers for eliminate_partially_redundant_load.  */
static bool reg_killed_on_edge (rtx, edge);
static bool reg_used_on_edge (rtx, edge);

static rtx get_avail_load_store_reg (rtx_insn *);

static bool bb_has_well_behaved_predecessors (basic_block);
static struct occr *get_bb_avail_insn (basic_block, struct occr *, int);
static void hash_scan_set (rtx_insn *);
static void compute_hash_table (void);

/* The work horses of this pass.  */
static void eliminate_partially_redundant_load (basic_block,
						rtx_insn *,
						struct expr *);
static void eliminate_partially_redundant_loads (void);

/* Allocate memory for the CUID mapping array and register/memory
   tracking tables.  */

static void
alloc_mem (void)
{
  int i;
  basic_block bb;
  rtx_insn *insn;

  /* Find the largest UID and create a mapping from UIDs to CUIDs.  */
  uid_cuid = XCNEWVEC (int, get_max_uid () + 1);
  i = 1;
  FOR_EACH_BB_FN (bb, cfun)
    FOR_BB_INSNS (bb, insn)
      {
	if (INSN_P (insn))
	  uid_cuid[INSN_UID (insn)] = i++;
	else
	  uid_cuid[INSN_UID (insn)] = i;
      }

  /* Allocate the available expressions hash table.  We don't want to
     make the hash table too small, but unnecessarily making it too large
     also doesn't help.  The i/4 is a gcse.c relic, and seems like a
     reasonable choice.  */
  expr_table = new hash_table<expr_hasher> (MAX (i / 4, 13));

  /* We allocate everything on obstacks because we often can roll back
     the whole obstack to some point.  Freeing obstacks is very fast.  */
  gcc_obstack_init (&expr_obstack);
  gcc_obstack_init (&occr_obstack);
  gcc_obstack_init (&unoccr_obstack);
  gcc_obstack_init (&modifies_mem_obstack);

  /* Working array used to track the last set for each register
     in the current block.  */
  reg_avail_info = (int *) xmalloc (FIRST_PSEUDO_REGISTER * sizeof (int));

  /* Put a dummy modifies_mem object on the modifies_mem_obstack, so we
     can roll it back in reset_opr_set_tables.  */
  modifies_mem_obstack_bottom =
    (struct modifies_mem *) obstack_alloc (&modifies_mem_obstack,
					   sizeof (struct modifies_mem));

  blocks_with_calls = BITMAP_ALLOC (NULL);
  modify_mem_list_set = BITMAP_ALLOC (NULL);

  modify_mem_list = (vec_rtx_heap *) xcalloc (last_basic_block_for_fn (cfun),
					      sizeof (vec_rtx_heap));
  canon_modify_mem_list
    = (vec_modify_pair_heap *) xcalloc (last_basic_block_for_fn (cfun),
					sizeof (vec_modify_pair_heap));
}

/* Free memory allocated by alloc_mem.  */

static void
free_mem (void)
{
  free (uid_cuid);

  delete expr_table;
  expr_table = NULL;

  obstack_free (&expr_obstack, NULL);
  obstack_free (&occr_obstack, NULL);
  obstack_free (&unoccr_obstack, NULL);
  obstack_free (&modifies_mem_obstack, NULL);

  unsigned i;
  bitmap_iterator bi;
  EXECUTE_IF_SET_IN_BITMAP (modify_mem_list_set, 0, i, bi)
    {
      modify_mem_list[i].release ();
      canon_modify_mem_list[i].release ();
    }

  BITMAP_FREE (blocks_with_calls);
  BITMAP_FREE (modify_mem_list_set);
  free (reg_avail_info);
  free (modify_mem_list);
  free (canon_modify_mem_list);
}


/* Insert expression X from INSN into the expression hash table.
   If X is already present, record INSN as its last occurrence in
   INSN's basic block.  */

static void
insert_expr_in_table (rtx x, rtx_insn *insn)
{
  int do_not_record_p;
  hashval_t hash;
  struct expr *cur_expr, **slot;
  struct occr *avail_occr, *last_occr = NULL;

  hash = hash_expr (x, &do_not_record_p);

  /* Do not insert the expression in the table if it contains volatile
     operands, or if hash_expr determines the expression is something we
     don't want to or can't handle.  */
  if (do_not_record_p)
    return;

  /* We anticipate that redundant expressions are rare, so for convenience
     allocate a new hash table element here already and set its fields.
     If we don't do this, we need a hack with a static struct expr.  Anyway,
     obstack_free is really fast and one more obstack_alloc doesn't hurt if
     we're going to see more expressions later on.  */
  cur_expr = (struct expr *) obstack_alloc (&expr_obstack,
					    sizeof (struct expr));
  cur_expr->expr = x;
  cur_expr->hash = hash;
  cur_expr->avail_occr = NULL;

  slot = expr_table->find_slot_with_hash (cur_expr, hash, INSERT);

  if (! (*slot))
    {
      /* The expression isn't found, so insert it.  */
      *slot = cur_expr;

      /* Anytime we add an entry to the table, record the index
	 of the new entry.  The bitmap index starts counting
	 at zero.  */
      cur_expr->bitmap_index = expr_table->elements () - 1;
    }
  else
    {
      /* The expression is already in the table, so roll back the
	 obstack and use the existing table entry.  */
      obstack_free (&expr_obstack, cur_expr);
      cur_expr = *slot;
    }

  /* Search for another occurrence in the same basic block.  */
  avail_occr = cur_expr->avail_occr;
  while (avail_occr
	 && BLOCK_FOR_INSN (avail_occr->insn) != BLOCK_FOR_INSN (insn))
    {
      /* If an occurrence isn't found, save a pointer to the end of
	 the list.  */
      last_occr = avail_occr;
      avail_occr = avail_occr->next;
    }

  if (avail_occr)
    /* Found another instance of the expression in the same basic block.
       Prefer this occurrence to the currently recorded one.  We want
       the last one in the block and the block is scanned from start
       to end.  */
    avail_occr->insn = insn;
  else
    {
      /* First occurrence of this expression in this basic block.  */
      avail_occr = (struct occr *) obstack_alloc (&occr_obstack,
						  sizeof (struct occr));

      /* First occurrence of this expression in any block?  */
      if (cur_expr->avail_occr == NULL)
	cur_expr->avail_occr = avail_occr;
      else
	last_occr->next = avail_occr;

      avail_occr->insn = insn;
      avail_occr->next = NULL;
      avail_occr->deleted_p = 0;
    }
}


/* Look up pattern PAT in the expression hash table.
   The result is a pointer to the table entry, or NULL if not found.  */

static struct expr *
lookup_expr_in_table (rtx pat)
{
  int do_not_record_p;
  struct expr **slot, *tmp_expr;
  hashval_t hash = hash_expr (pat, &do_not_record_p);

  if (do_not_record_p)
    return NULL;

  tmp_expr = (struct expr *) obstack_alloc (&expr_obstack,
					    sizeof (struct expr));
  tmp_expr->expr = pat;
  tmp_expr->hash = hash;
  tmp_expr->avail_occr = NULL;

  slot = expr_table->find_slot_with_hash (tmp_expr, hash, INSERT);
  obstack_free (&expr_obstack, tmp_expr);

  if (!slot)
    return NULL;
  else
    return (*slot);
}


/* Dump all expressions and occurrences that are currently in the
   expression hash table to FILE.  */

/* This helper is called via htab_traverse.  */
int
dump_expr_hash_table_entry (expr **slot, FILE *file)
{
  struct expr *exprs = *slot;
  struct occr *occr;

  fprintf (file, "expr: ");
  print_rtl (file, exprs->expr);
  fprintf (file, "\nhashcode: %u\n", exprs->hash);
  fprintf (file, "list of occurrences:\n");
  occr = exprs->avail_occr;
  while (occr)
    {
      rtx_insn *insn = occr->insn;
      print_rtl_single (file, insn);
      fprintf (file, "\n");
      occr = occr->next;
    }
  fprintf (file, "\n");
  return 1;
}

static void
dump_hash_table (FILE *file)
{
  fprintf (file, "\n\nexpression hash table\n");
  fprintf (file, "size %ld, %ld elements, %f collision/search ratio\n",
	   (long) expr_table->size (),
	   (long) expr_table->elements (),
	   expr_table->collisions ());
  if (expr_table->elements () > 0)
    {
      fprintf (file, "\n\ntable entries:\n");
      expr_table->traverse <FILE *, dump_expr_hash_table_entry> (file);
    }
  fprintf (file, "\n");
}

/* Return true if register X is recorded as being set by an instruction
   whose CUID is greater than the one given.  */

static bool
reg_changed_after_insn_p (rtx x, int cuid)
{
  unsigned int regno, end_regno;

  regno = REGNO (x);
  end_regno = END_REGNO (x);
  do
    if (reg_avail_info[regno] > cuid)
      return true;
  while (++regno < end_regno);
  return false;
}

/* Return nonzero if the operands of expression X are unchanged
   1) from the start of INSN's basic block up to but not including INSN
      if AFTER_INSN is false, or
   2) from INSN to the end of INSN's basic block if AFTER_INSN is true.  */
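
/* For example (registers invented for the illustration), in a block
   containing

       insn A:  r3 = [r1]
       insn B:  r1 = r1 + 4

   the operands of (mem (reg r1)) are not unchanged after insn A
   (AFTER_INSN true), because insn B modifies r1; they are unchanged
   before insn A (AFTER_INSN false) provided nothing earlier in the
   block set r1 or stored to overlapping memory.  */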

static bool
oprs_unchanged_p (rtx x, rtx_insn *insn, bool after_insn)
{
  int i, j;
  enum rtx_code code;
  const char *fmt;

  if (x == 0)
    return 1;

  code = GET_CODE (x);
  switch (code)
    {
    case REG:
      /* We are called after register allocation.  */
      gcc_assert (REGNO (x) < FIRST_PSEUDO_REGISTER);
      if (after_insn)
	return !reg_changed_after_insn_p (x, INSN_CUID (insn) - 1);
      else
	return !reg_changed_after_insn_p (x, 0);

    case MEM:
      if (load_killed_in_block_p (INSN_CUID (insn), x, after_insn))
	return 0;
      else
	return oprs_unchanged_p (XEXP (x, 0), insn, after_insn);

    case PC:
    case CC0: /*FIXME*/
    case CONST:
    CASE_CONST_ANY:
    case SYMBOL_REF:
    case LABEL_REF:
    case ADDR_VEC:
    case ADDR_DIFF_VEC:
      return 1;

    case PRE_DEC:
    case PRE_INC:
    case POST_DEC:
    case POST_INC:
    case PRE_MODIFY:
    case POST_MODIFY:
      if (after_insn)
	return 0;
      break;

    default:
      break;
    }

  for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
    {
      if (fmt[i] == 'e')
	{
	  if (! oprs_unchanged_p (XEXP (x, i), insn, after_insn))
	    return 0;
	}
      else if (fmt[i] == 'E')
	for (j = 0; j < XVECLEN (x, i); j++)
	  if (! oprs_unchanged_p (XVECEXP (x, i, j), insn, after_insn))
	    return 0;
    }

  return 1;
}


/* Used for communication between find_mem_conflicts and
   load_killed_in_block_p.  Nonzero if find_mem_conflicts finds a
   conflict between two memory references.
   This is a bit of a hack to work around the limitations of note_stores.  */
static int mems_conflict_p;

/* DEST is the output of an instruction.  If it is a memory reference and
   possibly conflicts with the load found in DATA, then set mems_conflict_p
   to a nonzero value.  */

static void
find_mem_conflicts (rtx dest, const_rtx setter ATTRIBUTE_UNUSED,
		    void *data)
{
  rtx mem_op = (rtx) data;

  while (GET_CODE (dest) == SUBREG
	 || GET_CODE (dest) == ZERO_EXTRACT
	 || GET_CODE (dest) == STRICT_LOW_PART)
    dest = XEXP (dest, 0);

  /* If DEST is not a MEM, then it will not conflict with the load.  Note
     that function calls are assumed to clobber memory, but are handled
     elsewhere.  */
  if (! MEM_P (dest))
    return;

  if (true_dependence (dest, GET_MODE (dest), mem_op))
    mems_conflict_p = 1;
}


/* Return nonzero if the expression in X (a memory reference) is killed
   in the current basic block before (if AFTER_INSN is false) or after
   (if AFTER_INSN is true) the insn with the CUID in UID_LIMIT.

   This function assumes that the modifies_mem table is flushed when
   the hash table construction or redundancy elimination phases start
   processing a new basic block.  */
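
/* As an invented example: suppose the only entry on the modifies_mem
   list for the current block is a store to [r1 + 4] whose insn has
   CUID 3, and X is a load from [r1 + 4].  Then
   load_killed_in_block_p (2, x, true) returns nonzero, because the
   store at CUID 3 comes at or after the limit and may conflict with X,
   while load_killed_in_block_p (4, x, true) returns zero, because the
   store is filtered out as happening before the limit.  */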

static int
load_killed_in_block_p (int uid_limit, rtx x, bool after_insn)
{
  struct modifies_mem *list_entry = modifies_mem_list;

  while (list_entry)
    {
      rtx_insn *setter = list_entry->insn;

      /* Ignore entries in the list that do not apply.  */
      if ((after_insn
	   && INSN_CUID (setter) < uid_limit)
	  || (! after_insn
	      && INSN_CUID (setter) > uid_limit))
	{
	  list_entry = list_entry->next;
	  continue;
	}

      /* If SETTER is a call everything is clobbered.  Note that calls
	 to pure functions are never put on the list, so we need not
	 worry about them.  */
      if (CALL_P (setter))
	return 1;

      /* SETTER must be an insn of some kind that sets memory.  Call
	 note_stores to examine each hunk of memory that is modified.
	 It will set mems_conflict_p to nonzero if there may be a
	 conflict between X and SETTER.  */
      mems_conflict_p = 0;
      note_stores (PATTERN (setter), find_mem_conflicts, x);
      if (mems_conflict_p)
	return 1;

      list_entry = list_entry->next;
    }
  return 0;
}


/* Record that the registers covered by REG were last set by INSN.  */

static inline void
record_last_reg_set_info (rtx_insn *insn, rtx reg)
{
  unsigned int regno, end_regno;

  regno = REGNO (reg);
  end_regno = END_REGNO (reg);
  do
    reg_avail_info[regno] = INSN_CUID (insn);
  while (++regno < end_regno);
}

/* Likewise, but for a single hard register REGNO.  */

static inline void
record_last_reg_set_info_regno (rtx_insn *insn, int regno)
{
  reg_avail_info[regno] = INSN_CUID (insn);
}


/* Record memory modification information for INSN.  We do not actually
   care about the memory location(s) that are set, or even how they are
   set (consider a CALL_INSN).  We merely need to record which insns
   modify memory.  */

static void
record_last_mem_set_info (rtx_insn *insn)
{
  struct modifies_mem *list_entry;

  list_entry = (struct modifies_mem *) obstack_alloc (&modifies_mem_obstack,
						      sizeof (struct modifies_mem));
  list_entry->insn = insn;
  list_entry->next = modifies_mem_list;
  modifies_mem_list = list_entry;

  record_last_mem_set_info_common (insn, modify_mem_list,
				   canon_modify_mem_list,
				   modify_mem_list_set,
				   blocks_with_calls);
}

/* Called from compute_hash_table via note_stores to handle one
   SET or CLOBBER in an insn.  DATA is really the instruction in which
   the SET is taking place.  */

static void
record_last_set_info (rtx dest, const_rtx setter ATTRIBUTE_UNUSED, void *data)
{
  rtx_insn *last_set_insn = (rtx_insn *) data;

  if (GET_CODE (dest) == SUBREG)
    dest = SUBREG_REG (dest);

  if (REG_P (dest))
    record_last_reg_set_info (last_set_insn, dest);
  else if (MEM_P (dest))
    {
      /* Ignore pushes, they don't clobber memory.  They may still
	 clobber the stack pointer though.  Some targets do argument
	 pushes without adding REG_INC notes.  See e.g. PR25196,
	 where a pushsi2 on i386 doesn't have REG_INC notes.  Note
	 such changes here too.  */
      if (! push_operand (dest, GET_MODE (dest)))
	record_last_mem_set_info (last_set_insn);
      else
	record_last_reg_set_info_regno (last_set_insn, STACK_POINTER_REGNUM);
    }
}


/* Reset tables used to keep track of what's still available since the
   start of the block.  */

static void
reset_opr_set_tables (void)
{
  memset (reg_avail_info, 0, FIRST_PSEUDO_REGISTER * sizeof (int));
  obstack_free (&modifies_mem_obstack, modifies_mem_obstack_bottom);
  modifies_mem_list = NULL;
}


/* Record things set by INSN.
   This data is used by oprs_unchanged_p.  */

static void
record_opr_changes (rtx_insn *insn)
{
  rtx note;

  /* Find all stores and record them.  */
  note_stores (PATTERN (insn), record_last_set_info, insn);

  /* Also record autoincremented REGs for this insn as changed.  */
  for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
    if (REG_NOTE_KIND (note) == REG_INC)
      record_last_reg_set_info (insn, XEXP (note, 0));

  /* Finally, if this is a call, record all call clobbers.  */
  if (CALL_P (insn))
    {
      unsigned int regno;
      rtx link, x;
      hard_reg_set_iterator hrsi;
      EXECUTE_IF_SET_IN_HARD_REG_SET (regs_invalidated_by_call, 0, regno, hrsi)
	record_last_reg_set_info_regno (insn, regno);

      for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
	if (GET_CODE (XEXP (link, 0)) == CLOBBER)
	  {
	    x = XEXP (XEXP (link, 0), 0);
	    if (REG_P (x))
	      {
		gcc_assert (HARD_REGISTER_P (x));
		record_last_reg_set_info (insn, x);
	      }
	  }

      if (! RTL_CONST_OR_PURE_CALL_P (insn))
	record_last_mem_set_info (insn);
    }
}


/* Scan the pattern of INSN and add an entry to the hash table.
   After reload we are interested in loads/stores only.  */

static void
hash_scan_set (rtx_insn *insn)
{
  rtx pat = PATTERN (insn);
  rtx src = SET_SRC (pat);
  rtx dest = SET_DEST (pat);

  /* We are only interested in loads and stores.  */
  if (! MEM_P (src) && ! MEM_P (dest))
    return;

  /* Don't mess with jumps and nops.  */
  if (JUMP_P (insn) || set_noop_p (pat))
    return;

  if (REG_P (dest))
    {
      if (/* Don't CSE something if we can't do a reg/reg copy.  */
	  can_copy_p (GET_MODE (dest))
	  /* Is SET_SRC something we want to gcse?  */
	  && general_operand (src, GET_MODE (src))
#ifdef STACK_REGS
	  /* Never consider insns touching the register stack.  It may
	     create situations that reg-stack cannot handle (e.g. a stack
	     register live across an abnormal edge).  */
	  && (REGNO (dest) < FIRST_STACK_REG || REGNO (dest) > LAST_STACK_REG)
#endif
	  /* An expression is not available if its operands are
	     subsequently modified, including this insn.  */
	  && oprs_unchanged_p (src, insn, true))
	{
	  insert_expr_in_table (src, insn);
	}
    }
  else if (REG_P (src))
    {
      /* This is a store; record the memory destination in the hash table
	 if the stored value stays available until the end of the block.  */
      if (/* Don't CSE something if we can't do a reg/reg copy.  */
	  can_copy_p (GET_MODE (src))
	  /* Is SET_DEST something we want to gcse?  */
	  && general_operand (dest, GET_MODE (dest))
#ifdef STACK_REGS
	  /* As above for STACK_REGS.  */
	  && (REGNO (src) < FIRST_STACK_REG || REGNO (src) > LAST_STACK_REG)
#endif
	  && ! (flag_float_store && FLOAT_MODE_P (GET_MODE (dest)))
	  /* Check if the memory expression is killed after insn.  */
	  && ! load_killed_in_block_p (INSN_CUID (insn) + 1, dest, true)
	  && oprs_unchanged_p (XEXP (dest, 0), insn, true))
	{
	  insert_expr_in_table (dest, insn);
	}
    }
}


/* Create a hash table of memory expressions available at the end of basic
   blocks.  Basically you should think of this hash table as the
   representation of AVAIL_OUT.  This is the set of expressions that is
   generated in a basic block and not killed before the end of the same
   basic block.  Notice that this is really a local computation.  */
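
/* For example (registers invented for the illustration), in a block

       insn A:  r3 = [r1]
       insn B:  call foo    (not const/pure)

   the load at insn A is not entered into the hash table: the call
   clobbers memory, so [r1] does not survive to the end of the block and
   the expression is not in this block's AVAIL_OUT.  Without the call,
   the load would be recorded as available at the block's end.  */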

static void
compute_hash_table (void)
{
  basic_block bb;

  FOR_EACH_BB_FN (bb, cfun)
    {
      rtx_insn *insn;

      /* First pass over the instructions records information used to
	 determine when registers and memory are last set.
	 Since we compute a "local" AVAIL_OUT, reset the tables that
	 help us keep track of what has been modified since the start
	 of the block.  */
      reset_opr_set_tables ();
      FOR_BB_INSNS (bb, insn)
	{
	  if (INSN_P (insn))
	    record_opr_changes (insn);
	}

      /* The next pass actually builds the hash table.  */
      FOR_BB_INSNS (bb, insn)
	if (INSN_P (insn) && GET_CODE (PATTERN (insn)) == SET)
	  hash_scan_set (insn);
    }
}


/* Check if register REG is killed in any insn waiting to be inserted on
   edge E.  This function is required to check that our data flow analysis
   is still valid prior to commit_edge_insertions.  */

static bool
reg_killed_on_edge (rtx reg, edge e)
{
  rtx_insn *insn;

  for (insn = e->insns.r; insn; insn = NEXT_INSN (insn))
    if (INSN_P (insn) && reg_set_p (reg, insn))
      return true;

  return false;
}

/* Similar to above - check if register REG is used in any insn waiting
   to be inserted on edge E.
   Assumes no such insn can be a CALL_INSN; if so call reg_used_between_p
   with PREV(insn),NEXT(insn) instead of calling reg_overlap_mentioned_p.  */

static bool
reg_used_on_edge (rtx reg, edge e)
{
  rtx_insn *insn;

  for (insn = e->insns.r; insn; insn = NEXT_INSN (insn))
    if (INSN_P (insn) && reg_overlap_mentioned_p (reg, PATTERN (insn)))
      return true;

  return false;
}

/* Return the loaded/stored register of a load/store instruction.  */

static rtx
get_avail_load_store_reg (rtx_insn *insn)
{
  if (REG_P (SET_DEST (PATTERN (insn))))
    /* A load.  */
    return SET_DEST (PATTERN (insn));
  else
    {
      /* A store.  */
      gcc_assert (REG_P (SET_SRC (PATTERN (insn))));
      return SET_SRC (PATTERN (insn));
    }
}

/* Return nonzero if the predecessors of BB are "well behaved".  */

static bool
bb_has_well_behaved_predecessors (basic_block bb)
{
  edge pred;
  edge_iterator ei;

  if (EDGE_COUNT (bb->preds) == 0)
    return false;

  FOR_EACH_EDGE (pred, ei, bb->preds)
    {
      /* commit_one_edge_insertion refuses to insert on abnormal edges even
	 if the source has only one successor, so EDGE_CRITICAL_P is too
	 weak a test here.  */
      if ((pred->flags & EDGE_ABNORMAL) && !single_pred_p (pred->dest))
	return false;

      if ((pred->flags & EDGE_ABNORMAL_CALL) && cfun->has_nonlocal_label)
	return false;

      if (tablejump_p (BB_END (pred->src), NULL, NULL))
	return false;
    }
  return true;
}


/* Search the occurrence list ORIG_OCCR for an occurrence of the
   expression in BB.  */

static struct occr *
get_bb_avail_insn (basic_block bb, struct occr *orig_occr, int bitmap_index)
{
  struct occr *occr = orig_occr;

  for (; occr != NULL; occr = occr->next)
    if (BLOCK_FOR_INSN (occr->insn) == bb)
      return occr;

  /* If we could not find an occurrence in BB, see if BB
     has a single predecessor with an occurrence that is
     transparent through BB.  */
  if (single_pred_p (bb)
      && bitmap_bit_p (transp[bb->index], bitmap_index)
      && (occr = get_bb_avail_insn (single_pred (bb), orig_occr, bitmap_index)))
    {
      rtx avail_reg = get_avail_load_store_reg (occr->insn);
      if (!reg_set_between_p (avail_reg,
			      PREV_INSN (BB_HEAD (bb)),
			      NEXT_INSN (BB_END (bb)))
	  && !reg_killed_on_edge (avail_reg, single_pred_edge (bb)))
	return occr;
    }

  return NULL;
}


/* This helper is called via htab_traverse.  */
int
compute_expr_transp (expr **slot, FILE *dump_file ATTRIBUTE_UNUSED)
{
  struct expr *expr = *slot;

  compute_transp (expr->expr, expr->bitmap_index, transp,
		  blocks_with_calls, modify_mem_list_set,
		  canon_modify_mem_list);
  return 1;
}

/* This handles the case where several stores feed a partially redundant
   load.  It checks whether the redundancy elimination is possible and
   whether it's worth it.

   Redundancy elimination is possible if:
   1) None of the operands of the load have been modified since the start
      of the current basic block.
   2) In some predecessor of the current basic block, the same expression
      is generated.

   See the function body for the heuristics that determine if eliminating
   a redundancy is also worth doing, assuming it is possible.  */
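
/* For example (registers invented for the illustration), for a load
   r3 = [r1] in the current block, condition 1 is established by the
   caller via oprs_unchanged_p, so a redefinition of r1 earlier in the
   same block prevents us from ever getting here; and if no predecessor
   keeps [r1] available in some register, condition 2 fails for every
   edge, npred_ok below stays 0, and we give up without changing
   anything.  */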

static void
eliminate_partially_redundant_load (basic_block bb, rtx_insn *insn,
				    struct expr *expr)
{
  edge pred;
  rtx_insn *avail_insn = NULL;
  rtx avail_reg;
  rtx dest, pat;
  struct occr *a_occr;
  struct unoccr *occr, *avail_occrs = NULL;
  struct unoccr *unoccr, *unavail_occrs = NULL, *rollback_unoccr = NULL;
  int npred_ok = 0;
  /* Redundant load execution count.  */
  profile_count ok_count = profile_count::zero ();
  /* Execution count of critical edges.  */
  profile_count critical_count = profile_count::zero ();
  edge_iterator ei;
  bool critical_edge_split = false;

  /* The execution count of the loads to be added to make the
     load fully redundant.  */
  profile_count not_ok_count = profile_count::zero ();
  basic_block pred_bb;

  pat = PATTERN (insn);
  dest = SET_DEST (pat);

  /* Check that the loaded register is not used, set, or killed from the
     beginning of the block.  */
  if (reg_changed_after_insn_p (dest, 0)
      || reg_used_between_p (dest, PREV_INSN (BB_HEAD (bb)), insn))
    return;

  /* Check the potential for replacing the load with a copy for each
     predecessor.  */
  FOR_EACH_EDGE (pred, ei, bb->preds)
    {
      rtx_insn *next_pred_bb_end;

      avail_insn = NULL;
      avail_reg = NULL_RTX;
      pred_bb = pred->src;
      for (a_occr = get_bb_avail_insn (pred_bb,
				       expr->avail_occr,
				       expr->bitmap_index);
	   a_occr;
	   a_occr = get_bb_avail_insn (pred_bb,
				       a_occr->next,
				       expr->bitmap_index))
	{
	  /* Check if the loaded register is not used.  */
	  avail_insn = a_occr->insn;
	  avail_reg = get_avail_load_store_reg (avail_insn);
	  gcc_assert (avail_reg);

	  /* Make sure we can generate a move from register avail_reg to
	     dest.  */
	  rtx_insn *move = gen_move_insn (copy_rtx (dest),
					  copy_rtx (avail_reg));
	  extract_insn (move);
	  if (! constrain_operands (1, get_preferred_alternatives (insn,
								   pred_bb))
	      || reg_killed_on_edge (avail_reg, pred)
	      || reg_used_on_edge (dest, pred))
	    {
	      avail_insn = NULL;
	      continue;
	    }
	  next_pred_bb_end = NEXT_INSN (BB_END (BLOCK_FOR_INSN (avail_insn)));
	  if (!reg_set_between_p (avail_reg, avail_insn, next_pred_bb_end))
	    /* AVAIL_INSN remains non-null.  */
	    break;
	  else
	    avail_insn = NULL;
	}

      if (EDGE_CRITICAL_P (pred) && pred->count ().initialized_p ())
	critical_count += pred->count ();

      if (avail_insn != NULL_RTX)
	{
	  npred_ok++;
	  if (pred->count ().initialized_p ())
	    ok_count = ok_count + pred->count ();
	  if (! set_noop_p (PATTERN (gen_move_insn (copy_rtx (dest),
						    copy_rtx (avail_reg)))))
	    {
	      /* Check if there is going to be a split.  */
	      if (EDGE_CRITICAL_P (pred))
		critical_edge_split = true;
	    }
	  else /* It's a dead move; no need to generate it.  */
	    continue;
	  occr = (struct unoccr *) obstack_alloc (&unoccr_obstack,
						  sizeof (struct unoccr));
	  occr->insn = avail_insn;
	  occr->pred = pred;
	  occr->next = avail_occrs;
	  avail_occrs = occr;
	  if (! rollback_unoccr)
	    rollback_unoccr = occr;
	}
      else
	{
	  /* Adding a load on a critical edge will cause a split.  */
	  if (EDGE_CRITICAL_P (pred))
	    critical_edge_split = true;
	  if (pred->count ().initialized_p ())
	    not_ok_count = not_ok_count + pred->count ();
	  unoccr = (struct unoccr *) obstack_alloc (&unoccr_obstack,
						    sizeof (struct unoccr));
	  unoccr->insn = NULL;
	  unoccr->pred = pred;
	  unoccr->next = unavail_occrs;
	  unavail_occrs = unoccr;
	  if (! rollback_unoccr)
	    rollback_unoccr = unoccr;
	}
    }

  if (/* No load can be replaced by copy.  */
      npred_ok == 0
      /* Prevent exploding the code.  */
      || (optimize_bb_for_size_p (bb) && npred_ok > 1)
      /* If we don't have profile information we cannot tell whether
	 splitting a critical edge is profitable or not, so don't do it.  */
      || ((! profile_info || profile_status_for_fn (cfun) != PROFILE_READ
	   || targetm.cannot_modify_jumps_p ())
	  && critical_edge_split))
    goto cleanup;

  /* Check if it's worth applying the partial redundancy elimination.  */
  if (ok_count.to_gcov_type ()
      < GCSE_AFTER_RELOAD_PARTIAL_FRACTION * not_ok_count.to_gcov_type ())
    goto cleanup;
  if (ok_count.to_gcov_type ()
      < GCSE_AFTER_RELOAD_CRITICAL_FRACTION * critical_count.to_gcov_type ())
    goto cleanup;
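
  /* A worked example of the two checks above, with invented numbers: if,
     say, GCSE_AFTER_RELOAD_PARTIAL_FRACTION is 3 and
     GCSE_AFTER_RELOAD_CRITICAL_FRACTION is 10, then profiled counts of
     ok_count == 900, not_ok_count == 200 and critical_count == 50 pass
     both checks (900 >= 3 * 200 and 900 >= 10 * 50) and we transform;
     with not_ok_count == 400 the first check fails (900 < 1200) and we
     give up.  */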

  /* Generate moves to the loaded register from where
     the memory is available.  */
  for (occr = avail_occrs; occr; occr = occr->next)
    {
      avail_insn = occr->insn;
      pred = occr->pred;
      /* Set avail_reg to be the register having the value of the
	 memory.  */
      avail_reg = get_avail_load_store_reg (avail_insn);
      gcc_assert (avail_reg);

      insert_insn_on_edge (gen_move_insn (copy_rtx (dest),
					  copy_rtx (avail_reg)),
			   pred);
      stats.moves_inserted++;

      if (dump_file)
	fprintf (dump_file,
		 "generating move from %d to %d on edge from %d to %d\n",
		 REGNO (avail_reg),
		 REGNO (dest),
		 pred->src->index,
		 pred->dest->index);
    }

  /* Regenerate loads where the memory is unavailable.  */
  for (unoccr = unavail_occrs; unoccr; unoccr = unoccr->next)
    {
      pred = unoccr->pred;
      insert_insn_on_edge (copy_insn (PATTERN (insn)), pred);
      stats.copies_inserted++;

      if (dump_file)
	{
	  fprintf (dump_file,
		   "generating on edge from %d to %d a copy of load: ",
		   pred->src->index,
		   pred->dest->index);
	  print_rtl (dump_file, PATTERN (insn));
	  fprintf (dump_file, "\n");
	}
    }

  /* If the load is not itself available at the end of BB, delete it now;
     otherwise only mark it for later deletion, since keeping it around
     may help discover additional redundancies.  */
  for (a_occr = get_bb_avail_insn (bb, expr->avail_occr, expr->bitmap_index);
       a_occr && (a_occr->insn != insn);
       a_occr = get_bb_avail_insn (bb, a_occr->next, expr->bitmap_index))
    ;

  if (!a_occr)
    {
      stats.insns_deleted++;

      if (dump_file)
	{
	  fprintf (dump_file, "deleting insn:\n");
	  print_rtl_single (dump_file, insn);
	  fprintf (dump_file, "\n");
	}
      delete_insn (insn);
    }
  else
    a_occr->deleted_p = 1;

cleanup:
  if (rollback_unoccr)
    obstack_free (&unoccr_obstack, rollback_unoccr);
}
1244*38fd1498Szrj
1245*38fd1498Szrj /* Performing the redundancy elimination as described before. */

static void
eliminate_partially_redundant_loads (void)
{
  rtx_insn *insn;
  basic_block bb;

  /* Note we start at block 1.  */

  if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb == EXIT_BLOCK_PTR_FOR_FN (cfun))
    return;

  FOR_BB_BETWEEN (bb,
                  ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb->next_bb,
                  EXIT_BLOCK_PTR_FOR_FN (cfun),
                  next_bb)
    {
      /* Don't try anything on basic blocks with strange predecessors.  */
      if (! bb_has_well_behaved_predecessors (bb))
        continue;

      /* Do not try anything on cold basic blocks.  */
      if (optimize_bb_for_size_p (bb))
        continue;

      /* Reset the table of things changed since the start of the current
         basic block.  */
      reset_opr_set_tables ();

      /* Look at all insns in the current basic block and see if there are
         any loads in it that we can record.  */
      FOR_BB_INSNS (bb, insn)
        {
          /* Is it a load - of the form (set (reg) (mem))?  */
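          /* E.g. a reload from a stack slot might look like
             (set (reg:SI 0) (mem:SI (plus:SI (reg:SI 6) (const_int -4)))),
             though the registers, mode and address form are
             target-specific; this is only an illustration.  */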
          if (NONJUMP_INSN_P (insn)
              && GET_CODE (PATTERN (insn)) == SET
              && REG_P (SET_DEST (PATTERN (insn)))
              && MEM_P (SET_SRC (PATTERN (insn))))
            {
              rtx pat = PATTERN (insn);
              rtx src = SET_SRC (pat);
              struct expr *expr;

              if (!MEM_VOLATILE_P (src)
                  && GET_MODE (src) != BLKmode
                  && general_operand (src, GET_MODE (src))
                  /* Are the operands unchanged since the start of the
                     block?  */
                  && oprs_unchanged_p (src, insn, false)
                  && !(cfun->can_throw_non_call_exceptions && may_trap_p (src))
                  && !side_effects_p (src)
                  /* Is the expression recorded?  */
                  && (expr = lookup_expr_in_table (src)) != NULL)
                {
                  /* We now have a load (insn) and a memory expression
                     available at the start of its block (expr).  Try to
                     remove the load if it is redundant.  */
                  eliminate_partially_redundant_load (bb, insn, expr);
                }
            }

          /* Keep track of everything modified by this insn, so that we
             know what has been modified since the start of the current
             basic block.  */
          if (INSN_P (insn))
            record_opr_changes (insn);
        }
    }

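  /* Emit all insns queued on edges above; commit_edge_insertions may
     split critical edges to find a safe place for the new code.  */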
  commit_edge_insertions ();
}

/* Go over the expression hash table and delete insns that were
   marked for later deletion.  */

/* This helper is called via hash_table::traverse.  */
int
delete_redundant_insns_1 (expr **slot, void *data ATTRIBUTE_UNUSED)
{
  struct expr *exprs = *slot;
  struct occr *occr;

  for (occr = exprs->avail_occr; occr != NULL; occr = occr->next)
    {
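      /* dbg_cnt keeps the deletions bisectable: -fdbg-cnt=gcse2_delete:N
         limits how many of the marked insns are actually removed.  */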
      if (occr->deleted_p && dbg_cnt (gcse2_delete))
        {
          delete_insn (occr->insn);
          stats.insns_deleted++;

          if (dump_file)
            {
              fprintf (dump_file, "deleting insn:\n");
              print_rtl_single (dump_file, occr->insn);
              fprintf (dump_file, "\n");
            }
        }
    }

  return 1;
}

static void
delete_redundant_insns (void)
{
  expr_table->traverse <void *, delete_redundant_insns_1> (NULL);
  if (dump_file)
    fprintf (dump_file, "\n");
}

/* Main entry point of GCSE after reload: clean up redundant loads
   created by spilling.  */

static void
gcse_after_reload_main (rtx f ATTRIBUTE_UNUSED)
{
  memset (&stats, 0, sizeof (stats));

  /* Allocate memory for this pass.
     Also computes and initializes the insns' CUIDs.  */
  alloc_mem ();

  /* We need alias analysis.  */
  init_alias_analysis ();

  compute_hash_table ();

  if (dump_file)
    dump_hash_table (dump_file);

  if (expr_table->elements () > 0)
    {
      /* Knowing which MEMs are transparent through a block can
         significantly increase the number of redundant loads found.  So
         compute transparency information for each memory expression in
         the hash table.  */
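      /* Here "transparent" means that neither the referenced memory nor
         the registers in its address can be modified in the block, so
         availability survives across it.  */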
      df_analyze ();
      /* This cannot be part of the normal allocation routine because
         we have to know the number of elements in the hash table.  */
      transp = sbitmap_vector_alloc (last_basic_block_for_fn (cfun),
                                     expr_table->elements ());
      bitmap_vector_ones (transp, last_basic_block_for_fn (cfun));
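      /* transp starts out all-ones (everything transparent everywhere);
         compute_expr_transp then clears the bit of each block that kills
         a given memory expression.  */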
      expr_table->traverse <FILE *, compute_expr_transp> (dump_file);
      eliminate_partially_redundant_loads ();
      delete_redundant_insns ();
      sbitmap_vector_free (transp);

      if (dump_file)
        {
          fprintf (dump_file, "GCSE AFTER RELOAD stats:\n");
          fprintf (dump_file, "copies inserted: %d\n", stats.copies_inserted);
          fprintf (dump_file, "moves inserted: %d\n", stats.moves_inserted);
          fprintf (dump_file, "insns deleted: %d\n", stats.insns_deleted);
          fprintf (dump_file, "\n\n");
        }

      statistics_counter_event (cfun, "copies inserted",
                                stats.copies_inserted);
      statistics_counter_event (cfun, "moves inserted",
                                stats.moves_inserted);
      statistics_counter_event (cfun, "insns deleted",
                                stats.insns_deleted);
    }

  /* We are finished with alias analysis.  */
  end_alias_analysis ();

  free_mem ();
}



static unsigned int
rest_of_handle_gcse2 (void)
{
  gcse_after_reload_main (get_insns ());
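  /* Splitting edges during insertion can create new jumps; recompute
     JUMP_LABEL fields and label use counts so they stay consistent.  */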
  rebuild_jump_labels (get_insns ());
  return 0;
}

namespace {

const pass_data pass_data_gcse2 =
{
  RTL_PASS, /* type */
  "gcse2", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_GCSE_AFTER_RELOAD, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_gcse2 : public rtl_opt_pass
{
public:
  pass_gcse2 (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_gcse2, ctxt)
  {}

  /* opt_pass methods: */
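  /* Run only when optimizing, when -fgcse-after-reload is enabled, and
     when this function is optimized for speed rather than size.  */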
  virtual bool gate (function *fun)
    {
      return (optimize > 0 && flag_gcse_after_reload
              && optimize_function_for_speed_p (fun));
    }

  virtual unsigned int execute (function *) { return rest_of_handle_gcse2 (); }

}; // class pass_gcse2

} // anon namespace

rtl_opt_pass *
make_pass_gcse2 (gcc::context *ctxt)
{
  return new pass_gcse2 (ctxt);
}