xref: /netbsd-src/external/gpl3/gcc.old/dist/gcc/postreload-gcse.c (revision a24efa7dea9f1f56c3bdb15a927d3516792ace1c)
1 /* Post reload partially redundant load elimination
2    Copyright (C) 2004-2013 Free Software Foundation, Inc.
3 
4 This file is part of GCC.
5 
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
9 version.
10 
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3.  If not see
18 <http://www.gnu.org/licenses/>.  */
19 
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "tm.h"
24 #include "diagnostic-core.h"
25 
26 #include "rtl.h"
27 #include "tree.h"
28 #include "tm_p.h"
29 #include "regs.h"
30 #include "hard-reg-set.h"
31 #include "flags.h"
32 #include "insn-config.h"
33 #include "recog.h"
34 #include "basic-block.h"
35 #include "function.h"
36 #include "expr.h"
37 #include "except.h"
38 #include "intl.h"
39 #include "obstack.h"
40 #include "hashtab.h"
41 #include "params.h"
42 #include "target.h"
43 #include "tree-pass.h"
44 #include "dbgcnt.h"
45 
46 /* The following code implements gcse after reload.  The purpose of this
47    pass is to clean up redundant loads generated by reload and other
48    optimizations that come after gcse.  It searches for simple inter-block
49    redundancies and tries to eliminate them by adding moves and loads
50    in cold places.
51 
52    Perform partially redundant load elimination: try to eliminate redundant
53    loads created by the reload pass.  We look for fully or partially
54    redundant loads fed by one or more loads/stores in predecessor BBs,
55    and try adding loads to make them fully redundant.  We also check if
56    it's worth adding loads to be able to delete the redundant load.
57 
58    Algorithm:
59    1. Build available expressions hash table:
60        For each load/store instruction, if the loaded/stored memory didn't
61        change until the end of the basic block add this memory expression to
62        the hash table.
63    2. Perform Redundancy elimination:
64       For each load instruction do the following:
65 	 perform partial redundancy elimination, check if it's worth adding
66 	 loads to make the load fully redundant.  If so add loads and
67 	 register copies and delete the load.
68    3. Delete instructions made redundant in step 2.  (See the sketch below.)
69 
70    Future enhancement:
71      If the loaded register is used/defined between load and some store,
72      look for some other free register between load and all its stores,
73      and replace the load with a copy from this register to the loaded
74      register.
75 */
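
/* As a purely illustrative sketch (the register numbers and modes below are
   made up), consider a diamond where only one predecessor makes the memory
   available at its end:

	bb1:  (set (mem:SI (reg:SI 4)) (reg:SI 5))	;; store, available
	bb2:  ...					;; memory not available
	bb3:  (set (reg:SI 6) (mem:SI (reg:SI 4)))	;; partially redundant load

   If the heuristics approve, the pass inserts the register copy
   (set (reg:SI 6) (reg:SI 5)) on the edge bb1->bb3, inserts a copy of the
   load itself on the edge bb2->bb3, and then deletes the load in bb3.  */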
76 
77 
78 /* Keep statistics of this pass.  */
79 static struct
80 {
81   int moves_inserted;
82   int copies_inserted;
83   int insns_deleted;
84 } stats;
85 
86 /* We need to keep a hash table of expressions.  The table entries are of
87    type 'struct expr', and for each expression there is a singly-linked
88    list of occurrences.  */
89 
90 /* The table itself.  */
91 static htab_t expr_table;
92 
93 /* Expression elements in the hash table.  */
94 struct expr
95 {
96   /* The expression (SET_SRC for expressions, PATTERN for assignments).  */
97   rtx expr;
98 
99   /* The hash value for this entry.  */
100   hashval_t hash;
101 
102   /* List of available occurrences in basic blocks in the function.  */
103   struct occr *avail_occr;
104 };
105 
106 static struct obstack expr_obstack;
107 
108 /* Occurrence of an expression.
109    There is at most one occurrence per basic block.  If a pattern appears
110    more than once, the last appearance is used.  */
111 
112 struct occr
113 {
114   /* Next occurrence of this expression.  */
115   struct occr *next;
116   /* The insn that computes the expression.  */
117   rtx insn;
118   /* Nonzero if this occurrence has been marked for later deletion.  */
119   char deleted_p;
120 };
121 
122 static struct obstack occr_obstack;
123 
124 /* The following structure records, for one predecessor edge, the insn (if
125    any) that makes a partially redundant load's value available there.  */
126 struct unoccr
127 {
128   struct unoccr *next;
129   edge pred;
130   rtx insn;
131 };
132 
133 static struct obstack unoccr_obstack;
134 
135 /* Array indexed by hard register number.  Each element is the CUID of the
136    insn that last set that register, counted since the start of the current
137    basic block.
138 
139    This array is used during the building of the hash table (step 1) to
140    determine if a reg is killed before the end of a basic block.
141 
142    It is also used when eliminating partial redundancies (step 2) to see
143    if a reg was modified since the start of a basic block.  */
144 static int *reg_avail_info;
145 
146 /* A list of insns that may modify memory within the current basic block.  */
147 struct modifies_mem
148 {
149   rtx insn;
150   struct modifies_mem *next;
151 };
152 static struct modifies_mem *modifies_mem_list;
153 
154 /* The modifies_mem structs also go on an obstack, but this obstack is
155    freed each time after completing the analysis or transformations on
156    a basic block.  So we allocate a dummy modifies_mem_obstack_bottom
157    object on the obstack to keep track of the bottom of the obstack.  */
158 static struct obstack modifies_mem_obstack;
159 static struct modifies_mem  *modifies_mem_obstack_bottom;
160 
161 /* Mapping of insn UIDs to CUIDs.
162    CUIDs are like UIDs except they increase monotonically in each basic
163    block, have no gaps, and only apply to real insns.  */
164 static int *uid_cuid;
165 #define INSN_CUID(INSN) (uid_cuid[INSN_UID (INSN)])
166 
167 
168 /* Helpers for memory allocation/freeing.  */
169 static void alloc_mem (void);
170 static void free_mem (void);
171 
172 /* Support for hash table construction and transformations.  */
173 static bool oprs_unchanged_p (rtx, rtx, bool);
174 static void record_last_reg_set_info (rtx, rtx);
175 static void record_last_reg_set_info_regno (rtx, int);
176 static void record_last_mem_set_info (rtx);
177 static void record_last_set_info (rtx, const_rtx, void *);
178 static void record_opr_changes (rtx);
179 
180 static void find_mem_conflicts (rtx, const_rtx, void *);
181 static int load_killed_in_block_p (int, rtx, bool);
182 static void reset_opr_set_tables (void);
183 
184 /* Hash table support.  */
185 static hashval_t hash_expr (rtx, int *);
186 static hashval_t hash_expr_for_htab (const void *);
187 static int expr_equiv_p (const void *, const void *);
188 static void insert_expr_in_table (rtx, rtx);
189 static struct expr *lookup_expr_in_table (rtx);
190 static int dump_hash_table_entry (void **, void *);
191 static void dump_hash_table (FILE *);
192 
193 /* Helpers for eliminate_partially_redundant_load.  */
194 static bool reg_killed_on_edge (rtx, edge);
195 static bool reg_used_on_edge (rtx, edge);
196 
197 static rtx get_avail_load_store_reg (rtx);
198 
199 static bool bb_has_well_behaved_predecessors (basic_block);
200 static struct occr* get_bb_avail_insn (basic_block, struct occr *);
201 static void hash_scan_set (rtx);
202 static void compute_hash_table (void);
203 
204 /* The workhorses of this pass.  */
205 static void eliminate_partially_redundant_load (basic_block,
206 						rtx,
207 						struct expr *);
208 static void eliminate_partially_redundant_loads (void);
209 
210 
211 /* Allocate memory for the CUID mapping array and register/memory
212    tracking tables.  */
213 
214 static void
215 alloc_mem (void)
216 {
217   int i;
218   basic_block bb;
219   rtx insn;
220 
221   /* Find the largest UID and create a mapping from UIDs to CUIDs.  */
222   uid_cuid = XCNEWVEC (int, get_max_uid () + 1);
223   i = 1;
224   FOR_EACH_BB (bb)
225     FOR_BB_INSNS (bb, insn)
226       {
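        /* Note that non-insns get the same CUID as the next real insn,
           since I is only advanced for real insns.  */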
227         if (INSN_P (insn))
228 	  uid_cuid[INSN_UID (insn)] = i++;
229 	else
230 	  uid_cuid[INSN_UID (insn)] = i;
231       }
232 
233   /* Allocate the available expressions hash table.  We don't want to
234      make the hash table too small, but unnecessarily making it too large
235      also doesn't help.  The i/4 is a gcse.c relic, and seems like a
236      reasonable choice.  */
237   expr_table = htab_create (MAX (i / 4, 13),
238 			    hash_expr_for_htab, expr_equiv_p, NULL);
239 
240   /* We allocate everything on obstacks because we often can roll back
241      the whole obstack to some point.  Freeing obstacks is very fast.  */
242   gcc_obstack_init (&expr_obstack);
243   gcc_obstack_init (&occr_obstack);
244   gcc_obstack_init (&unoccr_obstack);
245   gcc_obstack_init (&modifies_mem_obstack);
246 
247   /* Working array used to track the last set for each register
248      in the current block.  */
249   reg_avail_info = (int *) xmalloc (FIRST_PSEUDO_REGISTER * sizeof (int));
250 
251   /* Put a dummy modifies_mem object on the modifies_mem_obstack, so we
252      can roll it back in reset_opr_set_tables.  */
253   modifies_mem_obstack_bottom =
254     (struct modifies_mem *) obstack_alloc (&modifies_mem_obstack,
255 					   sizeof (struct modifies_mem));
256 }
257 
258 /* Free memory allocated by alloc_mem.  */
259 
260 static void
261 free_mem (void)
262 {
263   free (uid_cuid);
264 
265   htab_delete (expr_table);
266 
267   obstack_free (&expr_obstack, NULL);
268   obstack_free (&occr_obstack, NULL);
269   obstack_free (&unoccr_obstack, NULL);
270   obstack_free (&modifies_mem_obstack, NULL);
271 
272   free (reg_avail_info);
273 }
274 
275 
276 /* Hash expression X.
277    *DO_NOT_RECORD_P is set nonzero if a volatile operand is found or if
278    the expression contains something we don't want to insert in the
279    table.
280 
281 static hashval_t
282 hash_expr (rtx x, int *do_not_record_p)
283 {
284   *do_not_record_p = 0;
285   return hash_rtx (x, GET_MODE (x), do_not_record_p,
286 		   NULL,  /*have_reg_qty=*/false);
287 }
288 
289 /* Callback for hashtab.
290    Return the hash value for expression EXP.  We don't actually hash
291    here, we just return the cached hash value.  */
292 
293 static hashval_t
294 hash_expr_for_htab (const void *expp)
295 {
296   const struct expr *const exp = (const struct expr *) expp;
297   return exp->hash;
298 }
299 
300 /* Callback for hashtab.
301    Return nonzero if exp1 is equivalent to exp2.  */
302 
303 static int
304 expr_equiv_p (const void *exp1p, const void *exp2p)
305 {
306   const struct expr *const exp1 = (const struct expr *) exp1p;
307   const struct expr *const exp2 = (const struct expr *) exp2p;
308   int equiv_p = exp_equiv_p (exp1->expr, exp2->expr, 0, true);
309 
310   gcc_assert (!equiv_p || exp1->hash == exp2->hash);
311   return equiv_p;
312 }
313 
314 
315 /* Insert expression X, found in INSN, into the expression hash table.
316    If it is already present, record it as the last occurrence in INSN's
317    basic block.  */
318 
319 static void
320 insert_expr_in_table (rtx x, rtx insn)
321 {
322   int do_not_record_p;
323   hashval_t hash;
324   struct expr *cur_expr, **slot;
325   struct occr *avail_occr, *last_occr = NULL;
326 
327   hash = hash_expr (x, &do_not_record_p);
328 
329   /* Do not insert expression in the table if it contains volatile operands,
330      or if hash_expr determines the expression is something we don't want
331      to or can't handle.  */
332   if (do_not_record_p)
333     return;
334 
335   /* We anticipate that redundant expressions are rare, so for convenience
336      allocate a new hash table element here already and set its fields.
337      If we don't do this, we need a hack with a static struct expr.  Anyway,
338      obstack_free is really fast and one more obstack_alloc doesn't hurt if
339      we're going to see more expressions later on.  */
340   cur_expr = (struct expr *) obstack_alloc (&expr_obstack,
341 					    sizeof (struct expr));
342   cur_expr->expr = x;
343   cur_expr->hash = hash;
344   cur_expr->avail_occr = NULL;
345 
346   slot = (struct expr **) htab_find_slot_with_hash (expr_table, cur_expr,
347 						    hash, INSERT);
348 
349   if (! (*slot))
350     /* The expression isn't found, so insert it.  */
351     *slot = cur_expr;
352   else
353     {
354       /* The expression is already in the table, so roll back the
355 	 obstack and use the existing table entry.  */
356       obstack_free (&expr_obstack, cur_expr);
357       cur_expr = *slot;
358     }
359 
360   /* Search for another occurrence in the same basic block.  */
361   avail_occr = cur_expr->avail_occr;
362   while (avail_occr
363 	 && BLOCK_FOR_INSN (avail_occr->insn) != BLOCK_FOR_INSN (insn))
364     {
365       /* If an occurrence isn't found, save a pointer to the end of
366 	 the list.  */
367       last_occr = avail_occr;
368       avail_occr = avail_occr->next;
369     }
370 
371   if (avail_occr)
372     /* Found another instance of the expression in the same basic block.
373        Prefer this occurrence to the currently recorded one.  We want
374        the last one in the block and the block is scanned from start
375        to end.  */
376     avail_occr->insn = insn;
377   else
378     {
379       /* First occurrence of this expression in this basic block.  */
380       avail_occr = (struct occr *) obstack_alloc (&occr_obstack,
381 						  sizeof (struct occr));
382 
383       /* First occurrence of this expression in any block?  */
384       if (cur_expr->avail_occr == NULL)
385         cur_expr->avail_occr = avail_occr;
386       else
387         last_occr->next = avail_occr;
388 
389       avail_occr->insn = insn;
390       avail_occr->next = NULL;
391       avail_occr->deleted_p = 0;
392     }
393 }
394 
395 
396 /* Lookup pattern PAT in the expression hash table.
397    The result is a pointer to the table entry, or NULL if not found.  */
398 
399 static struct expr *
400 lookup_expr_in_table (rtx pat)
401 {
402   int do_not_record_p;
403   struct expr **slot, *tmp_expr;
404   hashval_t hash = hash_expr (pat, &do_not_record_p);
405 
406   if (do_not_record_p)
407     return NULL;
408 
409   tmp_expr = (struct expr *) obstack_alloc (&expr_obstack,
410 					    sizeof (struct expr));
411   tmp_expr->expr = pat;
412   tmp_expr->hash = hash;
413   tmp_expr->avail_occr = NULL;
414 
415   slot = (struct expr **) htab_find_slot_with_hash (expr_table, tmp_expr,
416                                                     hash, INSERT);
417   obstack_free (&expr_obstack, tmp_expr);
418 
419   if (!slot)
420     return NULL;
421   else
422     return (*slot);
423 }
424 
425 
426 /* Dump all expressions and occurrences that are currently in the
427    expression hash table to FILE.  */
428 
429 /* This helper is called via htab_traverse.  */
430 static int
431 dump_hash_table_entry (void **slot, void *filep)
432 {
433   struct expr *expr = (struct expr *) *slot;
434   FILE *file = (FILE *) filep;
435   struct occr *occr;
436 
437   fprintf (file, "expr: ");
438   print_rtl (file, expr->expr);
439   fprintf (file,"\nhashcode: %u\n", expr->hash);
440   fprintf (file,"list of occurrences:\n");
441   occr = expr->avail_occr;
442   while (occr)
443     {
444       rtx insn = occr->insn;
445       print_rtl_single (file, insn);
446       fprintf (file, "\n");
447       occr = occr->next;
448     }
449   fprintf (file, "\n");
450   return 1;
451 }
452 
453 static void
454 dump_hash_table (FILE *file)
455 {
456   fprintf (file, "\n\nexpression hash table\n");
457   fprintf (file, "size %ld, %ld elements, %f collision/search ratio\n",
458            (long) htab_size (expr_table),
459            (long) htab_elements (expr_table),
460            htab_collisions (expr_table));
461   if (htab_elements (expr_table) > 0)
462     {
463       fprintf (file, "\n\ntable entries:\n");
464       htab_traverse (expr_table, dump_hash_table_entry, file);
465     }
466   fprintf (file, "\n");
467 }
468 
469 /* Return true if register X is recorded as being set by an instruction
470    whose CUID is greater than the one given.  */
471 
472 static bool
473 reg_changed_after_insn_p (rtx x, int cuid)
474 {
475   unsigned int regno, end_regno;
476 
477   regno = REGNO (x);
478   end_regno = END_HARD_REGNO (x);
479   do
480     if (reg_avail_info[regno] > cuid)
481       return true;
482   while (++regno < end_regno);
483   return false;
484 }
485 
486 /* Return nonzero if the operands of expression X are unchanged
487    1) from the start of INSN's basic block up to but not including INSN
488       if AFTER_INSN is false, or
489    2) from INSN to the end of INSN's basic block if AFTER_INSN is true.  */
490 
491 static bool
492 oprs_unchanged_p (rtx x, rtx insn, bool after_insn)
493 {
494   int i, j;
495   enum rtx_code code;
496   const char *fmt;
497 
498   if (x == 0)
499     return 1;
500 
501   code = GET_CODE (x);
502   switch (code)
503     {
504     case REG:
505       /* We are called after register allocation.  */
506       gcc_assert (REGNO (x) < FIRST_PSEUDO_REGISTER);
507       if (after_insn)
508 	return !reg_changed_after_insn_p (x, INSN_CUID (insn) - 1);
509       else
510 	return !reg_changed_after_insn_p (x, 0);
511 
512     case MEM:
513       if (load_killed_in_block_p (INSN_CUID (insn), x, after_insn))
514 	return 0;
515       else
516 	return oprs_unchanged_p (XEXP (x, 0), insn, after_insn);
517 
518     case PC:
519     case CC0: /*FIXME*/
520     case CONST:
521     CASE_CONST_ANY:
522     case SYMBOL_REF:
523     case LABEL_REF:
524     case ADDR_VEC:
525     case ADDR_DIFF_VEC:
526       return 1;
527 
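    /* Auto-modifying addresses change their base register as a side effect
       of this insn, so such an expression can never be unchanged afterwards;
       before the insn, fall through and check the operands below.  */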
528     case PRE_DEC:
529     case PRE_INC:
530     case POST_DEC:
531     case POST_INC:
532     case PRE_MODIFY:
533     case POST_MODIFY:
534       if (after_insn)
535 	return 0;
536       break;
537 
538     default:
539       break;
540     }
541 
542   for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
543     {
544       if (fmt[i] == 'e')
545 	{
546 	  if (! oprs_unchanged_p (XEXP (x, i), insn, after_insn))
547 	    return 0;
548 	}
549       else if (fmt[i] == 'E')
550 	for (j = 0; j < XVECLEN (x, i); j++)
551 	  if (! oprs_unchanged_p (XVECEXP (x, i, j), insn, after_insn))
552 	    return 0;
553     }
554 
555   return 1;
556 }
557 
558 
559 /* Used for communication between find_mem_conflicts and
560    load_killed_in_block_p.  Nonzero if find_mem_conflicts finds a
561    conflict between two memory references.
562    This is a bit of a hack to work around the limitations of note_stores.  */
563 static int mems_conflict_p;
564 
565 /* DEST is the output of an instruction.  If it is a memory reference, and
566    possibly conflicts with the load found in DATA, then set mems_conflict_p
567    to a nonzero value.  */
568 
569 static void
570 find_mem_conflicts (rtx dest, const_rtx setter ATTRIBUTE_UNUSED,
571 		    void *data)
572 {
573   rtx mem_op = (rtx) data;
574 
575   while (GET_CODE (dest) == SUBREG
576 	 || GET_CODE (dest) == ZERO_EXTRACT
577 	 || GET_CODE (dest) == STRICT_LOW_PART)
578     dest = XEXP (dest, 0);
579 
580   /* If DEST is not a MEM, then it will not conflict with the load.  Note
581      that function calls are assumed to clobber memory, but are handled
582      elsewhere.  */
583   if (! MEM_P (dest))
584     return;
585 
586   if (true_dependence (dest, GET_MODE (dest), mem_op))
587     mems_conflict_p = 1;
588 }
589 
590 
591 /* Return nonzero if the expression in X (a memory reference) is killed
592    in the current basic block before (if AFTER_INSN is false) or after
593    (if AFTER_INSN is true) the insn with the CUID in UID_LIMIT.
594 
595    This function assumes that the modifies_mem table is flushed when
596    the hash table construction or redundancy elimination phases start
597    processing a new basic block.  */
598 
599 static int
600 load_killed_in_block_p (int uid_limit, rtx x, bool after_insn)
601 {
602   struct modifies_mem *list_entry = modifies_mem_list;
603 
604   while (list_entry)
605     {
606       rtx setter = list_entry->insn;
607 
608       /* Ignore entries in the list that do not apply.  */
609       if ((after_insn
610 	   && INSN_CUID (setter) < uid_limit)
611 	  || (! after_insn
612 	      && INSN_CUID (setter) > uid_limit))
613 	{
614 	  list_entry = list_entry->next;
615 	  continue;
616 	}
617 
618       /* If SETTER is a call everything is clobbered.  Note that calls
619 	 to pure functions are never put on the list, so we need not
620 	 worry about them.  */
621       if (CALL_P (setter))
622 	return 1;
623 
624       /* SETTER must be an insn of some kind that sets memory.  Call
625 	 note_stores to examine each hunk of memory that is modified.
626 	 It will set mems_conflict_p to nonzero if there may be a
627 	 conflict between X and SETTER.  */
628       mems_conflict_p = 0;
629       note_stores (PATTERN (setter), find_mem_conflicts, x);
630       if (mems_conflict_p)
631 	return 1;
632 
633       list_entry = list_entry->next;
634     }
635   return 0;
636 }
637 
638 
639 /* Record INSN as the last setter of REG and of any hard registers REG spans.  */
640 
641 static inline void
642 record_last_reg_set_info (rtx insn, rtx reg)
643 {
644   unsigned int regno, end_regno;
645 
646   regno = REGNO (reg);
647   end_regno = END_HARD_REGNO (reg);
648   do
649     reg_avail_info[regno] = INSN_CUID (insn);
650   while (++regno < end_regno);
651 }
652 
653 static inline void
654 record_last_reg_set_info_regno (rtx insn, int regno)
655 {
656   reg_avail_info[regno] = INSN_CUID (insn);
657 }
658 
659 
660 /* Record memory modification information for INSN.  We do not actually care
661    about the memory location(s) that are set, or even how they are set (consider
662    a CALL_INSN).  We merely need to record which insns modify memory.  */
663 
664 static void
665 record_last_mem_set_info (rtx insn)
666 {
667   struct modifies_mem *list_entry;
668 
669   list_entry = (struct modifies_mem *) obstack_alloc (&modifies_mem_obstack,
670 						      sizeof (struct modifies_mem));
671   list_entry->insn = insn;
672   list_entry->next = modifies_mem_list;
673   modifies_mem_list = list_entry;
674 }
675 
676 /* Called from compute_hash_table via note_stores to handle one
677    SET or CLOBBER in an insn.  DATA is really the instruction in which
678    the SET is taking place.  */
679 
680 static void
681 record_last_set_info (rtx dest, const_rtx setter ATTRIBUTE_UNUSED, void *data)
682 {
683   rtx last_set_insn = (rtx) data;
684 
685   if (GET_CODE (dest) == SUBREG)
686     dest = SUBREG_REG (dest);
687 
688   if (REG_P (dest))
689     record_last_reg_set_info (last_set_insn, dest);
690   else if (MEM_P (dest))
691     {
692       /* Ignore pushes; they don't clobber memory.  They may still
693 	 clobber the stack pointer though.  Some targets do argument
694 	 pushes without adding REG_INC notes.  See e.g. PR25196,
695 	 where a pushsi2 on i386 doesn't have REG_INC notes.  Note
696 	 such changes here too.  */
697       if (! push_operand (dest, GET_MODE (dest)))
698 	record_last_mem_set_info (last_set_insn);
699       else
700 	record_last_reg_set_info_regno (last_set_insn, STACK_POINTER_REGNUM);
701     }
702 }
703 
704 
705 /* Reset tables used to keep track of what's still available since the
706    start of the block.  */
707 
708 static void
709 reset_opr_set_tables (void)
710 {
711   memset (reg_avail_info, 0, FIRST_PSEUDO_REGISTER * sizeof (int));
712   obstack_free (&modifies_mem_obstack, modifies_mem_obstack_bottom);
713   modifies_mem_list = NULL;
714 }
715 
716 
717 /* Record things set by INSN.
718    This data is used by oprs_unchanged_p.  */
719 
720 static void
721 record_opr_changes (rtx insn)
722 {
723   rtx note;
724 
725   /* Find all stores and record them.  */
726   note_stores (PATTERN (insn), record_last_set_info, insn);
727 
728   /* Also record autoincremented REGs for this insn as changed.  */
729   for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
730     if (REG_NOTE_KIND (note) == REG_INC)
731       record_last_reg_set_info (insn, XEXP (note, 0));
732 
733   /* Finally, if this is a call, record all call clobbers.  */
734   if (CALL_P (insn))
735     {
736       unsigned int regno;
737       rtx link, x;
738       hard_reg_set_iterator hrsi;
739       EXECUTE_IF_SET_IN_HARD_REG_SET (regs_invalidated_by_call, 0, regno, hrsi)
740 	record_last_reg_set_info_regno (insn, regno);
741 
742       for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
743 	if (GET_CODE (XEXP (link, 0)) == CLOBBER)
744 	  {
745 	    x = XEXP (XEXP (link, 0), 0);
746 	    if (REG_P (x))
747 	      {
748 		gcc_assert (HARD_REGISTER_P (x));
749 		record_last_reg_set_info (insn, x);
750 	      }
751 	  }
752 
753       if (! RTL_CONST_OR_PURE_CALL_P (insn))
754 	record_last_mem_set_info (insn);
755     }
756 }
757 
758 
759 /* Scan the pattern of INSN and add an entry to the expression hash table.
760    After reload we are interested in loads/stores only.  */
761 
762 static void
763 hash_scan_set (rtx insn)
764 {
765   rtx pat = PATTERN (insn);
766   rtx src = SET_SRC (pat);
767   rtx dest = SET_DEST (pat);
768 
769   /* We are only interested in loads and stores.  */
770   if (! MEM_P (src) && ! MEM_P (dest))
771     return;
772 
773   /* Don't mess with jumps and nops.  */
774   if (JUMP_P (insn) || set_noop_p (pat))
775     return;
776 
777   if (REG_P (dest))
778     {
779       if (/* Don't CSE something if we can't do a reg/reg copy.  */
780 	  can_copy_p (GET_MODE (dest))
781 	  /* Is SET_SRC something we want to gcse?  */
782 	  && general_operand (src, GET_MODE (src))
783 #ifdef STACK_REGS
784 	  /* Never consider insns touching the register stack.  It may
785 	     create situations that reg-stack cannot handle (e.g. a stack
786 	     register live across an abnormal edge).  */
787 	  && (REGNO (dest) < FIRST_STACK_REG || REGNO (dest) > LAST_STACK_REG)
788 #endif
789 	  /* An expression is not available if its operands are
790 	     subsequently modified, including this insn.  */
791 	  && oprs_unchanged_p (src, insn, true))
792 	{
793 	  insert_expr_in_table (src, insn);
794 	}
795     }
796   else if (REG_P (src))
797     {
798       /* This is a store to memory; record the memory expression.  */
799       if (/* Don't CSE something if we can't do a reg/reg copy.  */
800 	  can_copy_p (GET_MODE (src))
801 	  /* Is SET_DEST something we want to gcse?  */
802 	  && general_operand (dest, GET_MODE (dest))
803 #ifdef STACK_REGS
804 	  /* As above for STACK_REGS.  */
805 	  && (REGNO (src) < FIRST_STACK_REG || REGNO (src) > LAST_STACK_REG)
806 #endif
807 	  && ! (flag_float_store && FLOAT_MODE_P (GET_MODE (dest)))
808 	  /* Check if the memory expression is killed after insn.  */
809 	  && ! load_killed_in_block_p (INSN_CUID (insn) + 1, dest, true)
810 	  && oprs_unchanged_p (XEXP (dest, 0), insn, true))
811 	{
812 	  insert_expr_in_table (dest, insn);
813 	}
814     }
815 }
816 
817 
818 /* Create hash table of memory expressions available at end of basic
819    blocks.  Basically you should think of this hash table as the
820    representation of AVAIL_OUT.  This is the set of expressions that
821    is generated in a basic block and not killed before the end of the
822    same basic block.  Notice that this is really a local computation.  */
823 
824 static void
825 compute_hash_table (void)
826 {
827   basic_block bb;
828 
829   FOR_EACH_BB (bb)
830     {
831       rtx insn;
832 
833       /* First pass over the instructions records information used to
834 	 determine when registers and memory are last set.
835 	 Since we compute a "local" AVAIL_OUT, reset the tables that
836 	 help us keep track of what has been modified since the start
837 	 of the block.  */
838       reset_opr_set_tables ();
839       FOR_BB_INSNS (bb, insn)
840 	{
841 	  if (INSN_P (insn))
842             record_opr_changes (insn);
843 	}
844 
845       /* The next pass actually builds the hash table.  */
846       FOR_BB_INSNS (bb, insn)
847 	if (INSN_P (insn) && GET_CODE (PATTERN (insn)) == SET)
848 	  hash_scan_set (insn);
849     }
850 }
851 
852 
853 /* Check if register REG is killed in any insn waiting to be inserted on
854    edge E.  This function is required to check that our data flow analysis
855    is still valid prior to commit_edge_insertions.  */
856 
857 static bool
858 reg_killed_on_edge (rtx reg, edge e)
859 {
860   rtx insn;
861 
862   for (insn = e->insns.r; insn; insn = NEXT_INSN (insn))
863     if (INSN_P (insn) && reg_set_p (reg, insn))
864       return true;
865 
866   return false;
867 }
868 
869 /* Similar to above - check if register REG is used in any insn waiting
870    to be inserted on edge E.
871    This assumes no such insn can be a CALL_INSN; if that ever changes, call
872    reg_used_between_p with PREV(insn),NEXT(insn) instead of reg_overlap_mentioned_p.  */
873 
874 static bool
875 reg_used_on_edge (rtx reg, edge e)
876 {
877   rtx insn;
878 
879   for (insn = e->insns.r; insn; insn = NEXT_INSN (insn))
880     if (INSN_P (insn) && reg_overlap_mentioned_p (reg, PATTERN (insn)))
881       return true;
882 
883   return false;
884 }
885 
886 /* Return the loaded/stored register of a load/store instruction.  */
887 
888 static rtx
889 get_avail_load_store_reg (rtx insn)
890 {
891   if (REG_P (SET_DEST (PATTERN (insn))))
892     /* A load.  */
893     return SET_DEST(PATTERN(insn));
894   else
895     {
896       /* A store.  */
897       gcc_assert (REG_P (SET_SRC (PATTERN (insn))));
898       return SET_SRC (PATTERN (insn));
899     }
900 }
901 
902 /* Return true if the predecessors of BB are "well behaved".  */
903 
904 static bool
905 bb_has_well_behaved_predecessors (basic_block bb)
906 {
907   edge pred;
908   edge_iterator ei;
909 
910   if (EDGE_COUNT (bb->preds) == 0)
911     return false;
912 
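  /* Reject predecessors on whose outgoing edge we could not safely insert
     compensation code: abnormal critical edges, abnormal call edges when
     nonlocal labels are present, and predecessors ending in jump table data.  */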
913   FOR_EACH_EDGE (pred, ei, bb->preds)
914     {
915       if ((pred->flags & EDGE_ABNORMAL) && EDGE_CRITICAL_P (pred))
916 	return false;
917 
918       if ((pred->flags & EDGE_ABNORMAL_CALL) && cfun->has_nonlocal_label)
919 	return false;
920 
921       if (JUMP_TABLE_DATA_P (BB_END (pred->src)))
922 	return false;
923     }
924   return true;
925 }
926 
927 
928 /* Return the first occurrence in the list OCCR that lies in BB, or NULL.  */
929 
930 static struct occr*
931 get_bb_avail_insn (basic_block bb, struct occr *occr)
932 {
933   for (; occr != NULL; occr = occr->next)
934     if (BLOCK_FOR_INSN (occr->insn) == bb)
935       return occr;
936   return NULL;
937 }
938 
939 
940 /* This handles the case where several stores feed a partially redundant
941    load. It checks if the redundancy elimination is possible and if it's
942    worth it.
943 
944    Redundancy elimination is possible if:
945    1) None of the operands of the load have been modified since the start
946       of the current basic block.
947    2) The same expression is generated in at least one predecessor of
948       the current basic block.
949 
950    See the function body for the heuristics that determine if eliminating
951    a redundancy is also worth doing, assuming it is possible.  */
952 
953 static void
954 eliminate_partially_redundant_load (basic_block bb, rtx insn,
955 				    struct expr *expr)
956 {
957   edge pred;
958   rtx avail_insn = NULL_RTX;
959   rtx avail_reg;
960   rtx dest, pat;
961   struct occr *a_occr;
962   struct unoccr *occr, *avail_occrs = NULL;
963   struct unoccr *unoccr, *unavail_occrs = NULL, *rollback_unoccr = NULL;
964   int npred_ok = 0;
965   gcov_type ok_count = 0; /* Redundant load execution count.  */
966   gcov_type critical_count = 0; /* Execution count of critical edges.  */
967   edge_iterator ei;
968   bool critical_edge_split = false;
969 
970   /* The execution count of the loads to be added to make the
971      load fully redundant.  */
972   gcov_type not_ok_count = 0;
973   basic_block pred_bb;
974 
975   pat = PATTERN (insn);
976   dest = SET_DEST (pat);
977 
978   /* Check that the loaded register is not used, set, or killed from the
979      beginning of the block.  */
980   if (reg_changed_after_insn_p (dest, 0)
981       || reg_used_between_p (dest, PREV_INSN (BB_HEAD (bb)), insn))
982     return;
983 
984   /* Check potential for replacing load with copy for predecessors.  */
985   FOR_EACH_EDGE (pred, ei, bb->preds)
986     {
987       rtx next_pred_bb_end;
988 
989       avail_insn = NULL_RTX;
990       avail_reg = NULL_RTX;
991       pred_bb = pred->src;
992       next_pred_bb_end = NEXT_INSN (BB_END (pred_bb));
993       for (a_occr = get_bb_avail_insn (pred_bb, expr->avail_occr); a_occr;
994 	   a_occr = get_bb_avail_insn (pred_bb, a_occr->next))
995 	{
996 	  /* See whether this occurrence provides a register we can copy from.  */
997 	  avail_insn = a_occr->insn;
998 	  avail_reg = get_avail_load_store_reg (avail_insn);
999 	  gcc_assert (avail_reg);
1000 
1001 	  /* Make sure we can generate a move from register avail_reg to
1002 	     dest.  */
1003 	  extract_insn (gen_move_insn (copy_rtx (dest),
1004 				       copy_rtx (avail_reg)));
1005 	  if (! constrain_operands (1)
1006 	      || reg_killed_on_edge (avail_reg, pred)
1007 	      || reg_used_on_edge (dest, pred))
1008 	    {
1009 	      avail_insn = NULL;
1010 	      continue;
1011 	    }
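	  /* The source register must still hold the loaded value at the end
	     of the predecessor block, since the copy will be inserted on the
	     outgoing edge.  */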
1012 	  if (!reg_set_between_p (avail_reg, avail_insn, next_pred_bb_end))
1013 	    /* AVAIL_INSN remains non-null.  */
1014 	    break;
1015 	  else
1016 	    avail_insn = NULL;
1017 	}
1018 
1019       if (EDGE_CRITICAL_P (pred))
1020 	critical_count += pred->count;
1021 
1022       if (avail_insn != NULL_RTX)
1023 	{
1024 	  npred_ok++;
1025 	  ok_count += pred->count;
1026 	  if (! set_noop_p (PATTERN (gen_move_insn (copy_rtx (dest),
1027 						    copy_rtx (avail_reg)))))
1028 	    {
1029 	      /* Check if there is going to be a split.  */
1030 	      if (EDGE_CRITICAL_P (pred))
1031 		critical_edge_split = true;
1032 	    }
1033 	  else /* It's a dead move; no need to generate it.  */
1034 	    continue;
1035 	  occr = (struct unoccr *) obstack_alloc (&unoccr_obstack,
1036 						  sizeof (struct unoccr));
1037 	  occr->insn = avail_insn;
1038 	  occr->pred = pred;
1039 	  occr->next = avail_occrs;
1040 	  avail_occrs = occr;
1041 	  if (! rollback_unoccr)
1042 	    rollback_unoccr = occr;
1043 	}
1044       else
1045 	{
1046 	  /* Adding a load on a critical edge will cause a split.  */
1047 	  if (EDGE_CRITICAL_P (pred))
1048 	    critical_edge_split = true;
1049 	  not_ok_count += pred->count;
1050 	  unoccr = (struct unoccr *) obstack_alloc (&unoccr_obstack,
1051 						    sizeof (struct unoccr));
1052 	  unoccr->insn = NULL_RTX;
1053 	  unoccr->pred = pred;
1054 	  unoccr->next = unavail_occrs;
1055 	  unavail_occrs = unoccr;
1056 	  if (! rollback_unoccr)
1057 	    rollback_unoccr = unoccr;
1058 	}
1059     }
1060 
1061   if (/* No load can be replaced by copy.  */
1062       npred_ok == 0
1063       /* Prevent exploding the code.  */
1064       || (optimize_bb_for_size_p (bb) && npred_ok > 1)
1065       /* If we don't have profile information we cannot tell if splitting
1066          a critical edge is profitable or not so don't do it.  */
1067       || ((! profile_info || ! flag_branch_probabilities
1068 	   || targetm.cannot_modify_jumps_p ())
1069 	  && critical_edge_split))
1070     goto cleanup;
1071 
1072   /* Check if it's worth applying the partial redundancy elimination.  */
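  /* Heuristically, the transformation pays off only when the execution count
     of the loads we can replace by register copies (OK_COUNT) outweighs, by
     the given fractions, both the count of the edges that need a compensating
     load (NOT_OK_COUNT) and the count of the critical edges involved
     (CRITICAL_COUNT).  */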
1073   if (ok_count < GCSE_AFTER_RELOAD_PARTIAL_FRACTION * not_ok_count)
1074     goto cleanup;
1075   if (ok_count < GCSE_AFTER_RELOAD_CRITICAL_FRACTION * critical_count)
1076     goto cleanup;
1077 
1078   /* Generate moves to the loaded register from where
1079      the memory is available.  */
1080   for (occr = avail_occrs; occr; occr = occr->next)
1081     {
1082       avail_insn = occr->insn;
1083       pred = occr->pred;
1084       /* Set avail_reg to be the register having the value of the
1085 	 memory.  */
1086       avail_reg = get_avail_load_store_reg (avail_insn);
1087       gcc_assert (avail_reg);
1088 
1089       insert_insn_on_edge (gen_move_insn (copy_rtx (dest),
1090 					  copy_rtx (avail_reg)),
1091 			   pred);
1092       stats.moves_inserted++;
1093 
1094       if (dump_file)
1095 	fprintf (dump_file,
1096 		 "generating move from %d to %d on edge from %d to %d\n",
1097 		 REGNO (avail_reg),
1098 		 REGNO (dest),
1099 		 pred->src->index,
1100 		 pred->dest->index);
1101     }
1102 
1103   /* Regenerate loads where the memory is unavailable.  */
1104   for (unoccr = unavail_occrs; unoccr; unoccr = unoccr->next)
1105     {
1106       pred = unoccr->pred;
1107       insert_insn_on_edge (copy_insn (PATTERN (insn)), pred);
1108       stats.copies_inserted++;
1109 
1110       if (dump_file)
1111 	{
1112 	  fprintf (dump_file,
1113 		   "generating on edge from %d to %d a copy of load: ",
1114 		   pred->src->index,
1115 		   pred->dest->index);
1116 	  print_rtl (dump_file, PATTERN (insn));
1117 	  fprintf (dump_file, "\n");
1118 	}
1119     }
1120 
1121   /* If the load is not itself available in this block, delete it now.
1122      If it is available, only mark it for later deletion: keeping it
1123      around may help discover additional redundancies first.  */
1124   for (a_occr = get_bb_avail_insn (bb, expr->avail_occr);
1125        a_occr && (a_occr->insn != insn);
1126        a_occr = get_bb_avail_insn (bb, a_occr->next))
1127     ;
1128 
1129   if (!a_occr)
1130     {
1131       stats.insns_deleted++;
1132 
1133       if (dump_file)
1134 	{
1135 	  fprintf (dump_file, "deleting insn:\n");
1136           print_rtl_single (dump_file, insn);
1137           fprintf (dump_file, "\n");
1138 	}
1139       delete_insn (insn);
1140     }
1141   else
1142     a_occr->deleted_p = 1;
1143 
1144 cleanup:
1145   if (rollback_unoccr)
1146     obstack_free (&unoccr_obstack, rollback_unoccr);
1147 }
1148 
1149 /* Perform the redundancy elimination as described above.  */
1150 
1151 static void
1152 eliminate_partially_redundant_loads (void)
1153 {
1154   rtx insn;
1155   basic_block bb;
1156 
1157   /* Note we skip the first real basic block.  */
1158 
1159   if (ENTRY_BLOCK_PTR->next_bb == EXIT_BLOCK_PTR)
1160     return;
1161 
1162   FOR_BB_BETWEEN (bb,
1163 		  ENTRY_BLOCK_PTR->next_bb->next_bb,
1164 		  EXIT_BLOCK_PTR,
1165 		  next_bb)
1166     {
1167       /* Don't try anything on basic blocks with strange predecessors.  */
1168       if (! bb_has_well_behaved_predecessors (bb))
1169 	continue;
1170 
1171       /* Do not try anything on cold basic blocks.  */
1172       if (optimize_bb_for_size_p (bb))
1173 	continue;
1174 
1175       /* Reset the table of things changed since the start of the current
1176 	 basic block.  */
1177       reset_opr_set_tables ();
1178 
1179       /* Look at all insns in the current basic block and see if there are
1180 	 any loads in it that we can record.  */
1181       FOR_BB_INSNS (bb, insn)
1182 	{
1183 	  /* Is it a load - of the form (set (reg) (mem))?  */
1184 	  if (NONJUMP_INSN_P (insn)
1185               && GET_CODE (PATTERN (insn)) == SET
1186 	      && REG_P (SET_DEST (PATTERN (insn)))
1187 	      && MEM_P (SET_SRC (PATTERN (insn))))
1188 	    {
1189 	      rtx pat = PATTERN (insn);
1190 	      rtx src = SET_SRC (pat);
1191 	      struct expr *expr;
1192 
1193 	      if (!MEM_VOLATILE_P (src)
1194 		  && GET_MODE (src) != BLKmode
1195 		  && general_operand (src, GET_MODE (src))
1196 		  /* Are the operands unchanged since the start of the
1197 		     block?  */
1198 		  && oprs_unchanged_p (src, insn, false)
1199 		  && !(cfun->can_throw_non_call_exceptions && may_trap_p (src))
1200 		  && !side_effects_p (src)
1201 		  /* Is the expression recorded?  */
1202 		  && (expr = lookup_expr_in_table (src)) != NULL)
1203 		{
1204 		  /* We now have a load (INSN) whose source is recorded in
1205 		     the hash table (EXPR).  Try to remove the load if it
1206 		     is redundant.  */
1207 		  eliminate_partially_redundant_load (bb, insn, expr);
1208 		}
1209 	    }
1210 
1211 	  /* Keep track of everything modified by this insn, so that we
1212 	     know what has been modified since the start of the current
1213 	     basic block.  */
1214 	  if (INSN_P (insn))
1215 	    record_opr_changes (insn);
1216 	}
1217     }
1218 
1219   commit_edge_insertions ();
1220 }
1221 
1222 /* Go over the expression hash table and delete insns that were
1223    marked for later deletion.  */
1224 
1225 /* This helper is called via htab_traverse.  */
1226 static int
1227 delete_redundant_insns_1 (void **slot, void *data ATTRIBUTE_UNUSED)
1228 {
1229   struct expr *expr = (struct expr *) *slot;
1230   struct occr *occr;
1231 
1232   for (occr = expr->avail_occr; occr != NULL; occr = occr->next)
1233     {
1234       if (occr->deleted_p && dbg_cnt (gcse2_delete))
1235 	{
1236 	  delete_insn (occr->insn);
1237 	  stats.insns_deleted++;
1238 
1239 	  if (dump_file)
1240 	    {
1241 	      fprintf (dump_file, "deleting insn:\n");
1242 	      print_rtl_single (dump_file, occr->insn);
1243 	      fprintf (dump_file, "\n");
1244 	    }
1245 	}
1246     }
1247 
1248   return 1;
1249 }
1250 
1251 static void
1252 delete_redundant_insns (void)
1253 {
1254   htab_traverse (expr_table, delete_redundant_insns_1, NULL);
1255   if (dump_file)
1256     fprintf (dump_file, "\n");
1257 }
1258 
1259 /* Main entry point of the GCSE-after-reload pass: clean up some of the
1260    redundant loads created by spilling.  */
1261 
1262 static void
1263 gcse_after_reload_main (rtx f ATTRIBUTE_UNUSED)
1264 {
1265 
1266   memset (&stats, 0, sizeof (stats));
1267 
1268   /* Allocate memory for this pass.
1269      Also computes and initializes the insns' CUIDs.  */
1270   alloc_mem ();
1271 
1272   /* We need alias analysis.  */
1273   init_alias_analysis ();
1274 
1275   compute_hash_table ();
1276 
1277   if (dump_file)
1278     dump_hash_table (dump_file);
1279 
1280   if (htab_elements (expr_table) > 0)
1281     {
1282       eliminate_partially_redundant_loads ();
1283       delete_redundant_insns ();
1284 
1285       if (dump_file)
1286 	{
1287 	  fprintf (dump_file, "GCSE AFTER RELOAD stats:\n");
1288 	  fprintf (dump_file, "copies inserted: %d\n", stats.copies_inserted);
1289 	  fprintf (dump_file, "moves inserted:  %d\n", stats.moves_inserted);
1290 	  fprintf (dump_file, "insns deleted:   %d\n", stats.insns_deleted);
1291 	  fprintf (dump_file, "\n\n");
1292 	}
1293 
1294       statistics_counter_event (cfun, "copies inserted",
1295 				stats.copies_inserted);
1296       statistics_counter_event (cfun, "moves inserted",
1297 				stats.moves_inserted);
1298       statistics_counter_event (cfun, "insns deleted",
1299 				stats.insns_deleted);
1300     }
1301 
1302   /* We are finished with alias analysis.  */
1303   end_alias_analysis ();
1304 
1305   free_mem ();
1306 }
1307 
1308 
1309 static bool
1310 gate_handle_gcse2 (void)
1311 {
1312   return (optimize > 0 && flag_gcse_after_reload
1313 	  && optimize_function_for_speed_p (cfun));
1314 }
1315 
1316 
1317 static unsigned int
1318 rest_of_handle_gcse2 (void)
1319 {
1320   gcse_after_reload_main (get_insns ());
1321   rebuild_jump_labels (get_insns ());
1322   return 0;
1323 }
1324 
1325 struct rtl_opt_pass pass_gcse2 =
1326 {
1327  {
1328   RTL_PASS,
1329   "gcse2",                              /* name */
1330   OPTGROUP_NONE,                        /* optinfo_flags */
1331   gate_handle_gcse2,                    /* gate */
1332   rest_of_handle_gcse2,                 /* execute */
1333   NULL,                                 /* sub */
1334   NULL,                                 /* next */
1335   0,                                    /* static_pass_number */
1336   TV_GCSE_AFTER_RELOAD,                 /* tv_id */
1337   0,                                    /* properties_required */
1338   0,                                    /* properties_provided */
1339   0,                                    /* properties_destroyed */
1340   0,                                    /* todo_flags_start */
1341   TODO_verify_rtl_sharing
1342   | TODO_verify_flow | TODO_ggc_collect /* todo_flags_finish */
1343  }
1344 };
1345