/* RTL dead store elimination.
   Copyright (C) 2005-2020 Free Software Foundation, Inc.

   Contributed by Richard Sandiford <rsandifor@codesourcery.com>
   and Kenneth Zadeck <zadeck@naturalbridge.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#undef BASELINE

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "predict.h"
#include "df.h"
#include "memmodel.h"
#include "tm_p.h"
#include "gimple-ssa.h"
#include "expmed.h"
#include "optabs.h"
#include "emit-rtl.h"
#include "recog.h"
#include "alias.h"
#include "stor-layout.h"
#include "cfgrtl.h"
#include "cselib.h"
#include "tree-pass.h"
#include "explow.h"
#include "expr.h"
#include "dbgcnt.h"
#include "rtl-iter.h"
#include "cfgcleanup.h"
#include "calls.h"

/* This file contains three techniques for performing Dead Store
   Elimination (dse).

   * The first technique performs dse locally on any base address.  It
   is based on cselib, which is a local value numbering technique.
   This technique is local to a basic block but deals with fairly
   general addresses.

   * The second technique performs dse globally but is restricted to
   base addresses that are either constant or are relative to the
   frame_pointer.

   * The third technique (which is only done after register allocation)
   processes the spill slots.  This differs from the second
   technique because it takes advantage of the fact that spilling is
   completely free from the effects of aliasing.

   Logically, dse is a backwards dataflow problem.  A store can be
   deleted if it cannot be reached in the backward direction by any
   use of the value being stored.  However, the local technique uses a
   forwards scan of the basic block because cselib requires that the
   block be processed in that order.

   The pass is logically broken into 7 steps:

   0) Initialization.

   1) The local algorithm, as well as scanning the insns for the two
   global algorithms.

   2) Analysis to see if the global algorithms are necessary.  In the
   case of stores based on a constant address, there must be at least
   two stores to that address, to make it possible to delete some of
   the stores.  In the case of stores off of the frame or spill related
   stores, only one store to an address is necessary because those
   stores die at the end of the function.

   3) Set up the global dataflow equations based on processing the
   info parsed in the first step.

   4) Solve the dataflow equations.

   5) Delete the insns that the global analysis has indicated are
   unnecessary.

   6) Delete insns that store the same value as a preceding store
   where the earlier store couldn't be eliminated.

   7) Cleanup.

   The first step uses cselib and canon_rtx to build the largest
   expression possible for each address.  It is a forwards pass through
   each basic block.  From the point of view of the global technique,
   the first step could examine a block in either direction.  The
   forwards ordering is to accommodate cselib.

   We make a simplifying assumption: addresses fall into four broad
   categories:

   1) base has rtx_varies_p == false, offset is constant.
   2) base has rtx_varies_p == false, offset variable.
   3) base has rtx_varies_p == true, offset constant.
   4) base has rtx_varies_p == true, offset variable.

   The local passes are able to process all 4 kinds of addresses.  The
   global pass only handles 1).

   The global problem is formulated as follows:

     A store, S1, to address A, where A is not relative to the stack
     frame, can be eliminated if all paths from S1 to the end of the
     function contain another store to A before a read of A.

     If the address A is relative to the stack frame, a store S2 to A
     can be eliminated if there are no paths from S2 that reach the
     end of the function and read A before another store to A.  In
     this case S2 can be deleted even if there are paths from S2 to
     the end of the function that have no reads or writes to A.  This
     second case allows stores to the stack frame to be deleted that
     would otherwise die when the function returns.  This cannot be
     done if stores_off_frame_dead_at_return is not true.  See the
     documentation of that variable for when it is false.

     The global problem is formulated as a backwards set union
     dataflow problem where the stores are the gens and reads are the
     kills.  Set union problems are rare and require some special
     handling given our representation of bitmaps.  A straightforward
     implementation requires a lot of bitmaps filled with 1s.
     These are expensive and cumbersome in our bitmap formulation so
     care has been taken to avoid large vectors filled with 1s.  See
     the comments in bb_info and in the dataflow confluence functions
     for details.

   There are two places for further enhancements to this algorithm:

   1) The original dse, which was embedded in a pass called flow, also
   did local address forwarding.  For example in

   A <- r100
   ... <- A

   flow would replace the right hand side of the second insn with a
   reference to r100.  Most of the information needed to add this
   to this pass is available.  It has not been done because it is a
   lot of work to handle the cases where r100 is assigned to between
   the first and second insn and/or the second insn is a load of part
   of the value stored by the first insn.

   Simple cases:
   insn 5 in gcc.c-torture/compile/990203-1.c
   insn 15 in gcc.c-torture/execute/20001017-2.c
   insn 25 in gcc.c-torture/execute/20001026-1.c
   insn 44 in gcc.c-torture/execute/20010910-1.c

   2) The cleaning up of spill code is quite profitable.  It currently
   depends on reading tea leaves and chicken entrails left by reload.
   This pass depends on reload creating a singleton alias set for each
   spill slot and telling the next dse pass which of these alias sets
   are the singletons.  Rather than analyze the addresses of the
   spills, dse's spill processing just does analysis of the loads and
   stores that use those alias sets.  There are three cases where this
   falls short:

     a) Reload sometimes creates the slot for one mode of access, and
     then inserts loads and/or stores for a smaller mode.  In this
     case, the current code just punts on the slot.  The proper thing
     to do is to back out and use one bit vector position for each
     byte of the entity associated with the slot.  This depends on
     KNOWING that reload always generates the accesses for each of the
     bytes in some canonical (read that easy to understand several
     passes after reload happens) way.

     b) Reload sometimes decides that a spill slot it allocated was
     not large enough for the mode and goes back and allocates more
     slots with the same mode and alias set.  The backout in this case
     is a little more graceful than (a).  In this case the slot is
     unmarked as being a spill slot and if the final address comes out
     to be based off the frame pointer, the global algorithm handles
     this slot.

     c) For any pass that may prespill, there is currently no
     mechanism to tell the dse pass that the slot being used has the
     special properties that reload uses.  It may be that all that is
     required is to have those passes make the same calls that reload
     does, assuming that the alias sets can be manipulated in the same
     way.  */
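
/* A minimal illustration (hypothetical code, not from the original
   sources) of the kind of redundancy the pass targets:

     int
     f (void)
     {
       int a[2];
       a[0] = 1;	// dead: overwritten below, never read in between
       a[0] = 2;
       return a[0];
     }

   The first store can be removed by the local technique when both
   stores land in one basic block; stores to the frame that are never
   read again can be removed by the global technique because they die
   when the function returns.  */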

/* There are limits to the size of constant offsets we model for the
   global problem.  There are certainly test cases that exceed this
   limit; however, it is unlikely that there are important programs
   that really have constant offsets this large.  */
#define MAX_OFFSET (64 * 1024)

/* Obstack for the DSE dataflow bitmaps.  We don't want to put these
   on the default obstack because these bitmaps can grow quite large
   (~2GB for the small (!) test case of PR54146) and we'll hold on to
   all that memory until the end of the compiler run.
   As a bonus, delete_tree_live_info can destroy all the bitmaps by just
   releasing the whole obstack.  */
static bitmap_obstack dse_bitmap_obstack;

/* Obstack for other data.  As for above: Kinda nice to be able to
   throw it all away at the end in one big sweep.  */
static struct obstack dse_obstack;

/* Scratch bitmap for cselib's cselib_expand_value_rtx.  */
static bitmap scratch = NULL;

struct insn_info_type;

/* This structure holds information about a candidate store.  */
class store_info
{
public:

  /* False means this is a clobber.  */
  bool is_set;

  /* False if a single HOST_WIDE_INT bitmap is used for positions_needed.  */
  bool is_large;

  /* The id of the mem group of the base address.  If rtx_varies_p is
     true, this is -1.  Otherwise, it is the index into the group
     table.  */
  int group_id;

  /* This is the cselib value.  */
  cselib_val *cse_base;

  /* The canonized mem.  */
  rtx mem;

  /* Canonized MEM address for use by canon_true_dependence.  */
  rtx mem_addr;

  /* The offset of the first byte associated with the operation.  */
  poly_int64 offset;

  /* The number of bytes covered by the operation.  This is always exact
     and known (rather than -1).  */
  poly_int64 width;

  union
    {
      /* A bitmask as wide as the number of bytes in the word that
	 contains a 1 if the byte may be needed.  The store is unused if
	 all of the bits are 0.  This is used if IS_LARGE is false.  */
      unsigned HOST_WIDE_INT small_bitmask;

      struct
	{
	  /* A bitmap with one bit per byte, or null if the number of
	     bytes isn't known at compile time.  A cleared bit means
	     the position is needed.  Used if IS_LARGE is true.  */
	  bitmap bmap;

	  /* When BITMAP is nonnull, this counts the number of set bits
	     (i.e. unneeded bytes) in the bitmap.  If it is equal to
	     WIDTH, the whole store is unused.

	     When BITMAP is null:
	     - the store is definitely not needed when COUNT == 1
	     - all the store is needed when COUNT == 0 and RHS is nonnull
	     - otherwise we don't know which parts of the store are needed.  */
	  int count;
	} large;
    } positions_needed;

  /* The next store info for this insn.  */
  class store_info *next;

  /* The right hand side of the store.  This is used if there is a
     subsequent reload of the mem's address somewhere later in the
     basic block.  */
  rtx rhs;

  /* If rhs is or holds a constant, this contains that constant,
     otherwise NULL.  */
  rtx const_rhs;

  /* Set if this store stores the same constant value as the
     REDUNDANT_REASON insn stored.  These aren't eliminated early,
     because doing that might prevent the earlier larger store from
     being eliminated.  */
  struct insn_info_type *redundant_reason;
};

/* Return a bitmask with the first N low bits set.  */

static unsigned HOST_WIDE_INT
#ifdef NB_FIX_VAX_BACKEND
lowpart_bitmask (unsigned int n)
#else
lowpart_bitmask (int n)
#endif
{
  unsigned HOST_WIDE_INT mask = HOST_WIDE_INT_M1U;
#ifdef NB_FIX_VAX_BACKEND
  if (n < 1)
    return 0;
  if (n >= HOST_BITS_PER_WIDE_INT)
    return mask;
#else // XXXMRG
  gcc_assert (n >= 0 && n <= HOST_BITS_PER_WIDE_INT);
  if (n == 0)
    return 0;
#endif
  return mask >> (HOST_BITS_PER_WIDE_INT - n);
}
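
/* Worked examples (illustrative, assuming HOST_BITS_PER_WIDE_INT == 64):

     lowpart_bitmask (0)  == 0x0
     lowpart_bitmask (3)  == 0x7	(HOST_WIDE_INT_M1U >> 61)
     lowpart_bitmask (64) == HOST_WIDE_INT_M1U

   The early return for N == 0 (or N < 1 in the NetBSD variant) matters
   because shifting right by the full HOST_BITS_PER_WIDE_INT would be
   undefined behavior.  */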

static object_allocator<store_info> cse_store_info_pool ("cse_store_info_pool");

static object_allocator<store_info> rtx_store_info_pool ("rtx_store_info_pool");

/* This structure holds information about a load.  These are only
   built for rtx bases.  */
class read_info_type
{
public:
  /* The id of the mem group of the base address.  */
  int group_id;

  /* The offset of the first byte associated with the operation.  */
  poly_int64 offset;

  /* The number of bytes covered by the operation, or -1 if not known.  */
  poly_int64 width;

  /* The mem being read.  */
  rtx mem;

  /* The next read_info for this insn.  */
  class read_info_type *next;
};
typedef class read_info_type *read_info_t;

static object_allocator<read_info_type> read_info_type_pool ("read_info_pool");

/* One of these records is created for each insn.  */

struct insn_info_type
{
  /* Set true if the insn contains a store but the insn itself cannot
     be deleted.  This is set if the insn is a parallel and there is
     more than one non-dead output or if the insn is in some way
     volatile.  */
  bool cannot_delete;

  /* This field is only used by the global algorithm.  It is set true
     if the insn contains any read of mem except for a kind 1) read
     (see the address categories above).  This is also set if the insn
     is a call or contains a clobber of mem.  If the insn contains a
     wild read, the read_rec will be null.  */
  bool wild_read;

  /* This is true only for CALL instructions which could potentially read
     any non-frame memory location.  This field is used by the global
     algorithm.  */
  bool non_frame_wild_read;

  /* This field is only used for the processing of const functions.
     These functions cannot read memory, but they can read the stack
     because that is where they may get their parms.  We need to be
     this conservative because, like the store motion pass, we don't
     consider CALL_INSN_FUNCTION_USAGE when processing call insns.
     Moreover, we need to distinguish two cases:
     1. Before reload (register elimination), the stores related to
	outgoing arguments are stack pointer based and thus deemed
	of non-constant base in this pass.  This requires special
	handling but also means that the frame pointer based stores
	need not be killed upon encountering a const function call.
     2. After reload, the stores related to outgoing arguments can be
	either stack pointer or hard frame pointer based.  This means
	that we have no other choice than also killing all the frame
	pointer based stores upon encountering a const function call.
     This field is set after reload for const function calls and before
     reload for const tail function calls on targets where the arg
     pointer is the frame pointer.  Having this set is less severe than
     a wild read, it just means that all the frame related stores are
     killed rather than all the stores.  */
  bool frame_read;

  /* This field is only used for the processing of const functions.
     It is set if the insn may contain a stack pointer based store.  */
  bool stack_pointer_based;

  /* This is true if any of the sets within the insn contains a
     cselib base.  Such stores can only be deleted by the local
     algorithm.  */
  bool contains_cselib_groups;

  /* The insn.  */
  rtx_insn *insn;

  /* The list of mem sets or mem clobbers that are contained in this
     insn.  If the insn is deletable, it contains only one mem set.
     But it could also contain clobbers.  Insns that contain more than
     one mem set are not deletable, but each of those mems is here in
     order to provide info to delete other insns.  */
  store_info *store_rec;

  /* The linked list of mem uses in this insn.  Only the reads from
     rtx bases are listed here.  The reads to cselib bases are
     completely processed during the first scan and so are never
     created.  */
  read_info_t read_rec;

  /* The live fixed registers.  We assume only fixed registers can
     cause trouble by being clobbered from an expanded pattern;
     storing only the live fixed registers (rather than all registers)
     means less memory needs to be allocated / copied for the individual
     stores.  */
  regset fixed_regs_live;

  /* The prev insn in the basic block.  */
  struct insn_info_type *prev_insn;

  /* The linked list of insns that are in consideration for removal in
     the forwards pass through the basic block.  This pointer may be
     trash as it is not cleared when a wild read occurs.  The only
     time it is guaranteed to be correct is when the traversal starts
     at active_local_stores.  */
  struct insn_info_type *next_local_store;
};
typedef struct insn_info_type *insn_info_t;

static object_allocator<insn_info_type> insn_info_type_pool ("insn_info_pool");

/* The linked list of stores that are under consideration in this
   basic block.  */
static insn_info_t active_local_stores;
static int active_local_stores_len;

struct dse_bb_info_type
{
  /* Pointer to the insn info for the last insn in the block.  These
     are linked so this is how all of the insns are reached.  During
     scanning this is the current insn being scanned.  */
  insn_info_t last_insn;

  /* The info for the global dataflow problem.  */


  /* This is set if the transfer function should AND in the wild_read
     bitmap before applying the kill and gen sets.  That vector knocks
     out most of the bits in the bitmap and thus speeds up the
     operations.  */
  bool apply_wild_read;

  /* The following 4 bitvectors hold information about which positions
     of which stores are live or dead.  They are indexed by
     get_bitmap_index.  */

  /* The set of store positions that exist in this block before a wild read.  */
  bitmap gen;

  /* The set of load positions that exist in this block above the
     same position of a store.  */
  bitmap kill;

  /* The set of stores that reach the top of the block without being
     killed by a read.

     Do not represent the in set if it is all ones.  Note that this is
     what the bitvector should logically be initialized to for a set
     intersection problem.  However, like the kill set, this is too
     expensive.  So initially, the in set will only be created for the
     exit block and any block that contains a wild read.  */
  bitmap in;

  /* The set of stores that reach the bottom of the block from its
     successors.

     Do not represent the out set if it is all ones.  Note that this is
     what the bitvector should logically be initialized to for a set
     intersection problem.  However, like the kill and in sets, this is
     too expensive.  So what is done is that the confluence operator
     just initializes the vector from one of the out sets of the
     successors of the block.  */
  bitmap out;

  /* The following bitvector is indexed by the reg number.  It
     contains the set of regs that are live at the current instruction
     being processed.  While it contains info for all of the
     registers, only the hard registers are actually examined.  It is used
     to assure that shift and/or add sequences that are inserted do not
     accidentally clobber live hard regs.  */
  bitmap regs_live;
};
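
/* A rough sketch of how these sets are typically combined (an
   assumption for orientation; the actual solver lives further down in
   this file and is not quoted here): the problem is backwards, so for
   each block something like

     OUT = intersection over the successors of their IN sets
     IN  = GEN | (OUT & ~KILL)

   is iterated to a fixed point, with the wild_read bitmap ANDed in
   first when apply_wild_read is set.  */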

typedef struct dse_bb_info_type *bb_info_t;

static object_allocator<dse_bb_info_type> dse_bb_info_type_pool
  ("bb_info_pool");

/* Table to hold all bb_infos.  */
static bb_info_t *bb_table;

/* There is a group_info for each rtx base that is used to reference
   memory.  There are not many rtx bases because they are very limited
   in scope.  */

struct group_info
{
  /* The actual base of the address.  */
  rtx rtx_base;

  /* The sequential id of the base.  This allows us to have a
     canonical ordering of these that is not based on addresses.  */
  int id;

  /* True if there are any positions that are to be processed
     globally.  */
  bool process_globally;

  /* True if the base of this group is either the frame_pointer or
     hard_frame_pointer.  */
  bool frame_related;

  /* A mem wrapped around the base pointer for the group in order to do
     read dependency.  It must be given BLKmode in order to encompass all
     the possible offsets from the base.  */
  rtx base_mem;

  /* Canonized version of base_mem's address.  */
  rtx canon_base_addr;

  /* These two sets of two bitmaps are used to keep track of how many
     stores are actually referencing that position from this base.  We
     only do this for rtx bases as this will be used to assign
     positions in the bitmaps for the global problem.  Bit N is set in
     store1 on the first store for offset N.  Bit N is set in store2
     for the second store to offset N.  This is all we need since we
     only care about offsets that have two or more stores for them.

     The "_n" suffix is for offsets less than 0 and the "_p" suffix is
     for 0 and greater offsets.

     There is one special case here: for stores into the stack frame,
     we will OR store1 into store2 before deciding which stores to
     look at globally.  This is because stores to the stack frame that
     have no other reads before the end of the function can also be
     deleted.  */
  bitmap store1_n, store1_p, store2_n, store2_p;

  /* These bitmaps keep track of which offsets in this group escape
     this function.  An offset escapes if it corresponds to a named
     variable whose addressable flag is set.  */
  bitmap escaped_n, escaped_p;

  /* The positions in this bitmap have the same assignments as the in,
     out, gen and kill bitmaps.  This bitmap is all zeros except for
     the positions that are occupied by stores for this group.  */
  bitmap group_kill;

  /* The offset_map is used to map the offsets from this base into
     positions in the global bitmaps.  It is only created after all of
     the stores have been scanned and we know which ones we
     care about.  */
  int *offset_map_n, *offset_map_p;
  int offset_map_size_n, offset_map_size_p;
};

static object_allocator<group_info> group_info_pool ("rtx_group_info_pool");

/* Index into the rtx_group_vec.  */
static int rtx_group_next_id;


static vec<group_info *> rtx_group_vec;


/* This structure holds the set of changes that are being deferred
   when removing a read operation.  See replace_read.  */
struct deferred_change
{

  /* The mem that is being replaced.  */
  rtx *loc;

  /* The reg it is being replaced with.  */
  rtx reg;

  struct deferred_change *next;
};

static object_allocator<deferred_change> deferred_change_pool
  ("deferred_change_pool");

static deferred_change *deferred_change_list = NULL;

/* This is true except if cfun->stdarg -- i.e. we cannot do
   this for vararg functions because they play games with the frame.  */
static bool stores_off_frame_dead_at_return;

/* Counters for stats.  */
static int globally_deleted;
static int locally_deleted;

static bitmap all_blocks;

/* Locations that are killed by calls in the global phase.  */
static bitmap kill_on_calls;

/* The number of bits used in the global bitmaps.  */
static unsigned int current_position;

/* Print offset range [OFFSET, OFFSET + WIDTH) to FILE.  */

static void
print_range (FILE *file, poly_int64 offset, poly_int64 width)
{
  fprintf (file, "[");
  print_dec (offset, file, SIGNED);
  fprintf (file, "..");
  print_dec (offset + width, file, SIGNED);
  fprintf (file, ")");
}
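
/* For example (illustrative values), print_range (file, 8, 4)
   prints "[8..12)".  */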

/*----------------------------------------------------------------------------
   Zeroth step.

   Initialization.
----------------------------------------------------------------------------*/


/* Hashtable callbacks for maintaining the "bases" field of
   store_group_info, given that the addresses are function invariants.  */

struct invariant_group_base_hasher : nofree_ptr_hash <group_info>
{
  static inline hashval_t hash (const group_info *);
  static inline bool equal (const group_info *, const group_info *);
};

inline bool
invariant_group_base_hasher::equal (const group_info *gi1,
				    const group_info *gi2)
{
  return rtx_equal_p (gi1->rtx_base, gi2->rtx_base);
}

inline hashval_t
invariant_group_base_hasher::hash (const group_info *gi)
{
  int do_not_record;
  return hash_rtx (gi->rtx_base, Pmode, &do_not_record, NULL, false);
}

/* Table of group_info structures, hashed by base value.  */
static hash_table<invariant_group_base_hasher> *rtx_group_table;


/* Get the GROUP for BASE.  Add a new group if it is not there.  */

static group_info *
get_group_info (rtx base)
{
  struct group_info tmp_gi;
  group_info *gi;
  group_info **slot;

  gcc_assert (base != NULL_RTX);

  /* Find the group_info structure for BASE, creating a new one
     if necessary.  */
  tmp_gi.rtx_base = base;
  slot = rtx_group_table->find_slot (&tmp_gi, INSERT);
  gi = *slot;

  if (gi == NULL)
    {
      *slot = gi = group_info_pool.allocate ();
      gi->rtx_base = base;
      gi->id = rtx_group_next_id++;
      gi->base_mem = gen_rtx_MEM (BLKmode, base);
      gi->canon_base_addr = canon_rtx (base);
      gi->store1_n = BITMAP_ALLOC (&dse_bitmap_obstack);
      gi->store1_p = BITMAP_ALLOC (&dse_bitmap_obstack);
      gi->store2_n = BITMAP_ALLOC (&dse_bitmap_obstack);
      gi->store2_p = BITMAP_ALLOC (&dse_bitmap_obstack);
      gi->escaped_p = BITMAP_ALLOC (&dse_bitmap_obstack);
      gi->escaped_n = BITMAP_ALLOC (&dse_bitmap_obstack);
      gi->group_kill = BITMAP_ALLOC (&dse_bitmap_obstack);
      gi->process_globally = false;
      gi->frame_related =
	(base == frame_pointer_rtx) || (base == hard_frame_pointer_rtx);
      gi->offset_map_size_n = 0;
      gi->offset_map_size_p = 0;
      gi->offset_map_n = NULL;
      gi->offset_map_p = NULL;
      rtx_group_vec.safe_push (gi);
    }

  return gi;
}


/* Initialization of data structures.  */

static void
dse_step0 (void)
{
  locally_deleted = 0;
  globally_deleted = 0;

  bitmap_obstack_initialize (&dse_bitmap_obstack);
  gcc_obstack_init (&dse_obstack);

  scratch = BITMAP_ALLOC (&reg_obstack);
  kill_on_calls = BITMAP_ALLOC (&dse_bitmap_obstack);


  rtx_group_table = new hash_table<invariant_group_base_hasher> (11);

  bb_table = XNEWVEC (bb_info_t, last_basic_block_for_fn (cfun));
  rtx_group_next_id = 0;

  stores_off_frame_dead_at_return = !cfun->stdarg;

  init_alias_analysis ();
}



/*----------------------------------------------------------------------------
   First step.

   Scan all of the insns.  Any random ordering of the blocks is fine.
   Each block is scanned in forward order to accommodate cselib which
   is used to remove stores with non-constant bases.
----------------------------------------------------------------------------*/

/* Delete all of the store_info recs from INSN_INFO.  */

static void
free_store_info (insn_info_t insn_info)
{
  store_info *cur = insn_info->store_rec;
  while (cur)
    {
      store_info *next = cur->next;
      if (cur->is_large)
	BITMAP_FREE (cur->positions_needed.large.bmap);
      if (cur->cse_base)
	cse_store_info_pool.remove (cur);
      else
	rtx_store_info_pool.remove (cur);
      cur = next;
    }

  insn_info->cannot_delete = true;
  insn_info->contains_cselib_groups = false;
  insn_info->store_rec = NULL;
}

struct note_add_store_info
{
  rtx_insn *first, *current;
  regset fixed_regs_live;
  bool failure;
};

/* Callback for emit_inc_dec_insn_before via note_stores.
   Check whether a register that is clobbered is live afterwards.  */

static void
note_add_store (rtx loc, const_rtx expr ATTRIBUTE_UNUSED, void *data)
{
  rtx_insn *insn;
  note_add_store_info *info = (note_add_store_info *) data;

  if (!REG_P (loc))
    return;

  /* If this register is referenced by the current or an earlier insn,
     that's OK.  E.g. this applies to the register that is being incremented
     with this addition.  */
  for (insn = info->first;
       insn != NEXT_INSN (info->current);
       insn = NEXT_INSN (insn))
    if (reg_referenced_p (loc, PATTERN (insn)))
      return;

  /* If we come here, we have a clobber of a register that's only OK
     if that register is not live.  If we don't have liveness information
     available, fail now.  */
  if (!info->fixed_regs_live)
    {
      info->failure = true;
      return;
    }
  /* Now check if this is a live fixed register.  */
  unsigned int end_regno = END_REGNO (loc);
  for (unsigned int regno = REGNO (loc); regno < end_regno; ++regno)
    if (REGNO_REG_SET_P (info->fixed_regs_live, regno))
      info->failure = true;
}

/* Callback for for_each_inc_dec that emits an INSN that sets DEST to
   SRC + SRCOFF before insn ARG.  */

static int
emit_inc_dec_insn_before (rtx mem ATTRIBUTE_UNUSED,
			  rtx op ATTRIBUTE_UNUSED,
			  rtx dest, rtx src, rtx srcoff, void *arg)
{
  insn_info_t insn_info = (insn_info_t) arg;
  rtx_insn *insn = insn_info->insn, *new_insn, *cur;
  note_add_store_info info;

  /* We can reuse all operands without copying, because we are about
     to delete the insn that contained them.  */
  if (srcoff)
    {
      start_sequence ();
      emit_insn (gen_add3_insn (dest, src, srcoff));
      new_insn = get_insns ();
      end_sequence ();
    }
  else
    new_insn = gen_move_insn (dest, src);
  info.first = new_insn;
  info.fixed_regs_live = insn_info->fixed_regs_live;
  info.failure = false;
  for (cur = new_insn; cur; cur = NEXT_INSN (cur))
    {
      info.current = cur;
      note_stores (cur, note_add_store, &info);
    }

  /* If a failure was flagged above, return 1 so that for_each_inc_dec will
     return it immediately, communicating the failure to its caller.  */
  if (info.failure)
    return 1;

  emit_insn_before (new_insn, insn);

  return 0;
}
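
/* An illustrative example: for a dead store such as

     (set (mem (post_inc (reg R))) (reg X))

   the increment of R must survive the store's deletion, so the
   callback above materializes an explicit R = R + size insn (via
   gen_add3_insn, or gen_move_insn when there is no offset) in front
   of the doomed insn.  */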

/* Before we delete INSN_INFO->INSN, make sure that the auto inc/dec, if it
   is there, is split into a separate insn.
   Return true on success (or if there was nothing to do), false on failure.  */

static bool
check_for_inc_dec_1 (insn_info_t insn_info)
{
  rtx_insn *insn = insn_info->insn;
  rtx note = find_reg_note (insn, REG_INC, NULL_RTX);
  if (note)
    return for_each_inc_dec (PATTERN (insn), emit_inc_dec_insn_before,
			     insn_info) == 0;

  /* Punt on stack pushes, those don't have REG_INC notes and we are
     unprepared to deal with distribution of REG_ARGS_SIZE notes etc.  */
  subrtx_iterator::array_type array;
  FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
    {
      const_rtx x = *iter;
      if (GET_RTX_CLASS (GET_CODE (x)) == RTX_AUTOINC)
	return false;
    }

  return true;
}


/* Entry point for postreload.  If you work on reload_cse, or you need this
   anywhere else, consider if you can provide register liveness information
   and add a parameter to this function so that it can be passed down in
   insn_info.fixed_regs_live.  */
bool
check_for_inc_dec (rtx_insn *insn)
{
  insn_info_type insn_info;
  rtx note;

  insn_info.insn = insn;
  insn_info.fixed_regs_live = NULL;
  note = find_reg_note (insn, REG_INC, NULL_RTX);
  if (note)
    return for_each_inc_dec (PATTERN (insn), emit_inc_dec_insn_before,
			     &insn_info) == 0;

  /* Punt on stack pushes, those don't have REG_INC notes and we are
     unprepared to deal with distribution of REG_ARGS_SIZE notes etc.  */
  subrtx_iterator::array_type array;
  FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
    {
      const_rtx x = *iter;
      if (GET_RTX_CLASS (GET_CODE (x)) == RTX_AUTOINC)
	return false;
    }

  return true;
}

/* Delete the insn and free all of the fields inside INSN_INFO.  */

static void
delete_dead_store_insn (insn_info_t insn_info)
{
  read_info_t read_info;

  if (!dbg_cnt (dse))
    return;

  if (!check_for_inc_dec_1 (insn_info))
    return;
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Locally deleting insn %d\n",
	     INSN_UID (insn_info->insn));

  free_store_info (insn_info);
  read_info = insn_info->read_rec;

  while (read_info)
    {
      read_info_t next = read_info->next;
      read_info_type_pool.remove (read_info);
      read_info = next;
    }
  insn_info->read_rec = NULL;

  delete_insn (insn_info->insn);
  locally_deleted++;
  insn_info->insn = NULL;

  insn_info->wild_read = false;
}

/* Return whether DECL, a local variable, can possibly escape the current
   function scope.  */

static bool
local_variable_can_escape (tree decl)
{
  if (TREE_ADDRESSABLE (decl))
    return true;

  /* If this is a partitioned variable, we need to consider all the variables
     in the partition.  This is necessary because a store into one of them can
     be replaced with a store into another and this may not change the outcome
     of the escape analysis.  */
  if (cfun->gimple_df->decls_to_pointers != NULL)
    {
      tree *namep = cfun->gimple_df->decls_to_pointers->get (decl);
      if (namep)
	return TREE_ADDRESSABLE (*namep);
    }

  return false;
}

/* Return whether EXPR can possibly escape the current function scope.  */

static bool
can_escape (tree expr)
{
  tree base;
  if (!expr)
    return true;
  base = get_base_address (expr);
  if (DECL_P (base)
      && !may_be_aliased (base)
      && !(VAR_P (base)
	   && !DECL_EXTERNAL (base)
	   && !TREE_STATIC (base)
	   && local_variable_can_escape (base)))
    return false;
  return true;
}

/* Set the store* bitmaps and the offset_map_size* fields in GROUP
   based on OFFSET, WIDTH and EXPR.  */

static void
set_usage_bits (group_info *group, poly_int64 offset, poly_int64 width,
		tree expr)
{
  /* Non-constant offsets and widths act as global kills, so there's no point
     trying to use them to derive global DSE candidates.  */
  HOST_WIDE_INT i, const_offset, const_width;
  bool expr_escapes = can_escape (expr);
  if (offset.is_constant (&const_offset)
      && width.is_constant (&const_width)
      && const_offset > -MAX_OFFSET
      && const_offset + const_width < MAX_OFFSET)
    for (i = const_offset; i < const_offset + const_width; ++i)
      {
	bitmap store1;
	bitmap store2;
	bitmap escaped;
	int ai;
	if (i < 0)
	  {
	    store1 = group->store1_n;
	    store2 = group->store2_n;
	    escaped = group->escaped_n;
	    ai = -i;
	  }
	else
	  {
	    store1 = group->store1_p;
	    store2 = group->store2_p;
	    escaped = group->escaped_p;
	    ai = i;
	  }

	if (!bitmap_set_bit (store1, ai))
	  bitmap_set_bit (store2, ai);
	else
	  {
	    if (i < 0)
	      {
		if (group->offset_map_size_n < ai)
		  group->offset_map_size_n = ai;
	      }
	    else
	      {
		if (group->offset_map_size_p < ai)
		  group->offset_map_size_p = ai;
	      }
	  }
	if (expr_escapes)
	  bitmap_set_bit (escaped, ai);
      }
}
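
/* A worked example (illustrative): a 4-byte store at OFFSET == -2
   visits bytes -2, -1, 0 and 1, so it touches bits 2 and 1 of the
   "_n" bitmaps and bits 0 and 1 of the "_p" bitmaps.  On the first
   store to each offset, the corresponding offset_map_size_n/_p
   high-water mark is bumped; repeats go to the store2_* bitmaps.  */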

static void
reset_active_stores (void)
{
  active_local_stores = NULL;
  active_local_stores_len = 0;
}

/* Free all READ_REC of the LAST_INSN of BB_INFO.  */

static void
free_read_records (bb_info_t bb_info)
{
  insn_info_t insn_info = bb_info->last_insn;
  read_info_t *ptr = &insn_info->read_rec;
  while (*ptr)
    {
      read_info_t next = (*ptr)->next;
      read_info_type_pool.remove (*ptr);
      *ptr = next;
    }
}

/* Set the BB_INFO so that the last insn is marked as a wild read.  */

static void
add_wild_read (bb_info_t bb_info)
{
  insn_info_t insn_info = bb_info->last_insn;
  insn_info->wild_read = true;
  free_read_records (bb_info);
  reset_active_stores ();
}

/* Set the BB_INFO so that the last insn is marked as a wild read of
   non-frame locations.  */

static void
add_non_frame_wild_read (bb_info_t bb_info)
{
  insn_info_t insn_info = bb_info->last_insn;
  insn_info->non_frame_wild_read = true;
  free_read_records (bb_info);
  reset_active_stores ();
}

/* Return true if X is a constant or one of the registers that behave
   as a constant over the life of a function.  This is equivalent to
   !rtx_varies_p for memory addresses.  */

static bool
const_or_frame_p (rtx x)
{
  if (CONSTANT_P (x))
    return true;

  if (GET_CODE (x) == REG)
    {
      /* Note that we have to test for the actual rtx used for the frame
	 and arg pointers and not just the register number in case we have
	 eliminated the frame and/or arg pointer and are using it
	 for pseudos.  */
      if (x == frame_pointer_rtx || x == hard_frame_pointer_rtx
	  /* The arg pointer varies if it is not a fixed register.  */
	  || (x == arg_pointer_rtx && fixed_regs[ARG_POINTER_REGNUM])
	  || x == pic_offset_table_rtx)
	return true;
      return false;
    }

  return false;
}

/* Take all reasonable action to put the address of MEM into the form
   that we can do analysis on.

   The gold standard is to get the address into the form: address +
   OFFSET where address is something that rtx_varies_p considers a
   constant.  When we can get the address in this form, we can do
   global analysis on it.  Note that for constant bases, address is
   not actually returned, only the group_id.  The address can be
   obtained from that.

   If that fails, we try cselib to get a value we can at least use
   locally.  If that fails, we return false.

   The GROUP_ID is set to -1 for cselib bases and the index of the
   group for non_varying bases.  */

static bool
canon_address (rtx mem,
	       int *group_id,
	       poly_int64 *offset,
	       cselib_val **base)
{
  machine_mode address_mode = get_address_mode (mem);
  rtx mem_address = XEXP (mem, 0);
  rtx expanded_address, address;
  int expanded;

  cselib_lookup (mem_address, address_mode, 1, GET_MODE (mem));

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "  mem: ");
      print_inline_rtx (dump_file, mem_address, 0);
      fprintf (dump_file, "\n");
    }

  /* First see if just canon_rtx (mem_address) is const or frame,
     if not, try cselib_expand_value_rtx and call canon_rtx on that.  */
  address = NULL_RTX;
  for (expanded = 0; expanded < 2; expanded++)
    {
      if (expanded)
	{
	  /* Use cselib to replace all of the reg references with the full
	     expression.  This will take care of the case where we have

	     r_x = base + offset;
	     val = *r_x;

	     by making it into

	     val = *(base + offset);  */

	  expanded_address = cselib_expand_value_rtx (mem_address,
						      scratch, 5);

	  /* If this fails, just go with the address from first
	     iteration.  */
	  if (!expanded_address)
	    break;
	}
      else
	expanded_address = mem_address;

      /* Split the address into canonical BASE + OFFSET terms.  */
      address = canon_rtx (expanded_address);

      *offset = 0;

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  if (expanded)
	    {
	      fprintf (dump_file, "\n   after cselib_expand address: ");
	      print_inline_rtx (dump_file, expanded_address, 0);
	      fprintf (dump_file, "\n");
	    }

	  fprintf (dump_file, "\n   after canon_rtx address: ");
	  print_inline_rtx (dump_file, address, 0);
	  fprintf (dump_file, "\n");
	}

      if (GET_CODE (address) == CONST)
	address = XEXP (address, 0);

      address = strip_offset_and_add (address, offset);

      if (ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (mem))
	  && const_or_frame_p (address))
	{
	  group_info *group = get_group_info (address);

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "  gid=%d offset=", group->id);
	      print_dec (*offset, dump_file);
	      fprintf (dump_file, "\n");
	    }
	  *base = NULL;
	  *group_id = group->id;
	  return true;
	}
    }

  *base = cselib_lookup (address, address_mode, true, GET_MODE (mem));
  *group_id = -1;

  if (*base == NULL)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, " no cselib val - should be a wild read.\n");
      return false;
    }
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "  varying cselib base=%u:%u offset = ",
	       (*base)->uid, (*base)->hash);
      print_dec (*offset, dump_file);
      fprintf (dump_file, "\n");
    }
  return true;
}


/* Clear the rhs field from the active_local_stores array.  */

static void
clear_rhs_from_active_local_stores (void)
{
  insn_info_t ptr = active_local_stores;

  while (ptr)
    {
      store_info *store_info = ptr->store_rec;
      /* Skip the clobbers.  */
      while (!store_info->is_set)
	store_info = store_info->next;

      store_info->rhs = NULL;
      store_info->const_rhs = NULL;

      ptr = ptr->next_local_store;
    }
}


/* Mark the byte POS bytes from the beginning of store S_INFO as unneeded.  */

static inline void
set_position_unneeded (store_info *s_info, int pos)
{
  if (__builtin_expect (s_info->is_large, false))
    {
      if (bitmap_set_bit (s_info->positions_needed.large.bmap, pos))
	s_info->positions_needed.large.count++;
    }
  else
    s_info->positions_needed.small_bitmask
      &= ~(HOST_WIDE_INT_1U << pos);
}

/* Mark the whole store S_INFO as unneeded.  */

static inline void
set_all_positions_unneeded (store_info *s_info)
{
  if (__builtin_expect (s_info->is_large, false))
    {
      HOST_WIDE_INT width;
      if (s_info->width.is_constant (&width))
	{
	  bitmap_set_range (s_info->positions_needed.large.bmap, 0, width);
	  s_info->positions_needed.large.count = width;
	}
      else
	{
	  gcc_checking_assert (!s_info->positions_needed.large.bmap);
	  s_info->positions_needed.large.count = 1;
	}
    }
  else
    s_info->positions_needed.small_bitmask = HOST_WIDE_INT_0U;
}

/* Return TRUE if any bytes from S_INFO store are needed.  */

static inline bool
any_positions_needed_p (store_info *s_info)
{
  if (__builtin_expect (s_info->is_large, false))
    {
      HOST_WIDE_INT width;
      if (s_info->width.is_constant (&width))
	{
	  gcc_checking_assert (s_info->positions_needed.large.bmap);
	  return s_info->positions_needed.large.count < width;
	}
      else
	{
	  gcc_checking_assert (!s_info->positions_needed.large.bmap);
	  return s_info->positions_needed.large.count == 0;
	}
    }
  else
    return (s_info->positions_needed.small_bitmask != HOST_WIDE_INT_0U);
}

/* Return TRUE if all bytes START through START+WIDTH-1 from S_INFO
   store are known to be needed.  */

static inline bool
all_positions_needed_p (store_info *s_info, poly_int64 start,
			poly_int64 width)
{
  gcc_assert (s_info->rhs);
  if (!s_info->width.is_constant ())
    {
      gcc_assert (s_info->is_large
		  && !s_info->positions_needed.large.bmap);
      return s_info->positions_needed.large.count == 0;
    }

  /* Otherwise, if START and WIDTH are non-constant, we're asking about
     a non-constant region of a constant-sized store.  We can't say for
     sure that all positions are needed.  */
  HOST_WIDE_INT const_start, const_width;
  if (!start.is_constant (&const_start)
      || !width.is_constant (&const_width))
    return false;

  if (__builtin_expect (s_info->is_large, false))
    {
      for (HOST_WIDE_INT i = const_start; i < const_start + const_width; ++i)
	if (bitmap_bit_p (s_info->positions_needed.large.bmap, i))
	  return false;
      return true;
    }
#ifdef NB_FIX_VAX_BACKEND
  else if (const_start >= HOST_BITS_PER_WIDE_INT || const_start < 0)
    return true;
#endif
  else
    {
      unsigned HOST_WIDE_INT mask
	= lowpart_bitmask (const_width) << const_start;
      return (s_info->positions_needed.small_bitmask & mask) == mask;
    }
}
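
/* A worked example (illustrative) for the small-bitmask case: with
   START == 1 and WIDTH == 2 the mask is lowpart_bitmask (2) << 1,
   i.e. binary 110, so the function returns true only if bytes 1 and 2
   are both still marked as needed in small_bitmask.  */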
1363 
1364 
1365 static rtx get_stored_val (store_info *, machine_mode, poly_int64,
1366 			   poly_int64, basic_block, bool);
1367 
1368 
1369 /* BODY is an instruction pattern that belongs to INSN.  Return 1 if
1370    there is a candidate store, after adding it to the appropriate
1371    local store group if so.  */
1372 
1373 static int
record_store(rtx body,bb_info_t bb_info)1374 record_store (rtx body, bb_info_t bb_info)
1375 {
1376   rtx mem, rhs, const_rhs, mem_addr;
1377   poly_int64 offset = 0;
1378   poly_int64 width = 0;
1379   insn_info_t insn_info = bb_info->last_insn;
1380   store_info *store_info = NULL;
1381   int group_id;
1382   cselib_val *base = NULL;
1383   insn_info_t ptr, last, redundant_reason;
1384   bool store_is_unused;
1385 
1386   if (GET_CODE (body) != SET && GET_CODE (body) != CLOBBER)
1387     return 0;
1388 
1389   mem = SET_DEST (body);
1390 
1391   /* If this is not used, then this cannot be used to keep the insn
1392      from being deleted.  On the other hand, it does provide something
1393      that can be used to prove that another store is dead.  */
1394   store_is_unused
1395     = (find_reg_note (insn_info->insn, REG_UNUSED, mem) != NULL);
1396 
1397   /* Check whether that value is a suitable memory location.  */
1398   if (!MEM_P (mem))
1399     {
1400       /* If the set or clobber is unused, then it does not effect our
1401 	 ability to get rid of the entire insn.  */
1402       if (!store_is_unused)
1403 	insn_info->cannot_delete = true;
1404       return 0;
1405     }
1406 
1407   /* At this point we know mem is a mem. */
1408   if (GET_MODE (mem) == BLKmode)
1409     {
1410       HOST_WIDE_INT const_size;
1411       if (GET_CODE (XEXP (mem, 0)) == SCRATCH)
1412 	{
1413 	  if (dump_file && (dump_flags & TDF_DETAILS))
1414 	    fprintf (dump_file, " adding wild read for (clobber (mem:BLK (scratch))\n");
1415 	  add_wild_read (bb_info);
1416 	  insn_info->cannot_delete = true;
1417 	  return 0;
1418 	}
1419       /* Handle (set (mem:BLK (addr) [... S36 ...]) (const_int 0))
1420 	 as memset (addr, 0, 36);  */
1421       else if (!MEM_SIZE_KNOWN_P (mem)
1422 	       || maybe_le (MEM_SIZE (mem), 0)
1423 	       /* This is a limit on the bitmap size, which is only relevant
1424 		  for constant-sized MEMs.  */
1425 	       || (MEM_SIZE (mem).is_constant (&const_size)
1426 		   && const_size > MAX_OFFSET)
1427 	       || GET_CODE (body) != SET
1428 	       || !CONST_INT_P (SET_SRC (body)))
1429 	{
1430 	  if (!store_is_unused)
1431 	    {
1432 	      /* If the set or clobber is unused, then it does not effect our
1433 		 ability to get rid of the entire insn.  */
1434 	      insn_info->cannot_delete = true;
1435 	      clear_rhs_from_active_local_stores ();
1436 	    }
1437 	  return 0;
1438 	}
1439     }
1440 
1441   /* We can still process a volatile mem, we just cannot delete it.  */
1442   if (MEM_VOLATILE_P (mem))
1443     insn_info->cannot_delete = true;
1444 
1445   if (!canon_address (mem, &group_id, &offset, &base))
1446     {
1447       clear_rhs_from_active_local_stores ();
1448       return 0;
1449     }
1450 
1451   if (GET_MODE (mem) == BLKmode)
1452     width = MEM_SIZE (mem);
1453   else
1454     width = GET_MODE_SIZE (GET_MODE (mem));
1455 
1456   if (!endpoint_representable_p (offset, width))
1457     {
1458       clear_rhs_from_active_local_stores ();
1459       return 0;
1460     }
1461 
1462   if (known_eq (width, 0))
1463     return 0;
1464 
1465   if (group_id >= 0)
1466     {
1467       /* In the restrictive case where the base is a constant or the
1468 	 frame pointer we can do global analysis.  */
1469 
1470       group_info *group
1471 	= rtx_group_vec[group_id];
1472       tree expr = MEM_EXPR (mem);
1473 
1474       store_info = rtx_store_info_pool.allocate ();
1475       set_usage_bits (group, offset, width, expr);
1476 
1477       if (dump_file && (dump_flags & TDF_DETAILS))
1478 	{
1479 	  fprintf (dump_file, " processing const base store gid=%d",
1480 		   group_id);
1481 	  print_range (dump_file, offset, width);
1482 	  fprintf (dump_file, "\n");
1483 	}
1484     }
1485   else
1486     {
1487       if (may_be_sp_based_p (XEXP (mem, 0)))
1488 	insn_info->stack_pointer_based = true;
1489       insn_info->contains_cselib_groups = true;
1490 
1491       store_info = cse_store_info_pool.allocate ();
1492       group_id = -1;
1493 
1494       if (dump_file && (dump_flags & TDF_DETAILS))
1495 	{
1496 	  fprintf (dump_file, " processing cselib store ");
1497 	  print_range (dump_file, offset, width);
1498 	  fprintf (dump_file, "\n");
1499 	}
1500     }
1501 
1502   const_rhs = rhs = NULL_RTX;
1503   if (GET_CODE (body) == SET
1504       /* No place to keep the value after ra.  */
1505       && !reload_completed
1506       && (REG_P (SET_SRC (body))
1507 	  || GET_CODE (SET_SRC (body)) == SUBREG
1508 	  || CONSTANT_P (SET_SRC (body)))
1509       && !MEM_VOLATILE_P (mem)
1510       /* Sometimes the store and reload is used for truncation and
1511 	 rounding.  */
1512       && !(FLOAT_MODE_P (GET_MODE (mem)) && (flag_float_store)))
1513     {
1514       rhs = SET_SRC (body);
1515       if (CONSTANT_P (rhs))
1516 	const_rhs = rhs;
1517       else if (body == PATTERN (insn_info->insn))
1518 	{
1519 	  rtx tem = find_reg_note (insn_info->insn, REG_EQUAL, NULL_RTX);
1520 	  if (tem && CONSTANT_P (XEXP (tem, 0)))
1521 	    const_rhs = XEXP (tem, 0);
1522 	}
1523       if (const_rhs == NULL_RTX && REG_P (rhs))
1524 	{
1525 	  rtx tem = cselib_expand_value_rtx (rhs, scratch, 5);
1526 
1527 	  if (tem && CONSTANT_P (tem))
1528 	    const_rhs = tem;
1529 	}
1530     }
1531 
1532   /* Check to see if this stores causes some other stores to be
1533      dead.  */
1534   ptr = active_local_stores;
1535   last = NULL;
1536   redundant_reason = NULL;
1537   mem = canon_rtx (mem);
1538 
1539   if (group_id < 0)
1540     mem_addr = base->val_rtx;
1541   else
1542     {
1543       group_info *group = rtx_group_vec[group_id];
1544       mem_addr = group->canon_base_addr;
1545     }
1546   if (maybe_ne (offset, 0))
1547     mem_addr = plus_constant (get_address_mode (mem), mem_addr, offset);
1548 
1549   while (ptr)
1550     {
1551       insn_info_t next = ptr->next_local_store;
1552       class store_info *s_info = ptr->store_rec;
1553       bool del = true;
1554 
1555       /* Skip the clobbers. We delete the active insn if this insn
1556 	 shadows the set.  To have been put on the active list, it
1557 	 has exactly on set. */
1558       while (!s_info->is_set)
1559 	s_info = s_info->next;
1560 
1561       if (s_info->group_id == group_id && s_info->cse_base == base)
1562 	{
1563 	  HOST_WIDE_INT i;
1564 	  if (dump_file && (dump_flags & TDF_DETAILS))
1565 	    {
1566 	      fprintf (dump_file, "    trying store in insn=%d gid=%d",
1567 		       INSN_UID (ptr->insn), s_info->group_id);
1568 	      print_range (dump_file, s_info->offset, s_info->width);
1569 	      fprintf (dump_file, "\n");
1570 	    }
1571 
1572 	  /* Even if PTR won't be eliminated as unneeded, if both
1573 	     PTR and this insn store the same constant value, we might
1574 	     eliminate this insn instead.  */
1575 	  if (s_info->const_rhs
1576 	      && const_rhs
1577 	      && known_subrange_p (offset, width,
1578 				   s_info->offset, s_info->width)
1579 	      && all_positions_needed_p (s_info, offset - s_info->offset,
1580 					 width)
1581 	      /* We can only remove the later store if the earlier one
1582 		 aliases at least all the accesses of the later one.  */
1583 	      && ((MEM_ALIAS_SET (mem) == MEM_ALIAS_SET (s_info->mem)
1584 		   || alias_set_subset_of (MEM_ALIAS_SET (mem),
1585 					   MEM_ALIAS_SET (s_info->mem)))
1586 		  && (!MEM_EXPR (s_info->mem)
1587 		      || refs_same_for_tbaa_p (MEM_EXPR (s_info->mem),
1588 					       MEM_EXPR (mem)))))
1589 	    {
1590 	      if (GET_MODE (mem) == BLKmode)
1591 		{
1592 		  if (GET_MODE (s_info->mem) == BLKmode
1593 		      && s_info->const_rhs == const_rhs)
1594 		    redundant_reason = ptr;
1595 		}
1596 	      else if (s_info->const_rhs == const0_rtx
1597 		       && const_rhs == const0_rtx)
1598 		redundant_reason = ptr;
1599 	      else
1600 		{
1601 		  rtx val;
1602 		  start_sequence ();
1603 		  val = get_stored_val (s_info, GET_MODE (mem), offset, width,
1604 					BLOCK_FOR_INSN (insn_info->insn),
1605 					true);
1606 		  if (get_insns () != NULL)
1607 		    val = NULL_RTX;
1608 		  end_sequence ();
1609 		  if (val && rtx_equal_p (val, const_rhs))
1610 		    redundant_reason = ptr;
1611 		}
1612 	    }
1613 
1614 	  HOST_WIDE_INT begin_unneeded, const_s_width, const_width;
1615 	  if (known_subrange_p (s_info->offset, s_info->width, offset, width))
1616 	    /* The new store touches every byte that S_INFO does.  */
1617 	    set_all_positions_unneeded (s_info);
1618 	  else if ((offset - s_info->offset).is_constant (&begin_unneeded)
1619 		   && s_info->width.is_constant (&const_s_width)
1620 		   && width.is_constant (&const_width))
1621 	    {
1622 	      HOST_WIDE_INT end_unneeded = begin_unneeded + const_width;
1623 	      begin_unneeded = MAX (begin_unneeded, 0);
1624 	      end_unneeded = MIN (end_unneeded, const_s_width);
1625 	      for (i = begin_unneeded; i < end_unneeded; ++i)
1626 		set_position_unneeded (s_info, i);
1627 	    }
1628 	  else
1629 	    {
1630 	      /* We don't know which parts of S_INFO are needed and
1631 		 which aren't, so invalidate the RHS.  */
1632 	      s_info->rhs = NULL;
1633 	      s_info->const_rhs = NULL;
1634 	    }
1635 	}
1636       else if (s_info->rhs)
1637 	/* Need to see if it is possible for this store to overwrite
1638 	   the value of store_info.  If it is, set the rhs to NULL to
1639 	   keep it from being used to remove a load.  */
1640 	{
1641 	  if (canon_output_dependence (s_info->mem, true,
1642 				       mem, GET_MODE (mem),
1643 				       mem_addr))
1644 	    {
1645 	      s_info->rhs = NULL;
1646 	      s_info->const_rhs = NULL;
1647 	    }
1648 	}
1649 
1650       /* An insn can be deleted if every position of every one of
1651 	 its s_infos is marked unneeded.  */
1652       if (any_positions_needed_p (s_info))
1653 	del = false;
1654 
1655       if (del)
1656 	{
1657 	  insn_info_t insn_to_delete = ptr;
1658 
1659 	  active_local_stores_len--;
1660 	  if (last)
1661 	    last->next_local_store = ptr->next_local_store;
1662 	  else
1663 	    active_local_stores = ptr->next_local_store;
1664 
1665 	  if (!insn_to_delete->cannot_delete)
1666 	    delete_dead_store_insn (insn_to_delete);
1667 	}
1668       else
1669 	last = ptr;
1670 
1671       ptr = next;
1672     }
1673 
1674   /* Finish filling in the store_info.  */
1675   store_info->next = insn_info->store_rec;
1676   insn_info->store_rec = store_info;
1677   store_info->mem = mem;
1678   store_info->mem_addr = mem_addr;
1679   store_info->cse_base = base;
1680   HOST_WIDE_INT const_width;
1681   if (!width.is_constant (&const_width))
1682     {
1683       store_info->is_large = true;
1684       store_info->positions_needed.large.count = 0;
1685       store_info->positions_needed.large.bmap = NULL;
1686     }
1687   else if (const_width > HOST_BITS_PER_WIDE_INT)
1688     {
1689       store_info->is_large = true;
1690       store_info->positions_needed.large.count = 0;
1691       store_info->positions_needed.large.bmap = BITMAP_ALLOC (&dse_bitmap_obstack);
1692     }
1693   else
1694     {
1695       store_info->is_large = false;
1696       store_info->positions_needed.small_bitmask
1697 	= lowpart_bitmask (const_width);
1698     }
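  /* For example (illustration only): a 4-byte store starts with
     positions_needed.small_bitmask == 0xf, one bit per byte still
     needed.  A later store covering its bytes 2..3 clears bits 2 and
     3 via set_position_unneeded; the insn becomes deletable only once
     the mask reaches 0.  */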
1699   store_info->group_id = group_id;
1700   store_info->offset = offset;
1701   store_info->width = width;
1702   store_info->is_set = GET_CODE (body) == SET;
1703   store_info->rhs = rhs;
1704   store_info->const_rhs = const_rhs;
1705   store_info->redundant_reason = redundant_reason;
1706 
1707   /* If this is a clobber, we return 0.  We will only be able to
1708      delete this insn if there is only one USED store, but we can
1709      use the clobber to delete other stores earlier.  */
1710   return store_info->is_set ? 1 : 0;
1711 }
1712 
1713 
1714 static void
1715 dump_insn_info (const char * start, insn_info_t insn_info)
1716 {
1717   fprintf (dump_file, "%s insn=%d %s\n", start,
1718 	   INSN_UID (insn_info->insn),
1719 	   insn_info->store_rec ? "has store" : "naked");
1720 }
1721 
1722 
1723 /* If the modes are different and the value's source and target do not
1724    line up, we need to extract the value from the lower part of the rhs
1725    of the store, shift it, and then put it into a form that can be
1726    shoved into the read_insn.  This function generates a right SHIFT of
1727    a value that is at least ACCESS_SIZE bytes wide, to be read in
1728    READ_MODE.  The shift sequence is returned, or NULL if we failed to
1729    find a shift.  */
1730 
1731 static rtx
1732 find_shift_sequence (poly_int64 access_size,
1733 		     store_info *store_info,
1734 		     machine_mode read_mode,
1735 		     poly_int64 shift, bool speed, bool require_cst)
1736 {
1737   machine_mode store_mode = GET_MODE (store_info->mem);
1738   scalar_int_mode new_mode;
1739   rtx read_reg = NULL;
1740 
1741   /* Some machines like the x86 have shift insns for each size of
1742      operand.  Other machines like the ppc or the ia-64 may only have
1743      shift insns that shift values within 32 or 64 bit registers.
1744      This loop tries to find the smallest shift insn that will right
1745      justify the value we want to read but is available in one insn on
1746      the machine.  */
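  /* A concrete case (illustration only, assuming a 64-bit target with
     BITS_PER_WORD == 64): reading the upper 32 bits of a DImode store
     as SImode gives ACCESS_SIZE == 8 and SHIFT == 32, so the loop
     below first tries DImode, the narrowest mode wide enough for a
     single-insn right shift.  */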
1747 
1748   opt_scalar_int_mode new_mode_iter;
1749   FOR_EACH_MODE_IN_CLASS (new_mode_iter, MODE_INT)
1750     {
1751       rtx target, new_reg, new_lhs;
1752       rtx_insn *shift_seq, *insn;
1753       int cost;
1754 
1755       new_mode = new_mode_iter.require ();
1756       if (GET_MODE_BITSIZE (new_mode) > BITS_PER_WORD)
1757 	break;
1758       if (maybe_lt (GET_MODE_SIZE (new_mode), access_size))
1759 	continue;
1760 
1761       /* If a constant was stored into memory, try to simplify it here,
1762 	 otherwise the cost of the shift might preclude this optimization
1763 	 e.g. at -Os, even when no actual shift will be needed.  */
1764       if (store_info->const_rhs)
1765 	{
1766 	  poly_uint64 byte = subreg_lowpart_offset (new_mode, store_mode);
1767 	  rtx ret = simplify_subreg (new_mode, store_info->const_rhs,
1768 				     store_mode, byte);
1769 	  if (ret && CONSTANT_P (ret))
1770 	    {
1771 	      rtx shift_rtx = gen_int_shift_amount (new_mode, shift);
1772 	      ret = simplify_const_binary_operation (LSHIFTRT, new_mode,
1773 						     ret, shift_rtx);
1774 	      if (ret && CONSTANT_P (ret))
1775 		{
1776 		  byte = subreg_lowpart_offset (read_mode, new_mode);
1777 		  ret = simplify_subreg (read_mode, ret, new_mode, byte);
1778 		  if (ret && CONSTANT_P (ret)
1779 		      && (set_src_cost (ret, read_mode, speed)
1780 			  <= COSTS_N_INSNS (1)))
1781 		    return ret;
1782 		}
1783 	    }
1784 	}
1785 
1786       if (require_cst)
1787 	return NULL_RTX;
1788 
1789       /* Try a wider mode if truncating the store mode to NEW_MODE
1790 	 requires a real instruction.  */
1791       if (maybe_lt (GET_MODE_SIZE (new_mode), GET_MODE_SIZE (store_mode))
1792 	  && !TRULY_NOOP_TRUNCATION_MODES_P (new_mode, store_mode))
1793 	continue;
1794 
1795       /* Also try a wider mode if the necessary punning is either not
1796 	 desirable or not possible.  */
1797       if (!CONSTANT_P (store_info->rhs)
1798 	  && !targetm.modes_tieable_p (new_mode, store_mode))
1799 	continue;
1800 
1801       new_reg = gen_reg_rtx (new_mode);
1802 
1803       start_sequence ();
1804 
1805       /* In theory we could also check for an ashr.  Ian Taylor knows
1806 	 of one dsp where the cost of these two was not the same.  But
1807 	 this really is a rare case anyway.  */
1808       target = expand_binop (new_mode, lshr_optab, new_reg,
1809 			     gen_int_shift_amount (new_mode, shift),
1810 			     new_reg, 1, OPTAB_DIRECT);
1811 
1812       shift_seq = get_insns ();
1813       end_sequence ();
1814 
1815       if (target != new_reg || shift_seq == NULL)
1816 	continue;
1817 
1818       cost = 0;
1819       for (insn = shift_seq; insn != NULL_RTX; insn = NEXT_INSN (insn))
1820 	if (INSN_P (insn))
1821 	  cost += insn_cost (insn, speed);
1822 
1823       /* The computation up to here is essentially independent
1824 	 of the arguments and could be precomputed.  It may
1825 	 not be worth doing so.  We could precompute if
1826 	 worthwhile or at least cache the results.  The result
1827 	 technically depends on both SHIFT and ACCESS_SIZE,
1828 	 but in practice the answer will depend only on ACCESS_SIZE.  */
1829 
1830       if (cost > COSTS_N_INSNS (1))
1831 	continue;
1832 
1833       new_lhs = extract_low_bits (new_mode, store_mode,
1834 				  copy_rtx (store_info->rhs));
1835       if (new_lhs == NULL_RTX)
1836 	continue;
1837 
1838       /* We found an acceptable shift.  Generate a move to
1839 	 take the value from the store and put it into the
1840 	 shift pseudo, then shift it, then generate another
1841 	 move to put it into the target of the read.  */
1842       emit_move_insn (new_reg, new_lhs);
1843       emit_insn (shift_seq);
1844       read_reg = extract_low_bits (read_mode, new_mode, new_reg);
1845       break;
1846     }
1847 
1848   return read_reg;
1849 }
1850 
1851 
1852 /* Call back for note_stores to find the hard regs set or clobbered by
1853    insn.  Data is a bitmap of the hardregs set so far.  */
1854 
1855 static void
1856 look_for_hardregs (rtx x, const_rtx pat ATTRIBUTE_UNUSED, void *data)
1857 {
1858   bitmap regs_set = (bitmap) data;
1859 
1860   if (REG_P (x)
1861       && HARD_REGISTER_P (x))
1862     bitmap_set_range (regs_set, REGNO (x), REG_NREGS (x));
1863 }
1864 
1865 /* Helper function for replace_read and record_store.
1866    Attempt to return a value of mode READ_MODE stored in STORE_INFO,
1867    consisting of READ_WIDTH bytes starting from READ_OFFSET.  Return NULL
1868    if not successful.  If REQUIRE_CST is true, always return a constant.  */
1869 
1870 static rtx
1871 get_stored_val (store_info *store_info, machine_mode read_mode,
1872 		poly_int64 read_offset, poly_int64 read_width,
1873 		basic_block bb, bool require_cst)
1874 {
1875   machine_mode store_mode = GET_MODE (store_info->mem);
1876   poly_int64 gap;
1877   rtx read_reg;
1878 
1879   /* To get here the read is within the boundaries of the write so
1880      shift will never be negative.  Start out with the shift being in
1881      bytes.  */
1882   if (store_mode == BLKmode)
1883     gap = 0;
1884   else if (BYTES_BIG_ENDIAN)
1885     gap = ((store_info->offset + store_info->width)
1886 	   - (read_offset + read_width));
1887   else
1888     gap = read_offset - store_info->offset;
1889 
1890   if (gap.is_constant () && maybe_ne (gap, 0))
1891     {
1892       poly_int64 shift = gap * BITS_PER_UNIT;
1893       poly_int64 access_size = GET_MODE_SIZE (read_mode) + gap;
1894       read_reg = find_shift_sequence (access_size, store_info, read_mode,
1895 				      shift, optimize_bb_for_speed_p (bb),
1896 				      require_cst);
1897     }
1898   else if (store_mode == BLKmode)
1899     {
1900       /* The store is a memset (addr, const_val, const_size).  */
1901       gcc_assert (CONST_INT_P (store_info->rhs));
1902       scalar_int_mode int_store_mode;
1903       if (!int_mode_for_mode (read_mode).exists (&int_store_mode))
1904 	read_reg = NULL_RTX;
1905       else if (store_info->rhs == const0_rtx)
1906 	read_reg = extract_low_bits (read_mode, int_store_mode, const0_rtx);
1907       else if (GET_MODE_BITSIZE (int_store_mode) > HOST_BITS_PER_WIDE_INT
1908 	       || BITS_PER_UNIT >= HOST_BITS_PER_WIDE_INT)
1909 	read_reg = NULL_RTX;
1910       else
1911 	{
1912 	  unsigned HOST_WIDE_INT c
1913 	    = INTVAL (store_info->rhs)
1914 	      & ((HOST_WIDE_INT_1 << BITS_PER_UNIT) - 1);
1915 	  int shift = BITS_PER_UNIT;
1916 	  while (shift < HOST_BITS_PER_WIDE_INT)
1917 	    {
1918 	      c |= (c << shift);
1919 	      shift <<= 1;
1920 	    }
1921 	  read_reg = gen_int_mode (c, int_store_mode);
1922 	  read_reg = extract_low_bits (read_mode, int_store_mode, read_reg);
1923 	}
1924     }
1925   else if (store_info->const_rhs
1926 	   && (require_cst
1927 	       || GET_MODE_CLASS (read_mode) != GET_MODE_CLASS (store_mode)))
1928     read_reg = extract_low_bits (read_mode, store_mode,
1929 				 copy_rtx (store_info->const_rhs));
1930   else
1931     read_reg = extract_low_bits (read_mode, store_mode,
1932 				 copy_rtx (store_info->rhs));
1933   if (require_cst && read_reg && !CONSTANT_P (read_reg))
1934     read_reg = NULL_RTX;
1935   return read_reg;
1936 }
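/* For instance (illustration only): with BITS_PER_UNIT == 8 and a
   64-bit HOST_WIDE_INT, the doubling loop above widens a memset fill
   byte of 0xab as

     c = 0xab;
     c |= c << 8;     now 0xabab
     c |= c << 16;    now 0xabababab
     c |= c << 32;    now 0xabababababababab

   before extract_low_bits truncates the pattern to READ_MODE.  */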
1937 
1938 /* Take a sequence of:
1939      A <- r1
1940      ...
1941      ... <- A
1942 
1943    and change it into
1944    r2 <- r1
1945    A <- r1
1946    ...
1947    ... <- r2
1948 
1949    or
1950 
1951    r3 <- extract (r1)
1952    r3 <- r3 >> shift
1953    r2 <- extract (r3)
1954    ... <- r2
1955 
1956    or
1957 
1958    r2 <- extract (r1)
1959    ... <- r2
1960 
1961    Depending on the alignment and the mode of the store and
1962    subsequent load.
1963 
1964 
1965    The STORE_INFO and STORE_INSN are for the store and READ_INFO
1966    and READ_INSN are for the read.  Return true if the replacement
1967    went ok.  */
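/* In source terms (an illustrative sketch, not from the original
   comments): for

     p->x = v;      A <- r1
     ...
     u = p->x;      ... <- A

   the load of p->x is replaced by a use of a pseudo that is copied
   from r1 just before the store, so the store itself can later be
   deleted if nothing else reads A.  */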
1968 
1969 static bool
1970 replace_read (store_info *store_info, insn_info_t store_insn,
1971 	      read_info_t read_info, insn_info_t read_insn, rtx *loc)
1972 {
1973   machine_mode store_mode = GET_MODE (store_info->mem);
1974   machine_mode read_mode = GET_MODE (read_info->mem);
1975   rtx_insn *insns, *this_insn;
1976   rtx read_reg;
1977   basic_block bb;
1978 
1979   if (!dbg_cnt (dse))
1980     return false;
1981 
1982   /* Create a sequence of instructions to set up the read register.
1983      This sequence goes immediately before the store and its result
1984      is read by the load.
1985 
1986      We need to keep this in perspective.  We are replacing a read
1987      with a sequence of insns, but the read will almost certainly be
1988      in cache, so it is not going to be an expensive one.  Thus, we
1989      are not willing to do a multi insn shift or worse a subroutine
1990      call to get rid of the read.  */
1991   if (dump_file && (dump_flags & TDF_DETAILS))
1992     fprintf (dump_file, "trying to replace %smode load in insn %d"
1993 	     " from %smode store in insn %d\n",
1994 	     GET_MODE_NAME (read_mode), INSN_UID (read_insn->insn),
1995 	     GET_MODE_NAME (store_mode), INSN_UID (store_insn->insn));
1996   start_sequence ();
1997   bb = BLOCK_FOR_INSN (read_insn->insn);
1998   read_reg = get_stored_val (store_info,
1999 			     read_mode, read_info->offset, read_info->width,
2000 			     bb, false);
2001   if (read_reg == NULL_RTX)
2002     {
2003       end_sequence ();
2004       if (dump_file && (dump_flags & TDF_DETAILS))
2005 	fprintf (dump_file, " -- could not extract bits of stored value\n");
2006       return false;
2007     }
2008   /* Force the value into a new register so that it won't be clobbered
2009      between the store and the load.  */
2010   read_reg = copy_to_mode_reg (read_mode, read_reg);
2011   insns = get_insns ();
2012   end_sequence ();
2013 
2014   if (insns != NULL_RTX)
2015     {
2016       /* Now we have to scan the set of new instructions to see if the
2017 	 sequence contains any sets of hardregs that happened to be
2018 	 live at this point.  For instance, this can happen if one of
2019 	 the insns sets the CC and the CC happened to be live at that
2020 	 point.  This does occasionally happen, see PR 37922.  */
2021       bitmap regs_set = BITMAP_ALLOC (&reg_obstack);
2022 
2023       for (this_insn = insns;
2024 	   this_insn != NULL_RTX; this_insn = NEXT_INSN (this_insn))
2025 	{
2026 	  if (insn_invalid_p (this_insn, false))
2027 	    {
2028 	      if (dump_file && (dump_flags & TDF_DETAILS))
2029 		{
2030 		  fprintf (dump_file, " -- replacing the loaded MEM with ");
2031 		  print_simple_rtl (dump_file, read_reg);
2032 		  fprintf (dump_file, " led to an invalid instruction\n");
2033 		}
2034 	      BITMAP_FREE (regs_set);
2035 	      return false;
2036 	    }
2037 	  note_stores (this_insn, look_for_hardregs, regs_set);
2038 	}
2039 
2040       if (store_insn->fixed_regs_live)
2041 	bitmap_and_into (regs_set, store_insn->fixed_regs_live);
2042       if (!bitmap_empty_p (regs_set))
2043 	{
2044 	  if (dump_file && (dump_flags & TDF_DETAILS))
2045 	    {
2046 	      fprintf (dump_file, "abandoning replacement because sequence "
2047 				  "clobbers live hardregs:");
2048 	      df_print_regset (dump_file, regs_set);
2049 	    }
2050 
2051 	  BITMAP_FREE (regs_set);
2052 	  return false;
2053 	}
2054       BITMAP_FREE (regs_set);
2055     }
2056 
2057   subrtx_iterator::array_type array;
2058   FOR_EACH_SUBRTX (iter, array, *loc, NONCONST)
2059     {
2060       const_rtx x = *iter;
2061       if (GET_RTX_CLASS (GET_CODE (x)) == RTX_AUTOINC)
2062 	{
2063 	  if (dump_file && (dump_flags & TDF_DETAILS))
2064 	    fprintf (dump_file, " -- replacing the MEM failed due to address "
2065 				"side-effects\n");
2066 	  return false;
2067 	}
2068     }
2069 
2070   if (validate_change (read_insn->insn, loc, read_reg, 0))
2071     {
2072       deferred_change *change = deferred_change_pool.allocate ();
2073 
2074       /* Insert this right before the store insn where it will be safe
2075 	 from later insns that might change it before the read.  */
2076       emit_insn_before (insns, store_insn->insn);
2077 
2078       /* And now for the kludge part: cselib croaks if you just
2079 	 return at this point.  There are two reasons for this:
2080 
2081 	 1) Cselib has an idea of how many pseudos there are and
2082 	 that does not include the new ones we just added.
2083 
2084 	 2) Cselib does not know about the move insn we added
2085 	 above the store_info, and there is no way to tell it
2086 	 about it, because it has "moved on".
2087 
2088 	 Problem (1) is fixable with a certain amount of engineering.
2089 	 Problem (2) requires starting the bb from scratch.  This
2090 	 could be expensive.
2091 
2092 	 So we are just going to have to lie.  The move/extraction
2093 	 insns are not really an issue, cselib did not see them.  But
2094 	 the use of the new pseudo read_insn is a real problem because
2095 	 cselib has not scanned this insn.  The way that we solve this
2096 	 problem is that we are just going to put the mem back for now
2097 	 and when we are finished with the block, we undo this.  We
2098 	 keep a table of mems to get rid of.  At the end of the basic
2099 	 block we can put them back.  */
2100 
2101       *loc = read_info->mem;
2102       change->next = deferred_change_list;
2103       deferred_change_list = change;
2104       change->loc = loc;
2105       change->reg = read_reg;
2106 
2107       /* Get rid of the read_info, from the point of view of the
2108 	 rest of dse, play like this read never happened.  */
2109       read_insn->read_rec = read_info->next;
2110       read_info_type_pool.remove (read_info);
2111       if (dump_file && (dump_flags & TDF_DETAILS))
2112 	{
2113 	  fprintf (dump_file, " -- replaced the loaded MEM with ");
2114 	  print_simple_rtl (dump_file, read_reg);
2115 	  fprintf (dump_file, "\n");
2116 	}
2117       return true;
2118     }
2119   else
2120     {
2121       if (dump_file && (dump_flags & TDF_DETAILS))
2122 	{
2123 	  fprintf (dump_file, " -- replacing the loaded MEM with ");
2124 	  print_simple_rtl (dump_file, read_reg);
2125 	  fprintf (dump_file, " led to an invalid instruction\n");
2126 	}
2127       return false;
2128     }
2129 }
2130 
2131 /* Check the address of MEM *LOC and kill any appropriate stores that may
2132    be active.  */
2133 
2134 static void
2135 check_mem_read_rtx (rtx *loc, bb_info_t bb_info)
2136 {
2137   rtx mem = *loc, mem_addr;
2138   insn_info_t insn_info;
2139   poly_int64 offset = 0;
2140   poly_int64 width = 0;
2141   cselib_val *base = NULL;
2142   int group_id;
2143   read_info_t read_info;
2144 
2145   insn_info = bb_info->last_insn;
2146 
2147   if ((MEM_ALIAS_SET (mem) == ALIAS_SET_MEMORY_BARRIER)
2148       || MEM_VOLATILE_P (mem))
2149     {
2150       if (crtl->stack_protect_guard
2151 	  && (MEM_EXPR (mem) == crtl->stack_protect_guard
2152 	      || (crtl->stack_protect_guard_decl
2153 		  && MEM_EXPR (mem) == crtl->stack_protect_guard_decl))
2154 	  && MEM_VOLATILE_P (mem))
2155 	{
2156 	  /* This is either the stack protector canary on the stack,
2157 	     which ought to be written by a MEM_VOLATILE_P store and
2158 	     thus shouldn't be deleted and is read at the very end of
2159 	     the function, but shouldn't conflict with any other store.
2160 	     Or it is __stack_chk_guard variable or TLS or whatever else
2161 	     MEM holding the canary value, which really shouldn't be
2162 	     ever modified in -fstack-protector* protected functions,
2163 	     otherwise the prologue store wouldn't match the epilogue
2164 	     check.  */
2165 	  if (dump_file && (dump_flags & TDF_DETAILS))
2166 	    fprintf (dump_file, " stack protector canary read ignored.\n");
2167 	  insn_info->cannot_delete = true;
2168 	  return;
2169 	}
2170 
2171       if (dump_file && (dump_flags & TDF_DETAILS))
2172 	fprintf (dump_file, " adding wild read, volatile or barrier.\n");
2173       add_wild_read (bb_info);
2174       insn_info->cannot_delete = true;
2175       return;
2176     }
2177 
2178   /* If it is reading readonly mem, then there can be no conflict with
2179      another write. */
2180   if (MEM_READONLY_P (mem))
2181     return;
2182 
2183   if (!canon_address (mem, &group_id, &offset, &base))
2184     {
2185       if (dump_file && (dump_flags & TDF_DETAILS))
2186 	fprintf (dump_file, " adding wild read, canon_address failure.\n");
2187       add_wild_read (bb_info);
2188       return;
2189     }
2190 
2191   if (GET_MODE (mem) == BLKmode)
2192     width = -1;
2193   else
2194     width = GET_MODE_SIZE (GET_MODE (mem));
2195 
2196   if (!endpoint_representable_p (offset, known_eq (width, -1) ? 1 : width))
2197     {
2198       if (dump_file && (dump_flags & TDF_DETAILS))
2199 	fprintf (dump_file, " adding wild read, due to overflow.\n");
2200       add_wild_read (bb_info);
2201       return;
2202     }
2203 
2204   read_info = read_info_type_pool.allocate ();
2205   read_info->group_id = group_id;
2206   read_info->mem = mem;
2207   read_info->offset = offset;
2208   read_info->width = width;
2209   read_info->next = insn_info->read_rec;
2210   insn_info->read_rec = read_info;
2211   if (group_id < 0)
2212     mem_addr = base->val_rtx;
2213   else
2214     {
2215       group_info *group = rtx_group_vec[group_id];
2216       mem_addr = group->canon_base_addr;
2217     }
2218   if (maybe_ne (offset, 0))
2219     mem_addr = plus_constant (get_address_mode (mem), mem_addr, offset);
2220   /* Avoid passing VALUE RTXen as mem_addr to canon_true_dependence
2221      which will over and over re-create proper RTL and re-apply the
2222      offset above.  See PR80960 where we almost allocate 1.6GB of PLUS
2223      RTXen that way.  */
2224   mem_addr = get_addr (mem_addr);
2225 
2226   if (group_id >= 0)
2227     {
2228       /* This is the restricted case where the base is a constant or
2229 	 the frame pointer and the offset is a constant.  */
2230       insn_info_t i_ptr = active_local_stores;
2231       insn_info_t last = NULL;
2232 
2233       if (dump_file && (dump_flags & TDF_DETAILS))
2234 	{
2235 	  if (!known_size_p (width))
2236 	    fprintf (dump_file, " processing const load gid=%d[BLK]\n",
2237 		     group_id);
2238 	  else
2239 	    {
2240 	      fprintf (dump_file, " processing const load gid=%d", group_id);
2241 	      print_range (dump_file, offset, width);
2242 	      fprintf (dump_file, "\n");
2243 	    }
2244 	}
2245 
2246       while (i_ptr)
2247 	{
2248 	  bool remove = false;
2249 	  store_info *store_info = i_ptr->store_rec;
2250 
2251 	  /* Skip the clobbers.  */
2252 	  while (!store_info->is_set)
2253 	    store_info = store_info->next;
2254 
2255 	  /* There are three cases here.  */
2256 	  if (store_info->group_id < 0)
2257 	    /* We have a cselib store followed by a read from a
2258 	       const base. */
2259 	    remove
2260 	      = canon_true_dependence (store_info->mem,
2261 				       GET_MODE (store_info->mem),
2262 				       store_info->mem_addr,
2263 				       mem, mem_addr);
2264 
2265 	  else if (group_id == store_info->group_id)
2266 	    {
2267 	      /* This is a block mode load.  We may get lucky and
2268 		 canon_true_dependence may save the day.  */
2269 	      if (!known_size_p (width))
2270 		remove
2271 		  = canon_true_dependence (store_info->mem,
2272 					   GET_MODE (store_info->mem),
2273 					   store_info->mem_addr,
2274 					   mem, mem_addr);
2275 
2276 	      /* If this read is just reading back something that we just
2277 		 stored, rewrite the read.  */
2278 	      else
2279 		{
2280 		  if (store_info->rhs
2281 		      && known_subrange_p (offset, width, store_info->offset,
2282 					   store_info->width)
2283 		      && all_positions_needed_p (store_info,
2284 						 offset - store_info->offset,
2285 						 width)
2286 		      && replace_read (store_info, i_ptr, read_info,
2287 				       insn_info, loc))
2288 		    return;
2289 
2290 		  /* The bases are the same, just see if the offsets
2291 		     could overlap.  */
2292 		  if (ranges_maybe_overlap_p (offset, width,
2293 					      store_info->offset,
2294 					      store_info->width))
2295 		    remove = true;
2296 		}
2297 	    }
2298 
2299 	  /* else
2300 	     The else case that is missing here is that the
2301 	     bases are constant but different.  There is nothing
2302 	     to do here because there is no overlap.  */
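	  /* An instance of that missing else (illustration only): a
	     store to gid=1 (say, a frame slot) followed by a load from
	     gid=2 (say, a global).  Two distinct constant bases cannot
	     overlap, so the store simply stays on the active list.  */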
2303 
2304 	  if (remove)
2305 	    {
2306 	      if (dump_file && (dump_flags & TDF_DETAILS))
2307 		dump_insn_info ("removing from active", i_ptr);
2308 
2309 	      active_local_stores_len--;
2310 	      if (last)
2311 		last->next_local_store = i_ptr->next_local_store;
2312 	      else
2313 		active_local_stores = i_ptr->next_local_store;
2314 	    }
2315 	  else
2316 	    last = i_ptr;
2317 	  i_ptr = i_ptr->next_local_store;
2318 	}
2319     }
2320   else
2321     {
2322       insn_info_t i_ptr = active_local_stores;
2323       insn_info_t last = NULL;
2324       if (dump_file && (dump_flags & TDF_DETAILS))
2325 	{
2326 	  fprintf (dump_file, " processing cselib load mem:");
2327 	  print_inline_rtx (dump_file, mem, 0);
2328 	  fprintf (dump_file, "\n");
2329 	}
2330 
2331       while (i_ptr)
2332 	{
2333 	  bool remove = false;
2334 	  store_info *store_info = i_ptr->store_rec;
2335 
2336 	  if (dump_file && (dump_flags & TDF_DETAILS))
2337 	    fprintf (dump_file, " processing cselib load against insn %d\n",
2338 		     INSN_UID (i_ptr->insn));
2339 
2340 	  /* Skip the clobbers.  */
2341 	  while (!store_info->is_set)
2342 	    store_info = store_info->next;
2343 
2344 	  /* If this read is just reading back something that we just
2345 	     stored, rewrite the read.  */
2346 	  if (store_info->rhs
2347 	      && store_info->group_id == -1
2348 	      && store_info->cse_base == base
2349 	      && known_subrange_p (offset, width, store_info->offset,
2350 				   store_info->width)
2351 	      && all_positions_needed_p (store_info,
2352 					 offset - store_info->offset, width)
2353 	      && replace_read (store_info, i_ptr,  read_info, insn_info, loc))
2354 	    return;
2355 
2356 	  remove = canon_true_dependence (store_info->mem,
2357 					  GET_MODE (store_info->mem),
2358 					  store_info->mem_addr,
2359 					  mem, mem_addr);
2360 
2361 	  if (remove)
2362 	    {
2363 	      if (dump_file && (dump_flags & TDF_DETAILS))
2364 		dump_insn_info ("removing from active", i_ptr);
2365 
2366 	      active_local_stores_len--;
2367 	      if (last)
2368 		last->next_local_store = i_ptr->next_local_store;
2369 	      else
2370 		active_local_stores = i_ptr->next_local_store;
2371 	    }
2372 	  else
2373 	    last = i_ptr;
2374 	  i_ptr = i_ptr->next_local_store;
2375 	}
2376     }
2377 }
2378 
2379 /* A note_uses callback in which DATA points to the INSN_INFO for
2380    check_mem_read_rtx.  Calls check_mem_read_rtx on every MEM found
2381    in any part of *LOC.  */
2382 
2383 static void
2384 check_mem_read_use (rtx *loc, void *data)
2385 {
2386   subrtx_ptr_iterator::array_type array;
2387   FOR_EACH_SUBRTX_PTR (iter, array, loc, NONCONST)
2388     {
2389       rtx *loc = *iter;
2390       if (MEM_P (*loc))
2391 	check_mem_read_rtx (loc, (bb_info_t) data);
2392     }
2393 }
2394 
2395 
2396 /* Get arguments passed to CALL_INSN.  Return TRUE if successful.
2397    So far it only handles arguments passed in registers.  */
2398 
2399 static bool
2400 get_call_args (rtx call_insn, tree fn, rtx *args, int nargs)
2401 {
2402   CUMULATIVE_ARGS args_so_far_v;
2403   cumulative_args_t args_so_far;
2404   tree arg;
2405   int idx;
2406 
2407   INIT_CUMULATIVE_ARGS (args_so_far_v, TREE_TYPE (fn), NULL_RTX, 0, 3);
2408   args_so_far = pack_cumulative_args (&args_so_far_v);
2409 
2410   arg = TYPE_ARG_TYPES (TREE_TYPE (fn));
2411   for (idx = 0;
2412        arg != void_list_node && idx < nargs;
2413        arg = TREE_CHAIN (arg), idx++)
2414     {
2415       scalar_int_mode mode;
2416       rtx reg, link, tmp;
2417 
2418       if (!is_int_mode (TYPE_MODE (TREE_VALUE (arg)), &mode))
2419 	return false;
2420 
2421       function_arg_info arg (mode, /*named=*/true);
2422       reg = targetm.calls.function_arg (args_so_far, arg);
2423       if (!reg || !REG_P (reg) || GET_MODE (reg) != mode)
2424 	return false;
2425 
2426       for (link = CALL_INSN_FUNCTION_USAGE (call_insn);
2427 	   link;
2428 	   link = XEXP (link, 1))
2429 	if (GET_CODE (XEXP (link, 0)) == USE)
2430 	  {
2431 	    scalar_int_mode arg_mode;
2432 	    args[idx] = XEXP (XEXP (link, 0), 0);
2433 	    if (REG_P (args[idx])
2434 		&& REGNO (args[idx]) == REGNO (reg)
2435 		&& (GET_MODE (args[idx]) == mode
2436 		    || (is_int_mode (GET_MODE (args[idx]), &arg_mode)
2437 			&& (GET_MODE_SIZE (arg_mode) <= UNITS_PER_WORD)
2438 			&& (GET_MODE_SIZE (arg_mode) > GET_MODE_SIZE (mode)))))
2439 	      break;
2440 	  }
2441       if (!link)
2442 	return false;
2443 
2444       tmp = cselib_expand_value_rtx (args[idx], scratch, 5);
2445       if (GET_MODE (args[idx]) != mode)
2446 	{
2447 	  if (!tmp || !CONST_INT_P (tmp))
2448 	    return false;
2449 	  tmp = gen_int_mode (INTVAL (tmp), mode);
2450 	}
2451       if (tmp)
2452 	args[idx] = tmp;
2453 
2454       targetm.calls.function_arg_advance (args_so_far, arg);
2455     }
2456   if (arg != void_list_node || idx != nargs)
2457     return false;
2458   return true;
2459 }
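/* Usage sketch (illustrative, mirroring the memset handling in
   scan_insn below): for a call to memset (p, 0, 32),

     rtx args[3];
     if (get_call_args (insn, memset_call, args, 3))
       ...   args[0] is the RTL for p, args[1] is (const_int 0),
	     args[2] is (const_int 32)

   scan_insn then fabricates a BLKmode store of args[1] of that size so
   the memset participates in local DSE like any other store.  */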
2460 
2461 /* Return a bitmap of the fixed registers contained in IN.  */
2462 
2463 static bitmap
2464 copy_fixed_regs (const_bitmap in)
2465 {
2466   bitmap ret;
2467 
2468   ret = ALLOC_REG_SET (NULL);
2469   bitmap_and (ret, in, bitmap_view<HARD_REG_SET> (fixed_reg_set));
2470   return ret;
2471 }
2472 
2473 /* Apply record_store to all candidate stores in INSN.  Mark INSN
2474    if some part of it is not a candidate store and assigns to a
2475    non-register target.  */
2476 
2477 static void
2478 scan_insn (bb_info_t bb_info, rtx_insn *insn, int max_active_local_stores)
2479 {
2480   rtx body;
2481   insn_info_type *insn_info = insn_info_type_pool.allocate ();
2482   int mems_found = 0;
2483   memset (insn_info, 0, sizeof (struct insn_info_type));
2484 
2485   if (dump_file && (dump_flags & TDF_DETAILS))
2486     fprintf (dump_file, "\n**scanning insn=%d\n",
2487 	     INSN_UID (insn));
2488 
2489   insn_info->prev_insn = bb_info->last_insn;
2490   insn_info->insn = insn;
2491   bb_info->last_insn = insn_info;
2492 
2493   if (DEBUG_INSN_P (insn))
2494     {
2495       insn_info->cannot_delete = true;
2496       return;
2497     }
2498 
2499   /* Look at all of the uses in the insn.  */
2500   note_uses (&PATTERN (insn), check_mem_read_use, bb_info);
2501 
2502   if (CALL_P (insn))
2503     {
2504       bool const_call;
2505       rtx call, sym;
2506       tree memset_call = NULL_TREE;
2507 
2508       insn_info->cannot_delete = true;
2509 
2510       /* Const functions cannot do anything bad, i.e. read memory;
2511 	 however, they can read their parameters, which may have
2512 	 been pushed onto the stack.
2513 	 memset and bzero don't read memory either.  */
2514       const_call = RTL_CONST_CALL_P (insn);
2515       if (!const_call
2516 	  && (call = get_call_rtx_from (insn))
2517 	  && (sym = XEXP (XEXP (call, 0), 0))
2518 	  && GET_CODE (sym) == SYMBOL_REF
2519 	  && SYMBOL_REF_DECL (sym)
2520 	  && TREE_CODE (SYMBOL_REF_DECL (sym)) == FUNCTION_DECL
2521 	  && fndecl_built_in_p (SYMBOL_REF_DECL (sym), BUILT_IN_MEMSET))
2522 	memset_call = SYMBOL_REF_DECL (sym);
2523 
2524       if (const_call || memset_call)
2525 	{
2526 	  insn_info_t i_ptr = active_local_stores;
2527 	  insn_info_t last = NULL;
2528 
2529 	  if (dump_file && (dump_flags & TDF_DETAILS))
2530 	    fprintf (dump_file, "%s call %d\n",
2531 		     const_call ? "const" : "memset", INSN_UID (insn));
2532 
2533 	  /* See the head comment of the frame_read field.  */
2534 	  if (reload_completed
2535 	      /* Tail calls store their arguments using the
2536 		 arg pointer.  If it is a frame pointer on the target,
2537 		 even before reload we need to kill frame pointer based
2538 		 stores.  */
2539 	      || (SIBLING_CALL_P (insn)
2540 		  && HARD_FRAME_POINTER_IS_ARG_POINTER))
2541 	    insn_info->frame_read = true;
2542 
2543 	  /* Loop over the active stores and remove those which are
2544 	     killed by the const function call.  */
2545 	  while (i_ptr)
2546 	    {
2547 	      bool remove_store = false;
2548 
2549 	      /* The stack pointer based stores are always killed.  */
2550 	      if (i_ptr->stack_pointer_based)
2551 	        remove_store = true;
2552 
2553 	      /* If the frame is read, the frame related stores are killed.  */
2554 	      else if (insn_info->frame_read)
2555 		{
2556 		  store_info *store_info = i_ptr->store_rec;
2557 
2558 		  /* Skip the clobbers.  */
2559 		  while (!store_info->is_set)
2560 		    store_info = store_info->next;
2561 
2562 		  if (store_info->group_id >= 0
2563 		      && rtx_group_vec[store_info->group_id]->frame_related)
2564 		    remove_store = true;
2565 		}
2566 
2567 	      if (remove_store)
2568 		{
2569 		  if (dump_file && (dump_flags & TDF_DETAILS))
2570 		    dump_insn_info ("removing from active", i_ptr);
2571 
2572 		  active_local_stores_len--;
2573 		  if (last)
2574 		    last->next_local_store = i_ptr->next_local_store;
2575 		  else
2576 		    active_local_stores = i_ptr->next_local_store;
2577 		}
2578 	      else
2579 		last = i_ptr;
2580 
2581 	      i_ptr = i_ptr->next_local_store;
2582 	    }
2583 
2584 	  if (memset_call)
2585 	    {
2586 	      rtx args[3];
2587 	      if (get_call_args (insn, memset_call, args, 3)
2588 		  && CONST_INT_P (args[1])
2589 		  && CONST_INT_P (args[2])
2590 		  && INTVAL (args[2]) > 0)
2591 		{
2592 		  rtx mem = gen_rtx_MEM (BLKmode, args[0]);
2593 		  set_mem_size (mem, INTVAL (args[2]));
2594 		  body = gen_rtx_SET (mem, args[1]);
2595 		  mems_found += record_store (body, bb_info);
2596 		  if (dump_file && (dump_flags & TDF_DETAILS))
2597 		    fprintf (dump_file, "handling memset as BLKmode store\n");
2598 		  if (mems_found == 1)
2599 		    {
2600 		      if (active_local_stores_len++ >= max_active_local_stores)
2601 			{
2602 			  active_local_stores_len = 1;
2603 			  active_local_stores = NULL;
2604 			}
2605 		      insn_info->fixed_regs_live
2606 			= copy_fixed_regs (bb_info->regs_live);
2607 		      insn_info->next_local_store = active_local_stores;
2608 		      active_local_stores = insn_info;
2609 		    }
2610 		}
2611 	      else
2612 		clear_rhs_from_active_local_stores ();
2613 	    }
2614 	}
2615       else if (SIBLING_CALL_P (insn)
2616 	       && (reload_completed || HARD_FRAME_POINTER_IS_ARG_POINTER))
2617 	/* Arguments for a sibling call that are pushed to memory are passed
2618 	   using the incoming argument pointer of the current function.  After
2619 	   reload that might be (and likely is) frame pointer based.  And, if
2620 	   it is a frame pointer on the target, even before reload we need to
2621 	   kill frame pointer based stores.  */
2622 	add_wild_read (bb_info);
2623       else
2624 	/* Every other call, including pure functions, may read any memory
2625            that is not relative to the frame.  */
2626         add_non_frame_wild_read (bb_info);
2627 
2628       return;
2629     }
2630 
2631   /* Assuming that there are sets in these insns, we cannot delete
2632      them.  */
2633   if ((GET_CODE (PATTERN (insn)) == CLOBBER)
2634       || volatile_refs_p (PATTERN (insn))
2635       || (!cfun->can_delete_dead_exceptions && !insn_nothrow_p (insn))
2636       || (RTX_FRAME_RELATED_P (insn))
2637       || find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX))
2638     insn_info->cannot_delete = true;
2639 
2640   body = PATTERN (insn);
2641   if (GET_CODE (body) == PARALLEL)
2642     {
2643       int i;
2644       for (i = 0; i < XVECLEN (body, 0); i++)
2645 	mems_found += record_store (XVECEXP (body, 0, i), bb_info);
2646     }
2647   else
2648     mems_found += record_store (body, bb_info);
2649 
2650   if (dump_file && (dump_flags & TDF_DETAILS))
2651     fprintf (dump_file, "mems_found = %d, cannot_delete = %s\n",
2652 	     mems_found, insn_info->cannot_delete ? "true" : "false");
2653 
2654   /* If we found some sets of mems, add the insn to active_local_stores so
2655      that it can be locally deleted if found dead or used for
2656      replace_read and redundant constant store elimination.  Otherwise mark
2657      it as cannot delete.  This simplifies the processing later.  */
2658   if (mems_found == 1)
2659     {
2660       if (active_local_stores_len++ >= max_active_local_stores)
2661 	{
2662 	  active_local_stores_len = 1;
2663 	  active_local_stores = NULL;
2664 	}
2665       insn_info->fixed_regs_live = copy_fixed_regs (bb_info->regs_live);
2666       insn_info->next_local_store = active_local_stores;
2667       active_local_stores = insn_info;
2668     }
2669   else
2670     insn_info->cannot_delete = true;
2671 }
2672 
2673 
2674 /* Remove the insns that store to BASE from the set of
2675    active_local_stores.  This is a callback from cselib that is used
2676    to get rid of such stores when cselib discards BASE.  */
2677 
2678 static void
2679 remove_useless_values (cselib_val *base)
2680 {
2681   insn_info_t insn_info = active_local_stores;
2682   insn_info_t last = NULL;
2683 
2684   while (insn_info)
2685     {
2686       store_info *store_info = insn_info->store_rec;
2687       bool del = false;
2688 
2689       /* If ANY of the store_infos match the cselib group being
2690 	 deleted, the insn can no longer be tracked and is dropped.  */
2691       while (store_info)
2692 	{
2693 	  if ((store_info->group_id == -1)
2694 	      && (store_info->cse_base == base))
2695 	    {
2696 	      del = true;
2697 	      break;
2698 	    }
2699 	  store_info = store_info->next;
2700 	}
2701 
2702       if (del)
2703 	{
2704 	  active_local_stores_len--;
2705 	  if (last)
2706 	    last->next_local_store = insn_info->next_local_store;
2707 	  else
2708 	    active_local_stores = insn_info->next_local_store;
2709 	  free_store_info (insn_info);
2710 	}
2711       else
2712 	last = insn_info;
2713 
2714       insn_info = insn_info->next_local_store;
2715     }
2716 }
2717 
2718 
2719 /* Do all of step 1.  */
2720 
2721 static void
2722 dse_step1 (void)
2723 {
2724   basic_block bb;
2725   bitmap regs_live = BITMAP_ALLOC (&reg_obstack);
2726 
2727   cselib_init (0);
2728   all_blocks = BITMAP_ALLOC (NULL);
2729   bitmap_set_bit (all_blocks, ENTRY_BLOCK);
2730   bitmap_set_bit (all_blocks, EXIT_BLOCK);
2731 
2732   /* For -O1 reduce the maximum number of active local stores for RTL DSE
2733      since this can consume huge amounts of memory (PR89115).  */
2734   int max_active_local_stores = param_max_dse_active_local_stores;
2735   if (optimize < 2)
2736     max_active_local_stores /= 10;
2737 
2738   FOR_ALL_BB_FN (bb, cfun)
2739     {
2740       insn_info_t ptr;
2741       bb_info_t bb_info = dse_bb_info_type_pool.allocate ();
2742 
2743       memset (bb_info, 0, sizeof (dse_bb_info_type));
2744       bitmap_set_bit (all_blocks, bb->index);
2745       bb_info->regs_live = regs_live;
2746 
2747       bitmap_copy (regs_live, DF_LR_IN (bb));
2748       df_simulate_initialize_forwards (bb, regs_live);
2749 
2750       bb_table[bb->index] = bb_info;
2751       cselib_discard_hook = remove_useless_values;
2752 
2753       if (bb->index >= NUM_FIXED_BLOCKS)
2754 	{
2755 	  rtx_insn *insn;
2756 
2757 	  active_local_stores = NULL;
2758 	  active_local_stores_len = 0;
2759 	  cselib_clear_table ();
2760 
2761 	  /* Scan the insns.  */
2762 	  FOR_BB_INSNS (bb, insn)
2763 	    {
2764 	      if (INSN_P (insn))
2765 		scan_insn (bb_info, insn, max_active_local_stores);
2766 	      cselib_process_insn (insn);
2767 	      if (INSN_P (insn))
2768 		df_simulate_one_insn_forwards (bb, insn, regs_live);
2769 	    }
2770 
2771 	  /* This is something of a hack, because the global algorithm
2772 	     is supposed to take care of the case where stores go dead
2773 	     at the end of the function.  However, the global
2774 	     algorithm must take a more conservative view of block
2775 	     mode reads than the local alg does.  So to get the case
2776 	     where you have a store to the frame followed by a non
2777 	     overlapping block mode read, we look at the active local
2778 	     stores at the end of the function and delete all of the
2779 	     frame and spill based ones.  */
2780 	  if (stores_off_frame_dead_at_return
2781 	      && (EDGE_COUNT (bb->succs) == 0
2782 		  || (single_succ_p (bb)
2783 		      && single_succ (bb) == EXIT_BLOCK_PTR_FOR_FN (cfun)
2784 		      && ! crtl->calls_eh_return)))
2785 	    {
2786 	      insn_info_t i_ptr = active_local_stores;
2787 	      while (i_ptr)
2788 		{
2789 		  store_info *store_info = i_ptr->store_rec;
2790 
2791 		  /* Skip the clobbers.  */
2792 		  while (!store_info->is_set)
2793 		    store_info = store_info->next;
2794 		  if (store_info->group_id >= 0)
2795 		    {
2796 		      group_info *group = rtx_group_vec[store_info->group_id];
2797 		      if (group->frame_related && !i_ptr->cannot_delete)
2798 			delete_dead_store_insn (i_ptr);
2799 		    }
2800 
2801 		  i_ptr = i_ptr->next_local_store;
2802 		}
2803 	    }
2804 
2805 	  /* Get rid of the loads that were discovered in
2806 	     replace_read.  Cselib is finished with this block.  */
2807 	  while (deferred_change_list)
2808 	    {
2809 	      deferred_change *next = deferred_change_list->next;
2810 
2811 	      /* There is no reason to validate this change.  That was
2812 		 done earlier.  */
2813 	      *deferred_change_list->loc = deferred_change_list->reg;
2814 	      deferred_change_pool.remove (deferred_change_list);
2815 	      deferred_change_list = next;
2816 	    }
2817 
2818 	  /* Get rid of all of the cselib based store_infos in this
2819 	     block and mark the containing insns as not being
2820 	     deletable.  */
2821 	  ptr = bb_info->last_insn;
2822 	  while (ptr)
2823 	    {
2824 	      if (ptr->contains_cselib_groups)
2825 		{
2826 		  store_info *s_info = ptr->store_rec;
2827 		  while (s_info && !s_info->is_set)
2828 		    s_info = s_info->next;
2829 		  if (s_info
2830 		      && s_info->redundant_reason
2831 		      && s_info->redundant_reason->insn
2832 		      && !ptr->cannot_delete)
2833 		    {
2834 		      if (dump_file && (dump_flags & TDF_DETAILS))
2835 			fprintf (dump_file, "Locally deleting insn %d "
2836 					    "because insn %d stores the "
2837 					    "same value and couldn't be "
2838 					    "eliminated\n",
2839 				 INSN_UID (ptr->insn),
2840 				 INSN_UID (s_info->redundant_reason->insn));
2841 		      delete_dead_store_insn (ptr);
2842 		    }
2843 		  free_store_info (ptr);
2844 		}
2845 	      else
2846 		{
2847 		  store_info *s_info;
2848 
2849 		  /* Free at least positions_needed bitmaps.  */
2850 		  for (s_info = ptr->store_rec; s_info; s_info = s_info->next)
2851 		    if (s_info->is_large)
2852 		      {
2853 			BITMAP_FREE (s_info->positions_needed.large.bmap);
2854 			s_info->is_large = false;
2855 		      }
2856 		}
2857 	      ptr = ptr->prev_insn;
2858 	    }
2859 
2860 	  cse_store_info_pool.release ();
2861 	}
2862       bb_info->regs_live = NULL;
2863     }
2864 
2865   BITMAP_FREE (regs_live);
2866   cselib_finish ();
2867   rtx_group_table->empty ();
2868 }
2869 
2870 
2871 /*----------------------------------------------------------------------------
2872    Second step.
2873 
2874    Assign each byte position in the stores that we are going to
2875    analyze globally to a position in the bitmaps.  Returns true if
2876    there are any bit positions assigned.
2877 ----------------------------------------------------------------------------*/
2878 
2879 static void
2880 dse_step2_init (void)
2881 {
2882   unsigned int i;
2883   group_info *group;
2884 
2885   FOR_EACH_VEC_ELT (rtx_group_vec, i, group)
2886     {
2887       /* For all non stack related bases, we only consider a store to
2888 	 be deletable if there are two or more stores for that
2889 	 position.  This is because it takes one store to make the
2890 	 other store redundant.  However, for the stores that are
2891 	 stack related, we consider them if there is only one store
2892 	 for the position.  We do this because the stack related
2893 	 stores can be deleted if there is no read between them and
2894 	 the end of the function.
2895 
2896 	 To make this work in the current framework, we take the stack
2897 	 related bases and add all of the bits from store1 into store2.
2898 	 This has the effect of making them eligible even if there is
2899 	 only one store.   */
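      /* E.g. (illustration only): a frame slot written exactly once at
	 offset -16 sets a bit only in store1_n; the IOR below copies it
	 into store2_n, so that lone store is still a global candidate.  */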
2900 
2901       if (stores_off_frame_dead_at_return && group->frame_related)
2902 	{
2903 	  bitmap_ior_into (group->store2_n, group->store1_n);
2904 	  bitmap_ior_into (group->store2_p, group->store1_p);
2905 	  if (dump_file && (dump_flags & TDF_DETAILS))
2906 	    fprintf (dump_file, "group %d is frame related ", i);
2907 	}
2908 
2909       group->offset_map_size_n++;
2910       group->offset_map_n = XOBNEWVEC (&dse_obstack, int,
2911 				       group->offset_map_size_n);
2912       group->offset_map_size_p++;
2913       group->offset_map_p = XOBNEWVEC (&dse_obstack, int,
2914 				       group->offset_map_size_p);
2915       group->process_globally = false;
2916       if (dump_file && (dump_flags & TDF_DETAILS))
2917 	{
2918 	  fprintf (dump_file, "group %d(%d+%d): ", i,
2919 		   (int)bitmap_count_bits (group->store2_n),
2920 		   (int)bitmap_count_bits (group->store2_p));
2921 	  bitmap_print (dump_file, group->store2_n, "n ", " ");
2922 	  bitmap_print (dump_file, group->store2_p, "p ", "\n");
2923 	}
2924     }
2925 }
2926 
2927 
2928 /* Init the offset tables.  */
2929 
2930 static bool
2931 dse_step2 (void)
2932 {
2933   unsigned int i;
2934   group_info *group;
2935   /* Position 0 is unused because 0 is used in the maps to mean
2936   /* Position 0 is left unassigned because 0 is used in the maps to
2937      mean "no position".  */
2938   FOR_EACH_VEC_ELT (rtx_group_vec, i, group)
2939     {
2940       bitmap_iterator bi;
2941       unsigned int j;
2942 
2943       memset (group->offset_map_n, 0, sizeof (int) * group->offset_map_size_n);
2944       memset (group->offset_map_p, 0, sizeof (int) * group->offset_map_size_p);
2945       bitmap_clear (group->group_kill);
2946 
2947       EXECUTE_IF_SET_IN_BITMAP (group->store2_n, 0, j, bi)
2948 	{
2949 	  bitmap_set_bit (group->group_kill, current_position);
2950           if (bitmap_bit_p (group->escaped_n, j))
2951 	    bitmap_set_bit (kill_on_calls, current_position);
2952 	  group->offset_map_n[j] = current_position++;
2953 	  group->process_globally = true;
2954 	}
2955       EXECUTE_IF_SET_IN_BITMAP (group->store2_p, 0, j, bi)
2956 	{
2957 	  bitmap_set_bit (group->group_kill, current_position);
2958           if (bitmap_bit_p (group->escaped_p, j))
2959 	    bitmap_set_bit (kill_on_calls, current_position);
2960 	  group->offset_map_p[j] = current_position++;
2961 	  group->process_globally = true;
2962 	}
2963     }
2964   return current_position != 1;
2965 }
2966 
2967 
2968 
2969 /*----------------------------------------------------------------------------
2970   Third step.
2971 
2972   Build the bit vectors for the transfer functions.
2973 ----------------------------------------------------------------------------*/
2974 
2975 
2976 /* Look up the bitmap index for OFFSET in GROUP_INFO.  If it is not
2977    there, return 0.  */
2978 
2979 static int
2980 get_bitmap_index (group_info *group_info, HOST_WIDE_INT offset)
2981 {
2982   if (offset < 0)
2983     {
2984       HOST_WIDE_INT offset_p = -offset;
2985       if (offset_p >= group_info->offset_map_size_n)
2986 	return 0;
2987       return group_info->offset_map_n[offset_p];
2988     }
2989   else
2990     {
2991       if (offset >= group_info->offset_map_size_p)
2992 	return 0;
2993       return group_info->offset_map_p[offset];
2994     }
2995 }
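/* A worked example (illustration only): if dse_step2 assigned offset 0
   of some group position 1 and offset 4 position 2, then
   get_bitmap_index (group, 4) == 2, while any offset that was never
   assigned (say 8, or anything past the map) yields 0, meaning "not
   tracked globally".  */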
2996 
2997 
2998 /* Process the STORE_INFOs into the bitmaps GEN and KILL.  KILL
2999    may be NULL. */
3000 
3001 static void
3002 scan_stores (store_info *store_info, bitmap gen, bitmap kill)
3003 {
3004   while (store_info)
3005     {
3006       HOST_WIDE_INT i, offset, width;
3007       group_info *group_info
3008 	= rtx_group_vec[store_info->group_id];
3009       /* We can (conservatively) ignore stores whose bounds aren't known;
3010 	 they simply don't generate new global dse opportunities.  */
3011       if (group_info->process_globally
3012 	  && store_info->offset.is_constant (&offset)
3013 	  && store_info->width.is_constant (&width))
3014 	{
3015 	  HOST_WIDE_INT end = offset + width;
3016 	  for (i = offset; i < end; i++)
3017 	    {
3018 	      int index = get_bitmap_index (group_info, i);
3019 	      if (index != 0)
3020 		{
3021 		  bitmap_set_bit (gen, index);
3022 		  if (kill)
3023 		    bitmap_clear_bit (kill, index);
3024 		}
3025 	    }
3026 	}
3027       store_info = store_info->next;
3028     }
3029 }
3030 
3031 
3032 /* Process the READ_INFOs into the bitmaps GEN and KILL.  KILL
3033    may be NULL.  */
3034 
3035 static void
3036 scan_reads (insn_info_t insn_info, bitmap gen, bitmap kill)
3037 {
3038   read_info_t read_info = insn_info->read_rec;
3039   int i;
3040   group_info *group;
3041 
3042   /* If this insn reads the frame, kill all the frame related stores.  */
3043   if (insn_info->frame_read)
3044     {
3045       FOR_EACH_VEC_ELT (rtx_group_vec, i, group)
3046 	if (group->process_globally && group->frame_related)
3047 	  {
3048 	    if (kill)
3049 	      bitmap_ior_into (kill, group->group_kill);
3050 	    bitmap_and_compl_into (gen, group->group_kill);
3051 	  }
3052     }
3053   if (insn_info->non_frame_wild_read)
3054     {
3055       /* Kill all non-frame related stores.  Kill all stores of variables that
3056          escape.  */
3057       if (kill)
3058         bitmap_ior_into (kill, kill_on_calls);
3059       bitmap_and_compl_into (gen, kill_on_calls);
3060       FOR_EACH_VEC_ELT (rtx_group_vec, i, group)
3061 	if (group->process_globally && !group->frame_related)
3062 	  {
3063 	    if (kill)
3064 	      bitmap_ior_into (kill, group->group_kill);
3065 	    bitmap_and_compl_into (gen, group->group_kill);
3066 	  }
3067     }
3068   while (read_info)
3069     {
3070       FOR_EACH_VEC_ELT (rtx_group_vec, i, group)
3071 	{
3072 	  if (group->process_globally)
3073 	    {
3074 	      if (i == read_info->group_id)
3075 		{
3076 		  HOST_WIDE_INT offset, width;
3077 		  /* Reads with non-constant size kill all DSE opportunities
3078 		     in the group.  */
3079 		  if (!read_info->offset.is_constant (&offset)
3080 		      || !read_info->width.is_constant (&width)
3081 		      || !known_size_p (width))
3082 		    {
3083 		      /* Handle block mode reads.  */
3084 		      if (kill)
3085 			bitmap_ior_into (kill, group->group_kill);
3086 		      bitmap_and_compl_into (gen, group->group_kill);
3087 		    }
3088 		  else
3089 		    {
3090 		      /* The groups are the same, just process the
3091 			 offsets.  */
3092 		      HOST_WIDE_INT j;
3093 		      HOST_WIDE_INT end = offset + width;
3094 		      for (j = offset; j < end; j++)
3095 			{
3096 			  int index = get_bitmap_index (group, j);
3097 			  if (index != 0)
3098 			    {
3099 			      if (kill)
3100 				bitmap_set_bit (kill, index);
3101 			      bitmap_clear_bit (gen, index);
3102 			    }
3103 			}
3104 		    }
3105 		}
3106 	      else
3107 		{
3108 		  /* The groups are different; if the alias sets
3109 		     conflict, clear the entire group.  We only need
3110 		     to apply this test if the read_info is a cselib
3111 		     read.  Anything with a constant base cannot alias
3112 		     something else with a different constant
3113 		     base.  */
3114 		  if ((read_info->group_id < 0)
3115 		      && canon_true_dependence (group->base_mem,
3116 						GET_MODE (group->base_mem),
3117 						group->canon_base_addr,
3118 						read_info->mem, NULL_RTX))
3119 		    {
3120 		      if (kill)
3121 			bitmap_ior_into (kill, group->group_kill);
3122 		      bitmap_and_compl_into (gen, group->group_kill);
3123 		    }
3124 		}
3125 	    }
3126 	}
3127 
3128       read_info = read_info->next;
3129     }
3130 }
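/* Net effect of scan_stores and scan_reads (an informal summary, not
   from the original comments): dse_step3_scan below walks each block
   bottom-up and lets later (higher) insns overwrite earlier settings,
   so a tracked byte ends up in GEN iff its first access from the top
   of the block is a store, and in KILL iff that first access is a
   read.  */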
3131 
3132 
3133 /* Return the insn in BB_INFO before the first wild read or, if there
3134    are no wild reads in the block, the last insn.  */
3135 
3136 static insn_info_t
3137 find_insn_before_first_wild_read (bb_info_t bb_info)
3138 {
3139   insn_info_t insn_info = bb_info->last_insn;
3140   insn_info_t last_wild_read = NULL;
3141 
3142   while (insn_info)
3143     {
3144       if (insn_info->wild_read)
3145 	{
3146 	  last_wild_read = insn_info->prev_insn;
3147 	  /* Block starts with wild read.  */
3148 	  if (!last_wild_read)
3149 	    return NULL;
3150 	}
3151 
3152       insn_info = insn_info->prev_insn;
3153     }
3154 
3155   if (last_wild_read)
3156     return last_wild_read;
3157   else
3158     return bb_info->last_insn;
3159 }
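/* For example (illustration only): for a block i1; i2; wild-read; i4,
   this returns i2, so dse_step3_scan only simulates i2 and i1.
   Nothing from the wild read onward can contribute to the block's
   transfer function, because the wild read reads every tracked
   location.  */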
3160 
3161 
3162 /* Scan the insns in BB from bottom to top in order to build the gen
3163    and kill sets for the block.  We start at the insn returned by
3164    find_insn_before_first_wild_read: either the last insn in the block
3165    or the insn just before the first wild read.  In the latter case we
3166    can skip the rest of the block because it just does not matter:
3167    anything that happens there is hidden by the wild read.  */
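
/* Skipping the tail is safe because the scan frees the kill set
   whenever a wild read is present (see below), and a missing kill
   set is treated as all ones by dse_transfer_function; that models
   the wild read killing every tracked position, so the stores at or
   below it cannot contribute anything to the gen set anyway.  */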
3168 
3169 static void
3170 dse_step3_scan (basic_block bb)
3171 {
3172   bb_info_t bb_info = bb_table[bb->index];
3173   insn_info_t insn_info;
3174 
3175   insn_info = find_insn_before_first_wild_read (bb_info);
3176 
3177   /* In the spill case, or in the no_spill case when there is no wild
3178      read in the block, we will need a kill set.  */
3179   if (insn_info == bb_info->last_insn)
3180     {
3181       if (bb_info->kill)
3182 	bitmap_clear (bb_info->kill);
3183       else
3184 	bb_info->kill = BITMAP_ALLOC (&dse_bitmap_obstack);
3185     }
3186   else
3187     if (bb_info->kill)
3188       BITMAP_FREE (bb_info->kill);
3189 
3190   while (insn_info)
3191     {
3192       /* There may have been code deleted by the dce pass run before
3193 	 this phase.  */
3194       if (insn_info->insn && INSN_P (insn_info->insn))
3195 	{
3196 	  scan_stores (insn_info->store_rec, bb_info->gen, bb_info->kill);
3197 	  scan_reads (insn_info, bb_info->gen, bb_info->kill);
3198 	}
3199 
3200       insn_info = insn_info->prev_insn;
3201     }
3202 }
3203 
3204 
3205 /* Set the gen set of the exit block, and also any block with no
3206    successors that does not have a wild read.  */
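
/* For example, a store into a stack slot that nothing reads before
   the function returns is dead.  Seeding the exit block's gen set
   with every position of the frame-related groups records that those
   positions are dead on exit, which is what lets the backward
   dataflow delete such stores.  */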
3207 
3208 static void
3209 dse_step3_exit_block_scan (bb_info_t bb_info)
3210 {
3211   /* The gen set is all 0's for the exit block except for the
3212      frame-related groups.  */
3213 
3214   if (stores_off_frame_dead_at_return)
3215     {
3216       unsigned int i;
3217       group_info *group;
3218 
3219       FOR_EACH_VEC_ELT (rtx_group_vec, i, group)
3220 	{
3221 	  if (group->process_globally && group->frame_related)
3222 	    bitmap_ior_into (bb_info->gen, group->group_kill);
3223 	}
3224     }
3225 }
3226 
3227 
3228 /* Clear the bits in UNREACHABLE_BLOCKS for every block that is
3229    backwards reachable from BB, the exit block or a block with no
3230    successors.  The blocks whose bits remain set are those in infinite
3231    loops or infinite self-loops.  */
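
/* For instance, a self-loop block with no path to the exit is never
   passed in as BB by dse_step3 (it has a successor) and is not a
   predecessor of any block that is, so its bit survives and
   dse_step3 later gives it an all-ones out set.  */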
3232 
3233 static void
3234 mark_reachable_blocks (sbitmap unreachable_blocks, basic_block bb)
3235 {
3236   edge e;
3237   edge_iterator ei;
3238 
3239   if (bitmap_bit_p (unreachable_blocks, bb->index))
3240     {
3241       bitmap_clear_bit (unreachable_blocks, bb->index);
3242       FOR_EACH_EDGE (e, ei, bb->preds)
3243 	{
3244 	  mark_reachable_blocks (unreachable_blocks, e->src);
3245 	}
3246     }
3247 }
3248 
3249 /* Build the transfer functions for the function.  */
3250 
3251 static void
3252 dse_step3 ()
3253 {
3254   basic_block bb;
3255   sbitmap_iterator sbi;
3256   bitmap all_ones = NULL;
3257   unsigned int i;
3258 
3259   auto_sbitmap unreachable_blocks (last_basic_block_for_fn (cfun));
3260   bitmap_ones (unreachable_blocks);
3261 
3262   FOR_ALL_BB_FN (bb, cfun)
3263     {
3264       bb_info_t bb_info = bb_table[bb->index];
3265       if (bb_info->gen)
3266 	bitmap_clear (bb_info->gen);
3267       else
3268 	bb_info->gen = BITMAP_ALLOC (&dse_bitmap_obstack);
3269 
3270       if (bb->index == ENTRY_BLOCK)
3271 	;
3272       else if (bb->index == EXIT_BLOCK)
3273 	dse_step3_exit_block_scan (bb_info);
3274       else
3275 	dse_step3_scan (bb);
3276       if (EDGE_COUNT (bb->succs) == 0)
3277 	mark_reachable_blocks (unreachable_blocks, bb);
3278 
3279       /* If this is the second time dataflow is run, delete the old
3280 	 sets.  */
3281       if (bb_info->in)
3282 	BITMAP_FREE (bb_info->in);
3283       if (bb_info->out)
3284 	BITMAP_FREE (bb_info->out);
3285     }
3286 
3287   /* For any block in an infinite loop, we must initialize the out set
3288      to all ones.  This could be expensive, but almost never occurs in
3289      practice. However, it is common in regression tests.  */
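  /* All ones is the safe optimistic start for a backward problem whose
     confluence is intersection: the union of every group's group_kill
     bits built below is the universe of tracked positions, so the
     first real intersection can only remove bits, never invent
     them.  */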
3290   EXECUTE_IF_SET_IN_BITMAP (unreachable_blocks, 0, i, sbi)
3291     {
3292       if (bitmap_bit_p (all_blocks, i))
3293 	{
3294 	  bb_info_t bb_info = bb_table[i];
3295 	  if (!all_ones)
3296 	    {
3297 	      unsigned int j;
3298 	      group_info *group;
3299 
3300 	      all_ones = BITMAP_ALLOC (&dse_bitmap_obstack);
3301 	      FOR_EACH_VEC_ELT (rtx_group_vec, j, group)
3302 		bitmap_ior_into (all_ones, group->group_kill);
3303 	    }
3304 	  if (!bb_info->out)
3305 	    {
3306 	      bb_info->out = BITMAP_ALLOC (&dse_bitmap_obstack);
3307 	      bitmap_copy (bb_info->out, all_ones);
3308 	    }
3309 	}
3310     }
3311 
3312   if (all_ones)
3313     BITMAP_FREE (all_ones);
3314 }
3315 
3316 
3317 
3318 /*----------------------------------------------------------------------------
3319    Fourth step.
3320 
3321    Solve the bitvector equations.
3322 ----------------------------------------------------------------------------*/
3323 
3324 
3325 /* Confluence function for blocks with no successors.  Create an out
3326    set from the gen set of the exit block.  This block logically has
3327    the exit block as a successor.  */
3328 
3329 
3330 
3331 static void
3332 dse_confluence_0 (basic_block bb)
3333 {
3334   bb_info_t bb_info = bb_table[bb->index];
3335 
3336   if (bb->index == EXIT_BLOCK)
3337     return;
3338 
3339   if (!bb_info->out)
3340     {
3341       bb_info->out = BITMAP_ALLOC (&dse_bitmap_obstack);
3342       bitmap_copy (bb_info->out, bb_table[EXIT_BLOCK]->gen);
3343     }
3344 }
3345 
3346 /* Propagate the information from the in set of the dest of E to the
3347    out set of the src of E.  If the various in or out sets are not
3348    there, that means they are all ones.  */
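
/* In set terms, for a block S with successors D1 ... Dn this computes
       OUT(S) = IN(D1) & ... & IN(Dn)
   where a successor whose in set does not yet exist contributes the
   universal set, and X & ALL-ONES == X, so it is simply skipped.  */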
3349 
3350 static bool
3351 dse_confluence_n (edge e)
3352 {
3353   bb_info_t src_info = bb_table[e->src->index];
3354   bb_info_t dest_info = bb_table[e->dest->index];
3355 
3356   if (dest_info->in)
3357     {
3358       if (src_info->out)
3359 	bitmap_and_into (src_info->out, dest_info->in);
3360       else
3361 	{
3362 	  src_info->out = BITMAP_ALLOC (&dse_bitmap_obstack);
3363 	  bitmap_copy (src_info->out, dest_info->in);
3364 	}
3365     }
3366   return true;
3367 }
3368 
3369 
3370 /* Propagate the info from the out to the in set of BB_INDEX's basic
3371    block.  There are three cases:
3372 
3373    1) The block has no kill set.  In this case the kill set is all
3374    ones.  It does not matter what the out set of the block is; none of
3375    the info can reach the top.  The only thing that reaches the top is
3376    the gen set, and we just copy that set.
3377 
3378    2) There is a kill set but no out set and the block has successors.
3379    In this case we just return; eventually an out set will be created,
3380    and it is better to wait than to create a set of all ones.
3381 
3382    3) There is both a kill and an out set.  We apply the obvious
3383    transfer function.
3384 */
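
/* In bitmap terms the case-3 computation is
       IN = GEN | (OUT & ~KILL)
   which is what the bitmap_ior_and_compl calls below perform.  */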
3385 
3386 static bool
3387 dse_transfer_function (int bb_index)
3388 {
3389   bb_info_t bb_info = bb_table[bb_index];
3390 
3391   if (bb_info->kill)
3392     {
3393       if (bb_info->out)
3394 	{
3395 	  /* Case 3 above.  */
3396 	  if (bb_info->in)
3397 	    return bitmap_ior_and_compl (bb_info->in, bb_info->gen,
3398 					 bb_info->out, bb_info->kill);
3399 	  else
3400 	    {
3401 	      bb_info->in = BITMAP_ALLOC (&dse_bitmap_obstack);
3402 	      bitmap_ior_and_compl (bb_info->in, bb_info->gen,
3403 				    bb_info->out, bb_info->kill);
3404 	      return true;
3405 	    }
3406 	}
3407       else
3408 	/* Case 2 above.  */
3409 	return false;
3410     }
3411   else
3412     {
3413       /* Case 1 above.  If there is already an in set, nothing
3414 	 happens.  */
3415       if (bb_info->in)
3416 	return false;
3417       else
3418 	{
3419 	  bb_info->in = BITMAP_ALLOC (&dse_bitmap_obstack);
3420 	  bitmap_copy (bb_info->in, bb_info->gen);
3421 	  return true;
3422 	}
3423     }
3424 }
3425 
3426 /* Solve the dataflow equations.  */
3427 
3428 static void
3429 dse_step4 (void)
3430 {
3431   df_simple_dataflow (DF_BACKWARD, NULL, dse_confluence_0,
3432 		      dse_confluence_n, dse_transfer_function,
3433 		      all_blocks, df_get_postorder (DF_BACKWARD),
3434 		      df_get_n_blocks (DF_BACKWARD));
3435   if (dump_file && (dump_flags & TDF_DETAILS))
3436     {
3437       basic_block bb;
3438 
3439       fprintf (dump_file, "\n\n*** Global dataflow info after analysis.\n");
3440       FOR_ALL_BB_FN (bb, cfun)
3441 	{
3442 	  bb_info_t bb_info = bb_table[bb->index];
3443 
3444 	  df_print_bb_index (bb, dump_file);
3445 	  if (bb_info->in)
3446 	    bitmap_print (dump_file, bb_info->in, "  in:   ", "\n");
3447 	  else
3448 	    fprintf (dump_file, "  in:   *MISSING*\n");
3449 	  if (bb_info->gen)
3450 	    bitmap_print (dump_file, bb_info->gen, "  gen:  ", "\n");
3451 	  else
3452 	    fprintf (dump_file, "  gen:  *MISSING*\n");
3453 	  if (bb_info->kill)
3454 	    bitmap_print (dump_file, bb_info->kill, "  kill: ", "\n");
3455 	  else
3456 	    fprintf (dump_file, "  kill: *MISSING*\n");
3457 	  if (bb_info->out)
3458 	    bitmap_print (dump_file, bb_info->out, "  out:  ", "\n");
3459 	  else
3460 	    fprintf (dump_file, "  out:  *MISSING*\n\n");
3461 	}
3462     }
3463 }
3464 
3465 
3466 
3467 /*----------------------------------------------------------------------------
3468    Fifth step.
3469 
3470    Delete the stores that can only be deleted using the global information.
3471 ----------------------------------------------------------------------------*/
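
/* A sketch of the criterion used below: V starts as the block's out
   set and is updated while walking the block backward.  A store with
   constant offset and width is deleted only if every byte position it
   writes has its bit set in V, i.e. each byte is already known to be
   overwritten again before any read.  Stores that survive feed V via
   scan_stores, and reads prune it via scan_reads.  */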
3472 
3473 
3474 static void
3475 dse_step5 (void)
3476 {
3477   basic_block bb;
3478   FOR_EACH_BB_FN (bb, cfun)
3479     {
3480       bb_info_t bb_info = bb_table[bb->index];
3481       insn_info_t insn_info = bb_info->last_insn;
3482       bitmap v = bb_info->out;
3483 
3484       while (insn_info)
3485 	{
3486 	  bool deleted = false;
3487 	  if (dump_file && insn_info->insn)
3488 	    {
3489 	      fprintf (dump_file, "starting to process insn %d\n",
3490 		       INSN_UID (insn_info->insn));
3491 	      bitmap_print (dump_file, v, "  v:  ", "\n");
3492 	    }
3493 
3494 	  /* There may have been code deleted by the dce pass run before
3495 	     this phase.  */
3496 	  if (insn_info->insn
3497 	      && INSN_P (insn_info->insn)
3498 	      && (!insn_info->cannot_delete)
3499 	      && (!bitmap_empty_p (v)))
3500 	    {
3501 	      store_info *store_info = insn_info->store_rec;
3502 
3503 	      /* Try to delete the current insn.  */
3504 	      deleted = true;
3505 
3506 	      /* Skip the clobbers.  */
3507 	      while (!store_info->is_set)
3508 		store_info = store_info->next;
3509 
3510 	      HOST_WIDE_INT i, offset, width;
3511 	      group_info *group_info = rtx_group_vec[store_info->group_id];
3512 
3513 	      if (!store_info->offset.is_constant (&offset)
3514 		  || !store_info->width.is_constant (&width))
3515 		deleted = false;
3516 	      else
3517 		{
3518 		  HOST_WIDE_INT end = offset + width;
3519 		  for (i = offset; i < end; i++)
3520 		    {
3521 		      int index = get_bitmap_index (group_info, i);
3522 
3523 		      if (dump_file && (dump_flags & TDF_DETAILS))
3524 			fprintf (dump_file, "i = %d, index = %d\n",
3525 				 (int) i, index);
3526 		      if (index == 0 || !bitmap_bit_p (v, index))
3527 			{
3528 			  if (dump_file && (dump_flags & TDF_DETAILS))
3529 			    fprintf (dump_file, "failing at i = %d\n",
3530 				     (int) i);
3531 			  deleted = false;
3532 			  break;
3533 			}
3534 		    }
3535 		}
3536 	      if (deleted)
3537 		{
3538 		  if (dbg_cnt (dse)
3539 		      && check_for_inc_dec_1 (insn_info))
3540 		    {
3541 		      delete_insn (insn_info->insn);
3542 		      insn_info->insn = NULL;
3543 		      globally_deleted++;
3544 		    }
3545 		}
3546 	    }
3547 	  /* We do not want to process the local info if the insn was
3548 	     deleted.  For instance, if the insn did a wild read, we
3549 	     no longer need to trash the info.  */
3550 	  if (insn_info->insn
3551 	      && INSN_P (insn_info->insn)
3552 	      && (!deleted))
3553 	    {
3554 	      scan_stores (insn_info->store_rec, v, NULL);
3555 	      if (insn_info->wild_read)
3556 		{
3557 		  if (dump_file && (dump_flags & TDF_DETAILS))
3558 		    fprintf (dump_file, "wild read\n");
3559 		  bitmap_clear (v);
3560 		}
3561 	      else if (insn_info->read_rec
3562 		       || insn_info->non_frame_wild_read
3563 		       || insn_info->frame_read)
3564 		{
3565 		  if (dump_file && (dump_flags & TDF_DETAILS))
3566 		    {
3567 		      if (!insn_info->non_frame_wild_read
3568 			  && !insn_info->frame_read)
3569 			fprintf (dump_file, "regular read\n");
3570 		      if (insn_info->non_frame_wild_read)
3571 			fprintf (dump_file, "non-frame wild read\n");
3572 		      if (insn_info->frame_read)
3573 			fprintf (dump_file, "frame read\n");
3574 		    }
3575 		  scan_reads (insn_info, v, NULL);
3576 		}
3577 	    }
3578 
3579 	  insn_info = insn_info->prev_insn;
3580 	}
3581     }
3582 }
3583 
3584 
3585 
3586 /*----------------------------------------------------------------------------
3587    Sixth step.
3588 
3589    Delete stores made redundant by earlier stores (which store the same
3590    value) that couldn't be eliminated.
3591 ----------------------------------------------------------------------------*/
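
/* For example, given two stores of the same value
       *p = x;    <- could not be eliminated
       ...
       *p = x;    <- redundant
   the second store's store_info carries a redundant_reason pointing
   at the first insn, and it is the second store that is deleted
   below.  (Hypothetical C-level illustration; the pass works on the
   corresponding RTL sets.)  */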
3592 
3593 static void
3594 dse_step6 (void)
3595 {
3596   basic_block bb;
3597 
3598   FOR_ALL_BB_FN (bb, cfun)
3599     {
3600       bb_info_t bb_info = bb_table[bb->index];
3601       insn_info_t insn_info = bb_info->last_insn;
3602 
3603       while (insn_info)
3604 	{
3605 	  /* There may have been code deleted by the dce pass run before
3606 	     this phase.  */
3607 	  if (insn_info->insn
3608 	      && INSN_P (insn_info->insn)
3609 	      && !insn_info->cannot_delete)
3610 	    {
3611 	      store_info *s_info = insn_info->store_rec;
3612 
3613 	      while (s_info && !s_info->is_set)
3614 		s_info = s_info->next;
3615 	      if (s_info
3616 		  && s_info->redundant_reason
3617 		  && s_info->redundant_reason->insn
3618 		  && INSN_P (s_info->redundant_reason->insn))
3619 		{
3620 		  rtx_insn *rinsn = s_info->redundant_reason->insn;
3621 		  if (dump_file && (dump_flags & TDF_DETAILS))
3622 		    fprintf (dump_file, "Locally deleting insn %d "
3623 					"because insn %d stores the "
3624 					"same value and couldn't be "
3625 					"eliminated\n",
3626 					INSN_UID (insn_info->insn),
3627 					INSN_UID (rinsn));
3628 		  delete_dead_store_insn (insn_info);
3629 		}
3630 	    }
3631 	  insn_info = insn_info->prev_insn;
3632 	}
3633     }
3634 }
3635 
3636 /*----------------------------------------------------------------------------
3637    Seventh step.
3638 
3639    Destroy everything left standing.
3640 ----------------------------------------------------------------------------*/
3641 
3642 static void
3643 dse_step7 (void)
3644 {
3645   bitmap_obstack_release (&dse_bitmap_obstack);
3646   obstack_free (&dse_obstack, NULL);
3647 
3648   end_alias_analysis ();
3649   free (bb_table);
3650   delete rtx_group_table;
3651   rtx_group_table = NULL;
3652   rtx_group_vec.release ();
3653   BITMAP_FREE (all_blocks);
3654   BITMAP_FREE (scratch);
3655 
3656   rtx_store_info_pool.release ();
3657   read_info_type_pool.release ();
3658   insn_info_type_pool.release ();
3659   dse_bb_info_type_pool.release ();
3660   group_info_pool.release ();
3661   deferred_change_pool.release ();
3662 }
3663 
3664 
3665 /* -------------------------------------------------------------------------
3666    DSE
3667    ------------------------------------------------------------------------- */
3668 
3669 /* Callback for running pass_rtl_dse.  */
3670 
3671 static unsigned int
3672 rest_of_handle_dse (void)
3673 {
3674   df_set_flags (DF_DEFER_INSN_RESCAN);
3675 
3676   /* Need the notes since we must track live hardregs in the forwards
3677      direction.  */
3678   df_note_add_problem ();
3679   df_analyze ();
3680 
3681   dse_step0 ();
3682   dse_step1 ();
3683   dse_step2_init ();
3684   if (dse_step2 ())
3685     {
3686       df_set_flags (DF_LR_RUN_DCE);
3687       df_analyze ();
3688       if (dump_file && (dump_flags & TDF_DETAILS))
3689 	fprintf (dump_file, "doing global processing\n");
3690       dse_step3 ();
3691       dse_step4 ();
3692       dse_step5 ();
3693     }
3694 
3695   dse_step6 ();
3696   dse_step7 ();
3697 
3698   if (dump_file)
3699     fprintf (dump_file, "dse: local deletions = %d, global deletions = %d\n",
3700 	     locally_deleted, globally_deleted);
3701 
3702   /* DSE can eliminate potentially-trapping MEMs.
3703      Remove any EH edges associated with them.  */
3704   if ((locally_deleted || globally_deleted)
3705       && cfun->can_throw_non_call_exceptions
3706       && purge_all_dead_edges ())
3707     {
3708       free_dominance_info (CDI_DOMINATORS);
3709       cleanup_cfg (0);
3710     }
3711 
3712   return 0;
3713 }
3714 
3715 namespace {
3716 
3717 const pass_data pass_data_rtl_dse1 =
3718 {
3719   RTL_PASS, /* type */
3720   "dse1", /* name */
3721   OPTGROUP_NONE, /* optinfo_flags */
3722   TV_DSE1, /* tv_id */
3723   0, /* properties_required */
3724   0, /* properties_provided */
3725   0, /* properties_destroyed */
3726   0, /* todo_flags_start */
3727   TODO_df_finish, /* todo_flags_finish */
3728 };
3729 
3730 class pass_rtl_dse1 : public rtl_opt_pass
3731 {
3732 public:
3733   pass_rtl_dse1 (gcc::context *ctxt)
3734     : rtl_opt_pass (pass_data_rtl_dse1, ctxt)
3735   {}
3736 
3737   /* opt_pass methods: */
3738   virtual bool gate (function *)
3739     {
3740       return optimize > 0 && flag_dse && dbg_cnt (dse1);
3741     }
3742 
3743   virtual unsigned int execute (function *) { return rest_of_handle_dse (); }
3744 
3745 }; // class pass_rtl_dse1
3746 
3747 } // anon namespace
3748 
3749 rtl_opt_pass *
3750 make_pass_rtl_dse1 (gcc::context *ctxt)
3751 {
3752   return new pass_rtl_dse1 (ctxt);
3753 }
3754 
3755 namespace {
3756 
3757 const pass_data pass_data_rtl_dse2 =
3758 {
3759   RTL_PASS, /* type */
3760   "dse2", /* name */
3761   OPTGROUP_NONE, /* optinfo_flags */
3762   TV_DSE2, /* tv_id */
3763   0, /* properties_required */
3764   0, /* properties_provided */
3765   0, /* properties_destroyed */
3766   0, /* todo_flags_start */
3767   TODO_df_finish, /* todo_flags_finish */
3768 };
3769 
3770 class pass_rtl_dse2 : public rtl_opt_pass
3771 {
3772 public:
3773   pass_rtl_dse2 (gcc::context *ctxt)
3774     : rtl_opt_pass (pass_data_rtl_dse2, ctxt)
3775   {}
3776 
3777   /* opt_pass methods: */
3778   virtual bool gate (function *)
3779     {
3780       return optimize > 0 && flag_dse && dbg_cnt (dse2);
3781     }
3782 
3783   virtual unsigned int execute (function *) { return rest_of_handle_dse (); }
3784 
3785 }; // class pass_rtl_dse2
3786 
3787 } // anon namespace
3788 
3789 rtl_opt_pass *
3790 make_pass_rtl_dse2 (gcc::context *ctxt)
3791 {
3792   return new pass_rtl_dse2 (ctxt);
3793 }
3794