1*38fd1498Szrj /* RTL dead store elimination.
2*38fd1498Szrj Copyright (C) 2005-2018 Free Software Foundation, Inc.
3*38fd1498Szrj
4*38fd1498Szrj Contributed by Richard Sandiford <rsandifor@codesourcery.com>
5*38fd1498Szrj and Kenneth Zadeck <zadeck@naturalbridge.com>
6*38fd1498Szrj
7*38fd1498Szrj This file is part of GCC.
8*38fd1498Szrj
9*38fd1498Szrj GCC is free software; you can redistribute it and/or modify it under
10*38fd1498Szrj the terms of the GNU General Public License as published by the Free
11*38fd1498Szrj Software Foundation; either version 3, or (at your option) any later
12*38fd1498Szrj version.
13*38fd1498Szrj
14*38fd1498Szrj GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15*38fd1498Szrj WARRANTY; without even the implied warranty of MERCHANTABILITY or
16*38fd1498Szrj FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17*38fd1498Szrj for more details.
18*38fd1498Szrj
19*38fd1498Szrj You should have received a copy of the GNU General Public License
20*38fd1498Szrj along with GCC; see the file COPYING3. If not see
21*38fd1498Szrj <http://www.gnu.org/licenses/>. */
22*38fd1498Szrj
23*38fd1498Szrj #undef BASELINE
24*38fd1498Szrj
25*38fd1498Szrj #include "config.h"
26*38fd1498Szrj #include "system.h"
27*38fd1498Szrj #include "coretypes.h"
28*38fd1498Szrj #include "backend.h"
29*38fd1498Szrj #include "target.h"
30*38fd1498Szrj #include "rtl.h"
31*38fd1498Szrj #include "tree.h"
32*38fd1498Szrj #include "gimple.h"
33*38fd1498Szrj #include "predict.h"
34*38fd1498Szrj #include "df.h"
35*38fd1498Szrj #include "memmodel.h"
36*38fd1498Szrj #include "tm_p.h"
37*38fd1498Szrj #include "gimple-ssa.h"
38*38fd1498Szrj #include "expmed.h"
39*38fd1498Szrj #include "optabs.h"
40*38fd1498Szrj #include "emit-rtl.h"
41*38fd1498Szrj #include "recog.h"
42*38fd1498Szrj #include "alias.h"
43*38fd1498Szrj #include "stor-layout.h"
44*38fd1498Szrj #include "cfgrtl.h"
45*38fd1498Szrj #include "cselib.h"
46*38fd1498Szrj #include "tree-pass.h"
47*38fd1498Szrj #include "explow.h"
48*38fd1498Szrj #include "expr.h"
49*38fd1498Szrj #include "dbgcnt.h"
50*38fd1498Szrj #include "params.h"
51*38fd1498Szrj #include "rtl-iter.h"
52*38fd1498Szrj #include "cfgcleanup.h"
53*38fd1498Szrj
54*38fd1498Szrj /* This file contains three techniques for performing Dead Store
55*38fd1498Szrj Elimination (dse).
56*38fd1498Szrj
57*38fd1498Szrj * The first technique performs dse locally on any base address. It
58*38fd1498Szrj is based on the cselib which is a local value numbering technique.
59*38fd1498Szrj This technique is local to a basic block but deals with a fairly
60*38fd1498Szrj general addresses.
61*38fd1498Szrj
62*38fd1498Szrj * The second technique performs dse globally but is restricted to
63*38fd1498Szrj base addresses that are either constant or are relative to the
64*38fd1498Szrj frame_pointer.
65*38fd1498Szrj
66*38fd1498Szrj * The third technique, (which is only done after register allocation)
67*38fd1498Szrj processes the spill slots. This differs from the second
68*38fd1498Szrj technique because it takes advantage of the fact that spilling is
69*38fd1498Szrj completely free from the effects of aliasing.
70*38fd1498Szrj
71*38fd1498Szrj Logically, dse is a backwards dataflow problem. A store can be
72*38fd1498Szrj deleted if it if cannot be reached in the backward direction by any
73*38fd1498Szrj use of the value being stored. However, the local technique uses a
74*38fd1498Szrj forwards scan of the basic block because cselib requires that the
75*38fd1498Szrj block be processed in that order.
76*38fd1498Szrj
77*38fd1498Szrj The pass is logically broken into 7 steps:
78*38fd1498Szrj
79*38fd1498Szrj 0) Initialization.
80*38fd1498Szrj
81*38fd1498Szrj 1) The local algorithm, as well as scanning the insns for the two
82*38fd1498Szrj global algorithms.
83*38fd1498Szrj
84*38fd1498Szrj 2) Analysis to see if the global algs are necessary. In the case
85*38fd1498Szrj of stores base on a constant address, there must be at least two
86*38fd1498Szrj stores to that address, to make it possible to delete some of the
87*38fd1498Szrj stores. In the case of stores off of the frame or spill related
88*38fd1498Szrj stores, only one store to an address is necessary because those
89*38fd1498Szrj stores die at the end of the function.
90*38fd1498Szrj
91*38fd1498Szrj 3) Set up the global dataflow equations based on processing the
92*38fd1498Szrj info parsed in the first step.
93*38fd1498Szrj
94*38fd1498Szrj 4) Solve the dataflow equations.
95*38fd1498Szrj
96*38fd1498Szrj 5) Delete the insns that the global analysis has indicated are
97*38fd1498Szrj unnecessary.
98*38fd1498Szrj
99*38fd1498Szrj 6) Delete insns that store the same value as preceding store
100*38fd1498Szrj where the earlier store couldn't be eliminated.
101*38fd1498Szrj
102*38fd1498Szrj 7) Cleanup.
103*38fd1498Szrj
104*38fd1498Szrj This step uses cselib and canon_rtx to build the largest expression
105*38fd1498Szrj possible for each address. This pass is a forwards pass through
106*38fd1498Szrj each basic block. From the point of view of the global technique,
107*38fd1498Szrj the first pass could examine a block in either direction. The
108*38fd1498Szrj forwards ordering is to accommodate cselib.
109*38fd1498Szrj
110*38fd1498Szrj We make a simplifying assumption: addresses fall into four broad
111*38fd1498Szrj categories:
112*38fd1498Szrj
113*38fd1498Szrj 1) base has rtx_varies_p == false, offset is constant.
114*38fd1498Szrj 2) base has rtx_varies_p == false, offset variable.
115*38fd1498Szrj 3) base has rtx_varies_p == true, offset constant.
116*38fd1498Szrj 4) base has rtx_varies_p == true, offset variable.
117*38fd1498Szrj
118*38fd1498Szrj The local passes are able to process all 4 kinds of addresses. The
119*38fd1498Szrj global pass only handles 1).
120*38fd1498Szrj
121*38fd1498Szrj The global problem is formulated as follows:
122*38fd1498Szrj
123*38fd1498Szrj A store, S1, to address A, where A is not relative to the stack
124*38fd1498Szrj frame, can be eliminated if all paths from S1 to the end of the
125*38fd1498Szrj function contain another store to A before a read to A.
126*38fd1498Szrj
127*38fd1498Szrj If the address A is relative to the stack frame, a store S2 to A
128*38fd1498Szrj can be eliminated if there are no paths from S2 that reach the
129*38fd1498Szrj end of the function that read A before another store to A. In
130*38fd1498Szrj this case S2 can be deleted if there are paths from S2 to the
131*38fd1498Szrj end of the function that have no reads or writes to A. This
132*38fd1498Szrj second case allows stores to the stack frame to be deleted that
133*38fd1498Szrj would otherwise die when the function returns. This cannot be
134*38fd1498Szrj done if stores_off_frame_dead_at_return is not true. See the doc
135*38fd1498Szrj for that variable for when this variable is false.
136*38fd1498Szrj
137*38fd1498Szrj The global problem is formulated as a backwards set union
138*38fd1498Szrj dataflow problem where the stores are the gens and reads are the
139*38fd1498Szrj kills. Set union problems are rare and require some special
140*38fd1498Szrj handling given our representation of bitmaps. A straightforward
141*38fd1498Szrj implementation requires a lot of bitmaps filled with 1s.
142*38fd1498Szrj These are expensive and cumbersome in our bitmap formulation so
143*38fd1498Szrj care has been taken to avoid large vectors filled with 1s. See
144*38fd1498Szrj the comments in bb_info and in the dataflow confluence functions
145*38fd1498Szrj for details.
146*38fd1498Szrj
147*38fd1498Szrj There are two places for further enhancements to this algorithm:
148*38fd1498Szrj
149*38fd1498Szrj 1) The original dse which was embedded in a pass called flow also
150*38fd1498Szrj did local address forwarding. For example in
151*38fd1498Szrj
152*38fd1498Szrj A <- r100
153*38fd1498Szrj ... <- A
154*38fd1498Szrj
155*38fd1498Szrj flow would replace the right hand side of the second insn with a
156*38fd1498Szrj reference to r100. Most of the information is available to add this
157*38fd1498Szrj to this pass. It has not done it because it is a lot of work in
158*38fd1498Szrj the case that either r100 is assigned to between the first and
159*38fd1498Szrj second insn and/or the second insn is a load of part of the value
160*38fd1498Szrj stored by the first insn.
161*38fd1498Szrj
162*38fd1498Szrj insn 5 in gcc.c-torture/compile/990203-1.c simple case.
163*38fd1498Szrj insn 15 in gcc.c-torture/execute/20001017-2.c simple case.
164*38fd1498Szrj insn 25 in gcc.c-torture/execute/20001026-1.c simple case.
165*38fd1498Szrj insn 44 in gcc.c-torture/execute/20010910-1.c simple case.
166*38fd1498Szrj
167*38fd1498Szrj 2) The cleaning up of spill code is quite profitable. It currently
168*38fd1498Szrj depends on reading tea leaves and chicken entrails left by reload.
169*38fd1498Szrj This pass depends on reload creating a singleton alias set for each
170*38fd1498Szrj spill slot and telling the next dse pass which of these alias sets
171*38fd1498Szrj are the singletons. Rather than analyze the addresses of the
172*38fd1498Szrj spills, dse's spill processing just does analysis of the loads and
173*38fd1498Szrj stores that use those alias sets. There are three cases where this
174*38fd1498Szrj falls short:
175*38fd1498Szrj
176*38fd1498Szrj a) Reload sometimes creates the slot for one mode of access, and
177*38fd1498Szrj then inserts loads and/or stores for a smaller mode. In this
178*38fd1498Szrj case, the current code just punts on the slot. The proper thing
179*38fd1498Szrj to do is to back out and use one bit vector position for each
180*38fd1498Szrj byte of the entity associated with the slot. This depends on
181*38fd1498Szrj KNOWING that reload always generates the accesses for each of the
182*38fd1498Szrj bytes in some canonical (read that easy to understand several
183*38fd1498Szrj passes after reload happens) way.
184*38fd1498Szrj
185*38fd1498Szrj b) Reload sometimes decides that spill slot it allocated was not
186*38fd1498Szrj large enough for the mode and goes back and allocates more slots
187*38fd1498Szrj with the same mode and alias set. The backout in this case is a
188*38fd1498Szrj little more graceful than (a). In this case the slot is unmarked
189*38fd1498Szrj as being a spill slot and if final address comes out to be based
190*38fd1498Szrj off the frame pointer, the global algorithm handles this slot.
191*38fd1498Szrj
192*38fd1498Szrj c) For any pass that may prespill, there is currently no
193*38fd1498Szrj mechanism to tell the dse pass that the slot being used has the
194*38fd1498Szrj special properties that reload uses. It may be that all that is
195*38fd1498Szrj required is to have those passes make the same calls that reload
196*38fd1498Szrj does, assuming that the alias sets can be manipulated in the same
197*38fd1498Szrj way. */
198*38fd1498Szrj
199*38fd1498Szrj /* There are limits to the size of constant offsets we model for the
200*38fd1498Szrj global problem. There are certainly test cases, that exceed this
201*38fd1498Szrj limit, however, it is unlikely that there are important programs
202*38fd1498Szrj that really have constant offsets this size. */
203*38fd1498Szrj #define MAX_OFFSET (64 * 1024)
204*38fd1498Szrj
205*38fd1498Szrj /* Obstack for the DSE dataflow bitmaps. We don't want to put these
206*38fd1498Szrj on the default obstack because these bitmaps can grow quite large
207*38fd1498Szrj (~2GB for the small (!) test case of PR54146) and we'll hold on to
208*38fd1498Szrj all that memory until the end of the compiler run.
209*38fd1498Szrj As a bonus, delete_tree_live_info can destroy all the bitmaps by just
210*38fd1498Szrj releasing the whole obstack. */
211*38fd1498Szrj static bitmap_obstack dse_bitmap_obstack;
212*38fd1498Szrj
213*38fd1498Szrj /* Obstack for other data. As for above: Kinda nice to be able to
214*38fd1498Szrj throw it all away at the end in one big sweep. */
215*38fd1498Szrj static struct obstack dse_obstack;
216*38fd1498Szrj
217*38fd1498Szrj /* Scratch bitmap for cselib's cselib_expand_value_rtx. */
218*38fd1498Szrj static bitmap scratch = NULL;
219*38fd1498Szrj
220*38fd1498Szrj struct insn_info_type;
221*38fd1498Szrj
222*38fd1498Szrj /* This structure holds information about a candidate store. */
223*38fd1498Szrj struct store_info
224*38fd1498Szrj {
225*38fd1498Szrj
226*38fd1498Szrj /* False means this is a clobber. */
227*38fd1498Szrj bool is_set;
228*38fd1498Szrj
229*38fd1498Szrj /* False if a single HOST_WIDE_INT bitmap is used for positions_needed. */
230*38fd1498Szrj bool is_large;
231*38fd1498Szrj
232*38fd1498Szrj /* The id of the mem group of the base address. If rtx_varies_p is
233*38fd1498Szrj true, this is -1. Otherwise, it is the index into the group
234*38fd1498Szrj table. */
235*38fd1498Szrj int group_id;
236*38fd1498Szrj
237*38fd1498Szrj /* This is the cselib value. */
238*38fd1498Szrj cselib_val *cse_base;
239*38fd1498Szrj
240*38fd1498Szrj /* This canonized mem. */
241*38fd1498Szrj rtx mem;
242*38fd1498Szrj
243*38fd1498Szrj /* Canonized MEM address for use by canon_true_dependence. */
244*38fd1498Szrj rtx mem_addr;
245*38fd1498Szrj
246*38fd1498Szrj /* The offset of the first byte associated with the operation. */
247*38fd1498Szrj poly_int64 offset;
248*38fd1498Szrj
249*38fd1498Szrj /* The number of bytes covered by the operation. This is always exact
250*38fd1498Szrj and known (rather than -1). */
251*38fd1498Szrj poly_int64 width;
252*38fd1498Szrj
253*38fd1498Szrj union
254*38fd1498Szrj {
255*38fd1498Szrj /* A bitmask as wide as the number of bytes in the word that
256*38fd1498Szrj contains a 1 if the byte may be needed. The store is unused if
257*38fd1498Szrj all of the bits are 0. This is used if IS_LARGE is false. */
258*38fd1498Szrj unsigned HOST_WIDE_INT small_bitmask;
259*38fd1498Szrj
260*38fd1498Szrj struct
261*38fd1498Szrj {
262*38fd1498Szrj /* A bitmap with one bit per byte, or null if the number of
263*38fd1498Szrj bytes isn't known at compile time. A cleared bit means
264*38fd1498Szrj the position is needed. Used if IS_LARGE is true. */
265*38fd1498Szrj bitmap bmap;
266*38fd1498Szrj
267*38fd1498Szrj /* When BITMAP is nonnull, this counts the number of set bits
268*38fd1498Szrj (i.e. unneeded bytes) in the bitmap. If it is equal to
269*38fd1498Szrj WIDTH, the whole store is unused.
270*38fd1498Szrj
271*38fd1498Szrj When BITMAP is null:
272*38fd1498Szrj - the store is definitely not needed when COUNT == 1
273*38fd1498Szrj - all the store is needed when COUNT == 0 and RHS is nonnull
274*38fd1498Szrj - otherwise we don't know which parts of the store are needed. */
275*38fd1498Szrj int count;
276*38fd1498Szrj } large;
277*38fd1498Szrj } positions_needed;
278*38fd1498Szrj
279*38fd1498Szrj /* The next store info for this insn. */
280*38fd1498Szrj struct store_info *next;
281*38fd1498Szrj
282*38fd1498Szrj /* The right hand side of the store. This is used if there is a
283*38fd1498Szrj subsequent reload of the mems address somewhere later in the
284*38fd1498Szrj basic block. */
285*38fd1498Szrj rtx rhs;
286*38fd1498Szrj
287*38fd1498Szrj /* If rhs is or holds a constant, this contains that constant,
288*38fd1498Szrj otherwise NULL. */
289*38fd1498Szrj rtx const_rhs;
290*38fd1498Szrj
291*38fd1498Szrj /* Set if this store stores the same constant value as REDUNDANT_REASON
292*38fd1498Szrj insn stored. These aren't eliminated early, because doing that
293*38fd1498Szrj might prevent the earlier larger store to be eliminated. */
294*38fd1498Szrj struct insn_info_type *redundant_reason;
295*38fd1498Szrj };
296*38fd1498Szrj
297*38fd1498Szrj /* Return a bitmask with the first N low bits set. */
298*38fd1498Szrj
299*38fd1498Szrj static unsigned HOST_WIDE_INT
lowpart_bitmask(int n)300*38fd1498Szrj lowpart_bitmask (int n)
301*38fd1498Szrj {
302*38fd1498Szrj unsigned HOST_WIDE_INT mask = HOST_WIDE_INT_M1U;
303*38fd1498Szrj return mask >> (HOST_BITS_PER_WIDE_INT - n);
304*38fd1498Szrj }
305*38fd1498Szrj
306*38fd1498Szrj static object_allocator<store_info> cse_store_info_pool ("cse_store_info_pool");
307*38fd1498Szrj
308*38fd1498Szrj static object_allocator<store_info> rtx_store_info_pool ("rtx_store_info_pool");
309*38fd1498Szrj
310*38fd1498Szrj /* This structure holds information about a load. These are only
311*38fd1498Szrj built for rtx bases. */
312*38fd1498Szrj struct read_info_type
313*38fd1498Szrj {
314*38fd1498Szrj /* The id of the mem group of the base address. */
315*38fd1498Szrj int group_id;
316*38fd1498Szrj
317*38fd1498Szrj /* The offset of the first byte associated with the operation. */
318*38fd1498Szrj poly_int64 offset;
319*38fd1498Szrj
320*38fd1498Szrj /* The number of bytes covered by the operation, or -1 if not known. */
321*38fd1498Szrj poly_int64 width;
322*38fd1498Szrj
323*38fd1498Szrj /* The mem being read. */
324*38fd1498Szrj rtx mem;
325*38fd1498Szrj
326*38fd1498Szrj /* The next read_info for this insn. */
327*38fd1498Szrj struct read_info_type *next;
328*38fd1498Szrj };
329*38fd1498Szrj typedef struct read_info_type *read_info_t;
330*38fd1498Szrj
331*38fd1498Szrj static object_allocator<read_info_type> read_info_type_pool ("read_info_pool");
332*38fd1498Szrj
333*38fd1498Szrj /* One of these records is created for each insn. */
334*38fd1498Szrj
335*38fd1498Szrj struct insn_info_type
336*38fd1498Szrj {
337*38fd1498Szrj /* Set true if the insn contains a store but the insn itself cannot
338*38fd1498Szrj be deleted. This is set if the insn is a parallel and there is
339*38fd1498Szrj more than one non dead output or if the insn is in some way
340*38fd1498Szrj volatile. */
341*38fd1498Szrj bool cannot_delete;
342*38fd1498Szrj
343*38fd1498Szrj /* This field is only used by the global algorithm. It is set true
344*38fd1498Szrj if the insn contains any read of mem except for a (1). This is
345*38fd1498Szrj also set if the insn is a call or has a clobber mem. If the insn
346*38fd1498Szrj contains a wild read, the use_rec will be null. */
347*38fd1498Szrj bool wild_read;
348*38fd1498Szrj
349*38fd1498Szrj /* This is true only for CALL instructions which could potentially read
350*38fd1498Szrj any non-frame memory location. This field is used by the global
351*38fd1498Szrj algorithm. */
352*38fd1498Szrj bool non_frame_wild_read;
353*38fd1498Szrj
354*38fd1498Szrj /* This field is only used for the processing of const functions.
355*38fd1498Szrj These functions cannot read memory, but they can read the stack
356*38fd1498Szrj because that is where they may get their parms. We need to be
357*38fd1498Szrj this conservative because, like the store motion pass, we don't
358*38fd1498Szrj consider CALL_INSN_FUNCTION_USAGE when processing call insns.
359*38fd1498Szrj Moreover, we need to distinguish two cases:
360*38fd1498Szrj 1. Before reload (register elimination), the stores related to
361*38fd1498Szrj outgoing arguments are stack pointer based and thus deemed
362*38fd1498Szrj of non-constant base in this pass. This requires special
363*38fd1498Szrj handling but also means that the frame pointer based stores
364*38fd1498Szrj need not be killed upon encountering a const function call.
365*38fd1498Szrj 2. After reload, the stores related to outgoing arguments can be
366*38fd1498Szrj either stack pointer or hard frame pointer based. This means
367*38fd1498Szrj that we have no other choice than also killing all the frame
368*38fd1498Szrj pointer based stores upon encountering a const function call.
369*38fd1498Szrj This field is set after reload for const function calls and before
370*38fd1498Szrj reload for const tail function calls on targets where arg pointer
371*38fd1498Szrj is the frame pointer. Having this set is less severe than a wild
372*38fd1498Szrj read, it just means that all the frame related stores are killed
373*38fd1498Szrj rather than all the stores. */
374*38fd1498Szrj bool frame_read;
375*38fd1498Szrj
376*38fd1498Szrj /* This field is only used for the processing of const functions.
377*38fd1498Szrj It is set if the insn may contain a stack pointer based store. */
378*38fd1498Szrj bool stack_pointer_based;
379*38fd1498Szrj
380*38fd1498Szrj /* This is true if any of the sets within the store contains a
381*38fd1498Szrj cselib base. Such stores can only be deleted by the local
382*38fd1498Szrj algorithm. */
383*38fd1498Szrj bool contains_cselib_groups;
384*38fd1498Szrj
385*38fd1498Szrj /* The insn. */
386*38fd1498Szrj rtx_insn *insn;
387*38fd1498Szrj
388*38fd1498Szrj /* The list of mem sets or mem clobbers that are contained in this
389*38fd1498Szrj insn. If the insn is deletable, it contains only one mem set.
390*38fd1498Szrj But it could also contain clobbers. Insns that contain more than
391*38fd1498Szrj one mem set are not deletable, but each of those mems are here in
392*38fd1498Szrj order to provide info to delete other insns. */
393*38fd1498Szrj store_info *store_rec;
394*38fd1498Szrj
395*38fd1498Szrj /* The linked list of mem uses in this insn. Only the reads from
396*38fd1498Szrj rtx bases are listed here. The reads to cselib bases are
397*38fd1498Szrj completely processed during the first scan and so are never
398*38fd1498Szrj created. */
399*38fd1498Szrj read_info_t read_rec;
400*38fd1498Szrj
401*38fd1498Szrj /* The live fixed registers. We assume only fixed registers can
402*38fd1498Szrj cause trouble by being clobbered from an expanded pattern;
403*38fd1498Szrj storing only the live fixed registers (rather than all registers)
404*38fd1498Szrj means less memory needs to be allocated / copied for the individual
405*38fd1498Szrj stores. */
406*38fd1498Szrj regset fixed_regs_live;
407*38fd1498Szrj
408*38fd1498Szrj /* The prev insn in the basic block. */
409*38fd1498Szrj struct insn_info_type * prev_insn;
410*38fd1498Szrj
411*38fd1498Szrj /* The linked list of insns that are in consideration for removal in
412*38fd1498Szrj the forwards pass through the basic block. This pointer may be
413*38fd1498Szrj trash as it is not cleared when a wild read occurs. The only
414*38fd1498Szrj time it is guaranteed to be correct is when the traversal starts
415*38fd1498Szrj at active_local_stores. */
416*38fd1498Szrj struct insn_info_type * next_local_store;
417*38fd1498Szrj };
418*38fd1498Szrj typedef struct insn_info_type *insn_info_t;
419*38fd1498Szrj
420*38fd1498Szrj static object_allocator<insn_info_type> insn_info_type_pool ("insn_info_pool");
421*38fd1498Szrj
422*38fd1498Szrj /* The linked list of stores that are under consideration in this
423*38fd1498Szrj basic block. */
424*38fd1498Szrj static insn_info_t active_local_stores;
425*38fd1498Szrj static int active_local_stores_len;
426*38fd1498Szrj
427*38fd1498Szrj struct dse_bb_info_type
428*38fd1498Szrj {
429*38fd1498Szrj /* Pointer to the insn info for the last insn in the block. These
430*38fd1498Szrj are linked so this is how all of the insns are reached. During
431*38fd1498Szrj scanning this is the current insn being scanned. */
432*38fd1498Szrj insn_info_t last_insn;
433*38fd1498Szrj
434*38fd1498Szrj /* The info for the global dataflow problem. */
435*38fd1498Szrj
436*38fd1498Szrj
437*38fd1498Szrj /* This is set if the transfer function should and in the wild_read
438*38fd1498Szrj bitmap before applying the kill and gen sets. That vector knocks
439*38fd1498Szrj out most of the bits in the bitmap and thus speeds up the
440*38fd1498Szrj operations. */
441*38fd1498Szrj bool apply_wild_read;
442*38fd1498Szrj
443*38fd1498Szrj /* The following 4 bitvectors hold information about which positions
444*38fd1498Szrj of which stores are live or dead. They are indexed by
445*38fd1498Szrj get_bitmap_index. */
446*38fd1498Szrj
447*38fd1498Szrj /* The set of store positions that exist in this block before a wild read. */
448*38fd1498Szrj bitmap gen;
449*38fd1498Szrj
450*38fd1498Szrj /* The set of load positions that exist in this block above the
451*38fd1498Szrj same position of a store. */
452*38fd1498Szrj bitmap kill;
453*38fd1498Szrj
454*38fd1498Szrj /* The set of stores that reach the top of the block without being
455*38fd1498Szrj killed by a read.
456*38fd1498Szrj
457*38fd1498Szrj Do not represent the in if it is all ones. Note that this is
458*38fd1498Szrj what the bitvector should logically be initialized to for a set
459*38fd1498Szrj intersection problem. However, like the kill set, this is too
460*38fd1498Szrj expensive. So initially, the in set will only be created for the
461*38fd1498Szrj exit block and any block that contains a wild read. */
462*38fd1498Szrj bitmap in;
463*38fd1498Szrj
464*38fd1498Szrj /* The set of stores that reach the bottom of the block from it's
465*38fd1498Szrj successors.
466*38fd1498Szrj
467*38fd1498Szrj Do not represent the in if it is all ones. Note that this is
468*38fd1498Szrj what the bitvector should logically be initialized to for a set
469*38fd1498Szrj intersection problem. However, like the kill and in set, this is
470*38fd1498Szrj too expensive. So what is done is that the confluence operator
471*38fd1498Szrj just initializes the vector from one of the out sets of the
472*38fd1498Szrj successors of the block. */
473*38fd1498Szrj bitmap out;
474*38fd1498Szrj
475*38fd1498Szrj /* The following bitvector is indexed by the reg number. It
476*38fd1498Szrj contains the set of regs that are live at the current instruction
477*38fd1498Szrj being processed. While it contains info for all of the
478*38fd1498Szrj registers, only the hard registers are actually examined. It is used
479*38fd1498Szrj to assure that shift and/or add sequences that are inserted do not
480*38fd1498Szrj accidentally clobber live hard regs. */
481*38fd1498Szrj bitmap regs_live;
482*38fd1498Szrj };
483*38fd1498Szrj
484*38fd1498Szrj typedef struct dse_bb_info_type *bb_info_t;
485*38fd1498Szrj
486*38fd1498Szrj static object_allocator<dse_bb_info_type> dse_bb_info_type_pool
487*38fd1498Szrj ("bb_info_pool");
488*38fd1498Szrj
489*38fd1498Szrj /* Table to hold all bb_infos. */
490*38fd1498Szrj static bb_info_t *bb_table;
491*38fd1498Szrj
492*38fd1498Szrj /* There is a group_info for each rtx base that is used to reference
493*38fd1498Szrj memory. There are also not many of the rtx bases because they are
494*38fd1498Szrj very limited in scope. */
495*38fd1498Szrj
496*38fd1498Szrj struct group_info
497*38fd1498Szrj {
498*38fd1498Szrj /* The actual base of the address. */
499*38fd1498Szrj rtx rtx_base;
500*38fd1498Szrj
501*38fd1498Szrj /* The sequential id of the base. This allows us to have a
502*38fd1498Szrj canonical ordering of these that is not based on addresses. */
503*38fd1498Szrj int id;
504*38fd1498Szrj
505*38fd1498Szrj /* True if there are any positions that are to be processed
506*38fd1498Szrj globally. */
507*38fd1498Szrj bool process_globally;
508*38fd1498Szrj
509*38fd1498Szrj /* True if the base of this group is either the frame_pointer or
510*38fd1498Szrj hard_frame_pointer. */
511*38fd1498Szrj bool frame_related;
512*38fd1498Szrj
513*38fd1498Szrj /* A mem wrapped around the base pointer for the group in order to do
514*38fd1498Szrj read dependency. It must be given BLKmode in order to encompass all
515*38fd1498Szrj the possible offsets from the base. */
516*38fd1498Szrj rtx base_mem;
517*38fd1498Szrj
518*38fd1498Szrj /* Canonized version of base_mem's address. */
519*38fd1498Szrj rtx canon_base_addr;
520*38fd1498Szrj
521*38fd1498Szrj /* These two sets of two bitmaps are used to keep track of how many
522*38fd1498Szrj stores are actually referencing that position from this base. We
523*38fd1498Szrj only do this for rtx bases as this will be used to assign
524*38fd1498Szrj positions in the bitmaps for the global problem. Bit N is set in
525*38fd1498Szrj store1 on the first store for offset N. Bit N is set in store2
526*38fd1498Szrj for the second store to offset N. This is all we need since we
527*38fd1498Szrj only care about offsets that have two or more stores for them.
528*38fd1498Szrj
529*38fd1498Szrj The "_n" suffix is for offsets less than 0 and the "_p" suffix is
530*38fd1498Szrj for 0 and greater offsets.
531*38fd1498Szrj
532*38fd1498Szrj There is one special case here, for stores into the stack frame,
533*38fd1498Szrj we will or store1 into store2 before deciding which stores look
534*38fd1498Szrj at globally. This is because stores to the stack frame that have
535*38fd1498Szrj no other reads before the end of the function can also be
536*38fd1498Szrj deleted. */
537*38fd1498Szrj bitmap store1_n, store1_p, store2_n, store2_p;
538*38fd1498Szrj
539*38fd1498Szrj /* These bitmaps keep track of offsets in this group escape this function.
540*38fd1498Szrj An offset escapes if it corresponds to a named variable whose
541*38fd1498Szrj addressable flag is set. */
542*38fd1498Szrj bitmap escaped_n, escaped_p;
543*38fd1498Szrj
544*38fd1498Szrj /* The positions in this bitmap have the same assignments as the in,
545*38fd1498Szrj out, gen and kill bitmaps. This bitmap is all zeros except for
546*38fd1498Szrj the positions that are occupied by stores for this group. */
547*38fd1498Szrj bitmap group_kill;
548*38fd1498Szrj
549*38fd1498Szrj /* The offset_map is used to map the offsets from this base into
550*38fd1498Szrj positions in the global bitmaps. It is only created after all of
551*38fd1498Szrj the all of stores have been scanned and we know which ones we
552*38fd1498Szrj care about. */
553*38fd1498Szrj int *offset_map_n, *offset_map_p;
554*38fd1498Szrj int offset_map_size_n, offset_map_size_p;
555*38fd1498Szrj };
556*38fd1498Szrj
557*38fd1498Szrj static object_allocator<group_info> group_info_pool ("rtx_group_info_pool");
558*38fd1498Szrj
559*38fd1498Szrj /* Index into the rtx_group_vec. */
560*38fd1498Szrj static int rtx_group_next_id;
561*38fd1498Szrj
562*38fd1498Szrj
563*38fd1498Szrj static vec<group_info *> rtx_group_vec;
564*38fd1498Szrj
565*38fd1498Szrj
566*38fd1498Szrj /* This structure holds the set of changes that are being deferred
567*38fd1498Szrj when removing read operation. See replace_read. */
568*38fd1498Szrj struct deferred_change
569*38fd1498Szrj {
570*38fd1498Szrj
571*38fd1498Szrj /* The mem that is being replaced. */
572*38fd1498Szrj rtx *loc;
573*38fd1498Szrj
574*38fd1498Szrj /* The reg it is being replaced with. */
575*38fd1498Szrj rtx reg;
576*38fd1498Szrj
577*38fd1498Szrj struct deferred_change *next;
578*38fd1498Szrj };
579*38fd1498Szrj
580*38fd1498Szrj static object_allocator<deferred_change> deferred_change_pool
581*38fd1498Szrj ("deferred_change_pool");
582*38fd1498Szrj
583*38fd1498Szrj static deferred_change *deferred_change_list = NULL;
584*38fd1498Szrj
585*38fd1498Szrj /* This is true except if cfun->stdarg -- i.e. we cannot do
586*38fd1498Szrj this for vararg functions because they play games with the frame. */
587*38fd1498Szrj static bool stores_off_frame_dead_at_return;
588*38fd1498Szrj
589*38fd1498Szrj /* Counter for stats. */
590*38fd1498Szrj static int globally_deleted;
591*38fd1498Szrj static int locally_deleted;
592*38fd1498Szrj
593*38fd1498Szrj static bitmap all_blocks;
594*38fd1498Szrj
595*38fd1498Szrj /* Locations that are killed by calls in the global phase. */
596*38fd1498Szrj static bitmap kill_on_calls;
597*38fd1498Szrj
598*38fd1498Szrj /* The number of bits used in the global bitmaps. */
599*38fd1498Szrj static unsigned int current_position;
600*38fd1498Szrj
601*38fd1498Szrj /* Print offset range [OFFSET, OFFSET + WIDTH) to FILE. */
602*38fd1498Szrj
603*38fd1498Szrj static void
print_range(FILE * file,poly_int64 offset,poly_int64 width)604*38fd1498Szrj print_range (FILE *file, poly_int64 offset, poly_int64 width)
605*38fd1498Szrj {
606*38fd1498Szrj fprintf (file, "[");
607*38fd1498Szrj print_dec (offset, file, SIGNED);
608*38fd1498Szrj fprintf (file, "..");
609*38fd1498Szrj print_dec (offset + width, file, SIGNED);
610*38fd1498Szrj fprintf (file, ")");
611*38fd1498Szrj }
612*38fd1498Szrj
613*38fd1498Szrj /*----------------------------------------------------------------------------
614*38fd1498Szrj Zeroth step.
615*38fd1498Szrj
616*38fd1498Szrj Initialization.
617*38fd1498Szrj ----------------------------------------------------------------------------*/
618*38fd1498Szrj
619*38fd1498Szrj
620*38fd1498Szrj /* Hashtable callbacks for maintaining the "bases" field of
621*38fd1498Szrj store_group_info, given that the addresses are function invariants. */
622*38fd1498Szrj
623*38fd1498Szrj struct invariant_group_base_hasher : nofree_ptr_hash <group_info>
624*38fd1498Szrj {
625*38fd1498Szrj static inline hashval_t hash (const group_info *);
626*38fd1498Szrj static inline bool equal (const group_info *, const group_info *);
627*38fd1498Szrj };
628*38fd1498Szrj
629*38fd1498Szrj inline bool
equal(const group_info * gi1,const group_info * gi2)630*38fd1498Szrj invariant_group_base_hasher::equal (const group_info *gi1,
631*38fd1498Szrj const group_info *gi2)
632*38fd1498Szrj {
633*38fd1498Szrj return rtx_equal_p (gi1->rtx_base, gi2->rtx_base);
634*38fd1498Szrj }
635*38fd1498Szrj
636*38fd1498Szrj inline hashval_t
hash(const group_info * gi)637*38fd1498Szrj invariant_group_base_hasher::hash (const group_info *gi)
638*38fd1498Szrj {
639*38fd1498Szrj int do_not_record;
640*38fd1498Szrj return hash_rtx (gi->rtx_base, Pmode, &do_not_record, NULL, false);
641*38fd1498Szrj }
642*38fd1498Szrj
643*38fd1498Szrj /* Tables of group_info structures, hashed by base value. */
644*38fd1498Szrj static hash_table<invariant_group_base_hasher> *rtx_group_table;
645*38fd1498Szrj
646*38fd1498Szrj
647*38fd1498Szrj /* Get the GROUP for BASE. Add a new group if it is not there. */
648*38fd1498Szrj
649*38fd1498Szrj static group_info *
get_group_info(rtx base)650*38fd1498Szrj get_group_info (rtx base)
651*38fd1498Szrj {
652*38fd1498Szrj struct group_info tmp_gi;
653*38fd1498Szrj group_info *gi;
654*38fd1498Szrj group_info **slot;
655*38fd1498Szrj
656*38fd1498Szrj gcc_assert (base != NULL_RTX);
657*38fd1498Szrj
658*38fd1498Szrj /* Find the store_base_info structure for BASE, creating a new one
659*38fd1498Szrj if necessary. */
660*38fd1498Szrj tmp_gi.rtx_base = base;
661*38fd1498Szrj slot = rtx_group_table->find_slot (&tmp_gi, INSERT);
662*38fd1498Szrj gi = *slot;
663*38fd1498Szrj
664*38fd1498Szrj if (gi == NULL)
665*38fd1498Szrj {
666*38fd1498Szrj *slot = gi = group_info_pool.allocate ();
667*38fd1498Szrj gi->rtx_base = base;
668*38fd1498Szrj gi->id = rtx_group_next_id++;
669*38fd1498Szrj gi->base_mem = gen_rtx_MEM (BLKmode, base);
670*38fd1498Szrj gi->canon_base_addr = canon_rtx (base);
671*38fd1498Szrj gi->store1_n = BITMAP_ALLOC (&dse_bitmap_obstack);
672*38fd1498Szrj gi->store1_p = BITMAP_ALLOC (&dse_bitmap_obstack);
673*38fd1498Szrj gi->store2_n = BITMAP_ALLOC (&dse_bitmap_obstack);
674*38fd1498Szrj gi->store2_p = BITMAP_ALLOC (&dse_bitmap_obstack);
675*38fd1498Szrj gi->escaped_p = BITMAP_ALLOC (&dse_bitmap_obstack);
676*38fd1498Szrj gi->escaped_n = BITMAP_ALLOC (&dse_bitmap_obstack);
677*38fd1498Szrj gi->group_kill = BITMAP_ALLOC (&dse_bitmap_obstack);
678*38fd1498Szrj gi->process_globally = false;
679*38fd1498Szrj gi->frame_related =
680*38fd1498Szrj (base == frame_pointer_rtx) || (base == hard_frame_pointer_rtx);
681*38fd1498Szrj gi->offset_map_size_n = 0;
682*38fd1498Szrj gi->offset_map_size_p = 0;
683*38fd1498Szrj gi->offset_map_n = NULL;
684*38fd1498Szrj gi->offset_map_p = NULL;
685*38fd1498Szrj rtx_group_vec.safe_push (gi);
686*38fd1498Szrj }
687*38fd1498Szrj
688*38fd1498Szrj return gi;
689*38fd1498Szrj }
690*38fd1498Szrj
691*38fd1498Szrj
692*38fd1498Szrj /* Initialization of data structures. */
693*38fd1498Szrj
694*38fd1498Szrj static void
dse_step0(void)695*38fd1498Szrj dse_step0 (void)
696*38fd1498Szrj {
697*38fd1498Szrj locally_deleted = 0;
698*38fd1498Szrj globally_deleted = 0;
699*38fd1498Szrj
700*38fd1498Szrj bitmap_obstack_initialize (&dse_bitmap_obstack);
701*38fd1498Szrj gcc_obstack_init (&dse_obstack);
702*38fd1498Szrj
703*38fd1498Szrj scratch = BITMAP_ALLOC (®_obstack);
704*38fd1498Szrj kill_on_calls = BITMAP_ALLOC (&dse_bitmap_obstack);
705*38fd1498Szrj
706*38fd1498Szrj
707*38fd1498Szrj rtx_group_table = new hash_table<invariant_group_base_hasher> (11);
708*38fd1498Szrj
709*38fd1498Szrj bb_table = XNEWVEC (bb_info_t, last_basic_block_for_fn (cfun));
710*38fd1498Szrj rtx_group_next_id = 0;
711*38fd1498Szrj
712*38fd1498Szrj stores_off_frame_dead_at_return = !cfun->stdarg;
713*38fd1498Szrj
714*38fd1498Szrj init_alias_analysis ();
715*38fd1498Szrj }
716*38fd1498Szrj
717*38fd1498Szrj
718*38fd1498Szrj
719*38fd1498Szrj /*----------------------------------------------------------------------------
720*38fd1498Szrj First step.
721*38fd1498Szrj
722*38fd1498Szrj Scan all of the insns. Any random ordering of the blocks is fine.
723*38fd1498Szrj Each block is scanned in forward order to accommodate cselib which
724*38fd1498Szrj is used to remove stores with non-constant bases.
725*38fd1498Szrj ----------------------------------------------------------------------------*/
726*38fd1498Szrj
727*38fd1498Szrj /* Delete all of the store_info recs from INSN_INFO. */
728*38fd1498Szrj
729*38fd1498Szrj static void
free_store_info(insn_info_t insn_info)730*38fd1498Szrj free_store_info (insn_info_t insn_info)
731*38fd1498Szrj {
732*38fd1498Szrj store_info *cur = insn_info->store_rec;
733*38fd1498Szrj while (cur)
734*38fd1498Szrj {
735*38fd1498Szrj store_info *next = cur->next;
736*38fd1498Szrj if (cur->is_large)
737*38fd1498Szrj BITMAP_FREE (cur->positions_needed.large.bmap);
738*38fd1498Szrj if (cur->cse_base)
739*38fd1498Szrj cse_store_info_pool.remove (cur);
740*38fd1498Szrj else
741*38fd1498Szrj rtx_store_info_pool.remove (cur);
742*38fd1498Szrj cur = next;
743*38fd1498Szrj }
744*38fd1498Szrj
745*38fd1498Szrj insn_info->cannot_delete = true;
746*38fd1498Szrj insn_info->contains_cselib_groups = false;
747*38fd1498Szrj insn_info->store_rec = NULL;
748*38fd1498Szrj }
749*38fd1498Szrj
750*38fd1498Szrj struct note_add_store_info
751*38fd1498Szrj {
752*38fd1498Szrj rtx_insn *first, *current;
753*38fd1498Szrj regset fixed_regs_live;
754*38fd1498Szrj bool failure;
755*38fd1498Szrj };
756*38fd1498Szrj
757*38fd1498Szrj /* Callback for emit_inc_dec_insn_before via note_stores.
758*38fd1498Szrj Check if a register is clobbered which is live afterwards. */
759*38fd1498Szrj
760*38fd1498Szrj static void
note_add_store(rtx loc,const_rtx expr ATTRIBUTE_UNUSED,void * data)761*38fd1498Szrj note_add_store (rtx loc, const_rtx expr ATTRIBUTE_UNUSED, void *data)
762*38fd1498Szrj {
763*38fd1498Szrj rtx_insn *insn;
764*38fd1498Szrj note_add_store_info *info = (note_add_store_info *) data;
765*38fd1498Szrj
766*38fd1498Szrj if (!REG_P (loc))
767*38fd1498Szrj return;
768*38fd1498Szrj
769*38fd1498Szrj /* If this register is referenced by the current or an earlier insn,
770*38fd1498Szrj that's OK. E.g. this applies to the register that is being incremented
771*38fd1498Szrj with this addition. */
772*38fd1498Szrj for (insn = info->first;
773*38fd1498Szrj insn != NEXT_INSN (info->current);
774*38fd1498Szrj insn = NEXT_INSN (insn))
775*38fd1498Szrj if (reg_referenced_p (loc, PATTERN (insn)))
776*38fd1498Szrj return;
777*38fd1498Szrj
778*38fd1498Szrj /* If we come here, we have a clobber of a register that's only OK
779*38fd1498Szrj if that register is not live. If we don't have liveness information
780*38fd1498Szrj available, fail now. */
781*38fd1498Szrj if (!info->fixed_regs_live)
782*38fd1498Szrj {
783*38fd1498Szrj info->failure = true;
784*38fd1498Szrj return;
785*38fd1498Szrj }
786*38fd1498Szrj /* Now check if this is a live fixed register. */
787*38fd1498Szrj unsigned int end_regno = END_REGNO (loc);
788*38fd1498Szrj for (unsigned int regno = REGNO (loc); regno < end_regno; ++regno)
789*38fd1498Szrj if (REGNO_REG_SET_P (info->fixed_regs_live, regno))
790*38fd1498Szrj info->failure = true;
791*38fd1498Szrj }
792*38fd1498Szrj
793*38fd1498Szrj /* Callback for for_each_inc_dec that emits an INSN that sets DEST to
794*38fd1498Szrj SRC + SRCOFF before insn ARG. */
795*38fd1498Szrj
796*38fd1498Szrj static int
emit_inc_dec_insn_before(rtx mem ATTRIBUTE_UNUSED,rtx op ATTRIBUTE_UNUSED,rtx dest,rtx src,rtx srcoff,void * arg)797*38fd1498Szrj emit_inc_dec_insn_before (rtx mem ATTRIBUTE_UNUSED,
798*38fd1498Szrj rtx op ATTRIBUTE_UNUSED,
799*38fd1498Szrj rtx dest, rtx src, rtx srcoff, void *arg)
800*38fd1498Szrj {
801*38fd1498Szrj insn_info_t insn_info = (insn_info_t) arg;
802*38fd1498Szrj rtx_insn *insn = insn_info->insn, *new_insn, *cur;
803*38fd1498Szrj note_add_store_info info;
804*38fd1498Szrj
805*38fd1498Szrj /* We can reuse all operands without copying, because we are about
806*38fd1498Szrj to delete the insn that contained it. */
807*38fd1498Szrj if (srcoff)
808*38fd1498Szrj {
809*38fd1498Szrj start_sequence ();
810*38fd1498Szrj emit_insn (gen_add3_insn (dest, src, srcoff));
811*38fd1498Szrj new_insn = get_insns ();
812*38fd1498Szrj end_sequence ();
813*38fd1498Szrj }
814*38fd1498Szrj else
815*38fd1498Szrj new_insn = gen_move_insn (dest, src);
816*38fd1498Szrj info.first = new_insn;
817*38fd1498Szrj info.fixed_regs_live = insn_info->fixed_regs_live;
818*38fd1498Szrj info.failure = false;
819*38fd1498Szrj for (cur = new_insn; cur; cur = NEXT_INSN (cur))
820*38fd1498Szrj {
821*38fd1498Szrj info.current = cur;
822*38fd1498Szrj note_stores (PATTERN (cur), note_add_store, &info);
823*38fd1498Szrj }
824*38fd1498Szrj
825*38fd1498Szrj /* If a failure was flagged above, return 1 so that for_each_inc_dec will
826*38fd1498Szrj return it immediately, communicating the failure to its caller. */
827*38fd1498Szrj if (info.failure)
828*38fd1498Szrj return 1;
829*38fd1498Szrj
830*38fd1498Szrj emit_insn_before (new_insn, insn);
831*38fd1498Szrj
832*38fd1498Szrj return 0;
833*38fd1498Szrj }
834*38fd1498Szrj
835*38fd1498Szrj /* Before we delete INSN_INFO->INSN, make sure that the auto inc/dec, if it
836*38fd1498Szrj is there, is split into a separate insn.
837*38fd1498Szrj Return true on success (or if there was nothing to do), false on failure. */
838*38fd1498Szrj
839*38fd1498Szrj static bool
check_for_inc_dec_1(insn_info_t insn_info)840*38fd1498Szrj check_for_inc_dec_1 (insn_info_t insn_info)
841*38fd1498Szrj {
842*38fd1498Szrj rtx_insn *insn = insn_info->insn;
843*38fd1498Szrj rtx note = find_reg_note (insn, REG_INC, NULL_RTX);
844*38fd1498Szrj if (note)
845*38fd1498Szrj return for_each_inc_dec (PATTERN (insn), emit_inc_dec_insn_before,
846*38fd1498Szrj insn_info) == 0;
847*38fd1498Szrj return true;
848*38fd1498Szrj }
849*38fd1498Szrj
850*38fd1498Szrj
851*38fd1498Szrj /* Entry point for postreload. If you work on reload_cse, or you need this
852*38fd1498Szrj anywhere else, consider if you can provide register liveness information
853*38fd1498Szrj and add a parameter to this function so that it can be passed down in
854*38fd1498Szrj insn_info.fixed_regs_live. */
855*38fd1498Szrj bool
check_for_inc_dec(rtx_insn * insn)856*38fd1498Szrj check_for_inc_dec (rtx_insn *insn)
857*38fd1498Szrj {
858*38fd1498Szrj insn_info_type insn_info;
859*38fd1498Szrj rtx note;
860*38fd1498Szrj
861*38fd1498Szrj insn_info.insn = insn;
862*38fd1498Szrj insn_info.fixed_regs_live = NULL;
863*38fd1498Szrj note = find_reg_note (insn, REG_INC, NULL_RTX);
864*38fd1498Szrj if (note)
865*38fd1498Szrj return for_each_inc_dec (PATTERN (insn), emit_inc_dec_insn_before,
866*38fd1498Szrj &insn_info) == 0;
867*38fd1498Szrj return true;
868*38fd1498Szrj }
869*38fd1498Szrj
870*38fd1498Szrj /* Delete the insn and free all of the fields inside INSN_INFO. */
871*38fd1498Szrj
872*38fd1498Szrj static void
delete_dead_store_insn(insn_info_t insn_info)873*38fd1498Szrj delete_dead_store_insn (insn_info_t insn_info)
874*38fd1498Szrj {
875*38fd1498Szrj read_info_t read_info;
876*38fd1498Szrj
877*38fd1498Szrj if (!dbg_cnt (dse))
878*38fd1498Szrj return;
879*38fd1498Szrj
880*38fd1498Szrj if (!check_for_inc_dec_1 (insn_info))
881*38fd1498Szrj return;
882*38fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
883*38fd1498Szrj fprintf (dump_file, "Locally deleting insn %d\n",
884*38fd1498Szrj INSN_UID (insn_info->insn));
885*38fd1498Szrj
886*38fd1498Szrj free_store_info (insn_info);
887*38fd1498Szrj read_info = insn_info->read_rec;
888*38fd1498Szrj
889*38fd1498Szrj while (read_info)
890*38fd1498Szrj {
891*38fd1498Szrj read_info_t next = read_info->next;
892*38fd1498Szrj read_info_type_pool.remove (read_info);
893*38fd1498Szrj read_info = next;
894*38fd1498Szrj }
895*38fd1498Szrj insn_info->read_rec = NULL;
896*38fd1498Szrj
897*38fd1498Szrj delete_insn (insn_info->insn);
898*38fd1498Szrj locally_deleted++;
899*38fd1498Szrj insn_info->insn = NULL;
900*38fd1498Szrj
901*38fd1498Szrj insn_info->wild_read = false;
902*38fd1498Szrj }
903*38fd1498Szrj
904*38fd1498Szrj /* Return whether DECL, a local variable, can possibly escape the current
905*38fd1498Szrj function scope. */
906*38fd1498Szrj
907*38fd1498Szrj static bool
local_variable_can_escape(tree decl)908*38fd1498Szrj local_variable_can_escape (tree decl)
909*38fd1498Szrj {
910*38fd1498Szrj if (TREE_ADDRESSABLE (decl))
911*38fd1498Szrj return true;
912*38fd1498Szrj
913*38fd1498Szrj /* If this is a partitioned variable, we need to consider all the variables
914*38fd1498Szrj in the partition. This is necessary because a store into one of them can
915*38fd1498Szrj be replaced with a store into another and this may not change the outcome
916*38fd1498Szrj of the escape analysis. */
917*38fd1498Szrj if (cfun->gimple_df->decls_to_pointers != NULL)
918*38fd1498Szrj {
919*38fd1498Szrj tree *namep = cfun->gimple_df->decls_to_pointers->get (decl);
920*38fd1498Szrj if (namep)
921*38fd1498Szrj return TREE_ADDRESSABLE (*namep);
922*38fd1498Szrj }
923*38fd1498Szrj
924*38fd1498Szrj return false;
925*38fd1498Szrj }
926*38fd1498Szrj
927*38fd1498Szrj /* Return whether EXPR can possibly escape the current function scope. */
928*38fd1498Szrj
929*38fd1498Szrj static bool
can_escape(tree expr)930*38fd1498Szrj can_escape (tree expr)
931*38fd1498Szrj {
932*38fd1498Szrj tree base;
933*38fd1498Szrj if (!expr)
934*38fd1498Szrj return true;
935*38fd1498Szrj base = get_base_address (expr);
936*38fd1498Szrj if (DECL_P (base)
937*38fd1498Szrj && !may_be_aliased (base)
938*38fd1498Szrj && !(VAR_P (base)
939*38fd1498Szrj && !DECL_EXTERNAL (base)
940*38fd1498Szrj && !TREE_STATIC (base)
941*38fd1498Szrj && local_variable_can_escape (base)))
942*38fd1498Szrj return false;
943*38fd1498Szrj return true;
944*38fd1498Szrj }
945*38fd1498Szrj
946*38fd1498Szrj /* Set the store* bitmaps offset_map_size* fields in GROUP based on
947*38fd1498Szrj OFFSET and WIDTH. */
948*38fd1498Szrj
949*38fd1498Szrj static void
set_usage_bits(group_info * group,poly_int64 offset,poly_int64 width,tree expr)950*38fd1498Szrj set_usage_bits (group_info *group, poly_int64 offset, poly_int64 width,
951*38fd1498Szrj tree expr)
952*38fd1498Szrj {
953*38fd1498Szrj /* Non-constant offsets and widths act as global kills, so there's no point
954*38fd1498Szrj trying to use them to derive global DSE candidates. */
955*38fd1498Szrj HOST_WIDE_INT i, const_offset, const_width;
956*38fd1498Szrj bool expr_escapes = can_escape (expr);
957*38fd1498Szrj if (offset.is_constant (&const_offset)
958*38fd1498Szrj && width.is_constant (&const_width)
959*38fd1498Szrj && const_offset > -MAX_OFFSET
960*38fd1498Szrj && const_offset + const_width < MAX_OFFSET)
961*38fd1498Szrj for (i = const_offset; i < const_offset + const_width; ++i)
962*38fd1498Szrj {
963*38fd1498Szrj bitmap store1;
964*38fd1498Szrj bitmap store2;
965*38fd1498Szrj bitmap escaped;
966*38fd1498Szrj int ai;
967*38fd1498Szrj if (i < 0)
968*38fd1498Szrj {
969*38fd1498Szrj store1 = group->store1_n;
970*38fd1498Szrj store2 = group->store2_n;
971*38fd1498Szrj escaped = group->escaped_n;
972*38fd1498Szrj ai = -i;
973*38fd1498Szrj }
974*38fd1498Szrj else
975*38fd1498Szrj {
976*38fd1498Szrj store1 = group->store1_p;
977*38fd1498Szrj store2 = group->store2_p;
978*38fd1498Szrj escaped = group->escaped_p;
979*38fd1498Szrj ai = i;
980*38fd1498Szrj }
981*38fd1498Szrj
982*38fd1498Szrj if (!bitmap_set_bit (store1, ai))
983*38fd1498Szrj bitmap_set_bit (store2, ai);
984*38fd1498Szrj else
985*38fd1498Szrj {
986*38fd1498Szrj if (i < 0)
987*38fd1498Szrj {
988*38fd1498Szrj if (group->offset_map_size_n < ai)
989*38fd1498Szrj group->offset_map_size_n = ai;
990*38fd1498Szrj }
991*38fd1498Szrj else
992*38fd1498Szrj {
993*38fd1498Szrj if (group->offset_map_size_p < ai)
994*38fd1498Szrj group->offset_map_size_p = ai;
995*38fd1498Szrj }
996*38fd1498Szrj }
997*38fd1498Szrj if (expr_escapes)
998*38fd1498Szrj bitmap_set_bit (escaped, ai);
999*38fd1498Szrj }
1000*38fd1498Szrj }
1001*38fd1498Szrj
1002*38fd1498Szrj static void
reset_active_stores(void)1003*38fd1498Szrj reset_active_stores (void)
1004*38fd1498Szrj {
1005*38fd1498Szrj active_local_stores = NULL;
1006*38fd1498Szrj active_local_stores_len = 0;
1007*38fd1498Szrj }
1008*38fd1498Szrj
1009*38fd1498Szrj /* Free all READ_REC of the LAST_INSN of BB_INFO. */
1010*38fd1498Szrj
1011*38fd1498Szrj static void
free_read_records(bb_info_t bb_info)1012*38fd1498Szrj free_read_records (bb_info_t bb_info)
1013*38fd1498Szrj {
1014*38fd1498Szrj insn_info_t insn_info = bb_info->last_insn;
1015*38fd1498Szrj read_info_t *ptr = &insn_info->read_rec;
1016*38fd1498Szrj while (*ptr)
1017*38fd1498Szrj {
1018*38fd1498Szrj read_info_t next = (*ptr)->next;
1019*38fd1498Szrj read_info_type_pool.remove (*ptr);
1020*38fd1498Szrj *ptr = next;
1021*38fd1498Szrj }
1022*38fd1498Szrj }
1023*38fd1498Szrj
1024*38fd1498Szrj /* Set the BB_INFO so that the last insn is marked as a wild read. */
1025*38fd1498Szrj
1026*38fd1498Szrj static void
add_wild_read(bb_info_t bb_info)1027*38fd1498Szrj add_wild_read (bb_info_t bb_info)
1028*38fd1498Szrj {
1029*38fd1498Szrj insn_info_t insn_info = bb_info->last_insn;
1030*38fd1498Szrj insn_info->wild_read = true;
1031*38fd1498Szrj free_read_records (bb_info);
1032*38fd1498Szrj reset_active_stores ();
1033*38fd1498Szrj }
1034*38fd1498Szrj
1035*38fd1498Szrj /* Set the BB_INFO so that the last insn is marked as a wild read of
1036*38fd1498Szrj non-frame locations. */
1037*38fd1498Szrj
1038*38fd1498Szrj static void
add_non_frame_wild_read(bb_info_t bb_info)1039*38fd1498Szrj add_non_frame_wild_read (bb_info_t bb_info)
1040*38fd1498Szrj {
1041*38fd1498Szrj insn_info_t insn_info = bb_info->last_insn;
1042*38fd1498Szrj insn_info->non_frame_wild_read = true;
1043*38fd1498Szrj free_read_records (bb_info);
1044*38fd1498Szrj reset_active_stores ();
1045*38fd1498Szrj }
1046*38fd1498Szrj
1047*38fd1498Szrj /* Return true if X is a constant or one of the registers that behave
1048*38fd1498Szrj as a constant over the life of a function. This is equivalent to
1049*38fd1498Szrj !rtx_varies_p for memory addresses. */
1050*38fd1498Szrj
1051*38fd1498Szrj static bool
const_or_frame_p(rtx x)1052*38fd1498Szrj const_or_frame_p (rtx x)
1053*38fd1498Szrj {
1054*38fd1498Szrj if (CONSTANT_P (x))
1055*38fd1498Szrj return true;
1056*38fd1498Szrj
1057*38fd1498Szrj if (GET_CODE (x) == REG)
1058*38fd1498Szrj {
1059*38fd1498Szrj /* Note that we have to test for the actual rtx used for the frame
1060*38fd1498Szrj and arg pointers and not just the register number in case we have
1061*38fd1498Szrj eliminated the frame and/or arg pointer and are using it
1062*38fd1498Szrj for pseudos. */
1063*38fd1498Szrj if (x == frame_pointer_rtx || x == hard_frame_pointer_rtx
1064*38fd1498Szrj /* The arg pointer varies if it is not a fixed register. */
1065*38fd1498Szrj || (x == arg_pointer_rtx && fixed_regs[ARG_POINTER_REGNUM])
1066*38fd1498Szrj || x == pic_offset_table_rtx)
1067*38fd1498Szrj return true;
1068*38fd1498Szrj return false;
1069*38fd1498Szrj }
1070*38fd1498Szrj
1071*38fd1498Szrj return false;
1072*38fd1498Szrj }
1073*38fd1498Szrj
1074*38fd1498Szrj /* Take all reasonable action to put the address of MEM into the form
1075*38fd1498Szrj that we can do analysis on.
1076*38fd1498Szrj
1077*38fd1498Szrj The gold standard is to get the address into the form: address +
1078*38fd1498Szrj OFFSET where address is something that rtx_varies_p considers a
1079*38fd1498Szrj constant. When we can get the address in this form, we can do
1080*38fd1498Szrj global analysis on it. Note that for constant bases, address is
1081*38fd1498Szrj not actually returned, only the group_id. The address can be
1082*38fd1498Szrj obtained from that.
1083*38fd1498Szrj
1084*38fd1498Szrj If that fails, we try cselib to get a value we can at least use
1085*38fd1498Szrj locally. If that fails we return false.
1086*38fd1498Szrj
1087*38fd1498Szrj The GROUP_ID is set to -1 for cselib bases and the index of the
1088*38fd1498Szrj group for non_varying bases.
1089*38fd1498Szrj
1090*38fd1498Szrj FOR_READ is true if this is a mem read and false if not. */
1091*38fd1498Szrj
1092*38fd1498Szrj static bool
canon_address(rtx mem,int * group_id,poly_int64 * offset,cselib_val ** base)1093*38fd1498Szrj canon_address (rtx mem,
1094*38fd1498Szrj int *group_id,
1095*38fd1498Szrj poly_int64 *offset,
1096*38fd1498Szrj cselib_val **base)
1097*38fd1498Szrj {
1098*38fd1498Szrj machine_mode address_mode = get_address_mode (mem);
1099*38fd1498Szrj rtx mem_address = XEXP (mem, 0);
1100*38fd1498Szrj rtx expanded_address, address;
1101*38fd1498Szrj int expanded;
1102*38fd1498Szrj
1103*38fd1498Szrj cselib_lookup (mem_address, address_mode, 1, GET_MODE (mem));
1104*38fd1498Szrj
1105*38fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
1106*38fd1498Szrj {
1107*38fd1498Szrj fprintf (dump_file, " mem: ");
1108*38fd1498Szrj print_inline_rtx (dump_file, mem_address, 0);
1109*38fd1498Szrj fprintf (dump_file, "\n");
1110*38fd1498Szrj }
1111*38fd1498Szrj
1112*38fd1498Szrj /* First see if just canon_rtx (mem_address) is const or frame,
1113*38fd1498Szrj if not, try cselib_expand_value_rtx and call canon_rtx on that. */
1114*38fd1498Szrj address = NULL_RTX;
1115*38fd1498Szrj for (expanded = 0; expanded < 2; expanded++)
1116*38fd1498Szrj {
1117*38fd1498Szrj if (expanded)
1118*38fd1498Szrj {
1119*38fd1498Szrj /* Use cselib to replace all of the reg references with the full
1120*38fd1498Szrj expression. This will take care of the case where we have
1121*38fd1498Szrj
1122*38fd1498Szrj r_x = base + offset;
1123*38fd1498Szrj val = *r_x;
1124*38fd1498Szrj
1125*38fd1498Szrj by making it into
1126*38fd1498Szrj
1127*38fd1498Szrj val = *(base + offset); */
1128*38fd1498Szrj
1129*38fd1498Szrj expanded_address = cselib_expand_value_rtx (mem_address,
1130*38fd1498Szrj scratch, 5);
1131*38fd1498Szrj
1132*38fd1498Szrj /* If this fails, just go with the address from first
1133*38fd1498Szrj iteration. */
1134*38fd1498Szrj if (!expanded_address)
1135*38fd1498Szrj break;
1136*38fd1498Szrj }
1137*38fd1498Szrj else
1138*38fd1498Szrj expanded_address = mem_address;
1139*38fd1498Szrj
1140*38fd1498Szrj /* Split the address into canonical BASE + OFFSET terms. */
1141*38fd1498Szrj address = canon_rtx (expanded_address);
1142*38fd1498Szrj
1143*38fd1498Szrj *offset = 0;
1144*38fd1498Szrj
1145*38fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
1146*38fd1498Szrj {
1147*38fd1498Szrj if (expanded)
1148*38fd1498Szrj {
1149*38fd1498Szrj fprintf (dump_file, "\n after cselib_expand address: ");
1150*38fd1498Szrj print_inline_rtx (dump_file, expanded_address, 0);
1151*38fd1498Szrj fprintf (dump_file, "\n");
1152*38fd1498Szrj }
1153*38fd1498Szrj
1154*38fd1498Szrj fprintf (dump_file, "\n after canon_rtx address: ");
1155*38fd1498Szrj print_inline_rtx (dump_file, address, 0);
1156*38fd1498Szrj fprintf (dump_file, "\n");
1157*38fd1498Szrj }
1158*38fd1498Szrj
1159*38fd1498Szrj if (GET_CODE (address) == CONST)
1160*38fd1498Szrj address = XEXP (address, 0);
1161*38fd1498Szrj
1162*38fd1498Szrj address = strip_offset_and_add (address, offset);
1163*38fd1498Szrj
1164*38fd1498Szrj if (ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (mem))
1165*38fd1498Szrj && const_or_frame_p (address))
1166*38fd1498Szrj {
1167*38fd1498Szrj group_info *group = get_group_info (address);
1168*38fd1498Szrj
1169*38fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
1170*38fd1498Szrj {
1171*38fd1498Szrj fprintf (dump_file, " gid=%d offset=", group->id);
1172*38fd1498Szrj print_dec (*offset, dump_file);
1173*38fd1498Szrj fprintf (dump_file, "\n");
1174*38fd1498Szrj }
1175*38fd1498Szrj *base = NULL;
1176*38fd1498Szrj *group_id = group->id;
1177*38fd1498Szrj return true;
1178*38fd1498Szrj }
1179*38fd1498Szrj }
1180*38fd1498Szrj
1181*38fd1498Szrj *base = cselib_lookup (address, address_mode, true, GET_MODE (mem));
1182*38fd1498Szrj *group_id = -1;
1183*38fd1498Szrj
1184*38fd1498Szrj if (*base == NULL)
1185*38fd1498Szrj {
1186*38fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
1187*38fd1498Szrj fprintf (dump_file, " no cselib val - should be a wild read.\n");
1188*38fd1498Szrj return false;
1189*38fd1498Szrj }
1190*38fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
1191*38fd1498Szrj {
1192*38fd1498Szrj fprintf (dump_file, " varying cselib base=%u:%u offset = ",
1193*38fd1498Szrj (*base)->uid, (*base)->hash);
1194*38fd1498Szrj print_dec (*offset, dump_file);
1195*38fd1498Szrj fprintf (dump_file, "\n");
1196*38fd1498Szrj }
1197*38fd1498Szrj return true;
1198*38fd1498Szrj }
1199*38fd1498Szrj
1200*38fd1498Szrj
1201*38fd1498Szrj /* Clear the rhs field from the active_local_stores array. */
1202*38fd1498Szrj
1203*38fd1498Szrj static void
clear_rhs_from_active_local_stores(void)1204*38fd1498Szrj clear_rhs_from_active_local_stores (void)
1205*38fd1498Szrj {
1206*38fd1498Szrj insn_info_t ptr = active_local_stores;
1207*38fd1498Szrj
1208*38fd1498Szrj while (ptr)
1209*38fd1498Szrj {
1210*38fd1498Szrj store_info *store_info = ptr->store_rec;
1211*38fd1498Szrj /* Skip the clobbers. */
1212*38fd1498Szrj while (!store_info->is_set)
1213*38fd1498Szrj store_info = store_info->next;
1214*38fd1498Szrj
1215*38fd1498Szrj store_info->rhs = NULL;
1216*38fd1498Szrj store_info->const_rhs = NULL;
1217*38fd1498Szrj
1218*38fd1498Szrj ptr = ptr->next_local_store;
1219*38fd1498Szrj }
1220*38fd1498Szrj }
1221*38fd1498Szrj
1222*38fd1498Szrj
1223*38fd1498Szrj /* Mark byte POS bytes from the beginning of store S_INFO as unneeded. */
1224*38fd1498Szrj
1225*38fd1498Szrj static inline void
set_position_unneeded(store_info * s_info,int pos)1226*38fd1498Szrj set_position_unneeded (store_info *s_info, int pos)
1227*38fd1498Szrj {
1228*38fd1498Szrj if (__builtin_expect (s_info->is_large, false))
1229*38fd1498Szrj {
1230*38fd1498Szrj if (bitmap_set_bit (s_info->positions_needed.large.bmap, pos))
1231*38fd1498Szrj s_info->positions_needed.large.count++;
1232*38fd1498Szrj }
1233*38fd1498Szrj else
1234*38fd1498Szrj s_info->positions_needed.small_bitmask
1235*38fd1498Szrj &= ~(HOST_WIDE_INT_1U << pos);
1236*38fd1498Szrj }
1237*38fd1498Szrj
1238*38fd1498Szrj /* Mark the whole store S_INFO as unneeded. */
1239*38fd1498Szrj
1240*38fd1498Szrj static inline void
set_all_positions_unneeded(store_info * s_info)1241*38fd1498Szrj set_all_positions_unneeded (store_info *s_info)
1242*38fd1498Szrj {
1243*38fd1498Szrj if (__builtin_expect (s_info->is_large, false))
1244*38fd1498Szrj {
1245*38fd1498Szrj HOST_WIDE_INT width;
1246*38fd1498Szrj if (s_info->width.is_constant (&width))
1247*38fd1498Szrj {
1248*38fd1498Szrj bitmap_set_range (s_info->positions_needed.large.bmap, 0, width);
1249*38fd1498Szrj s_info->positions_needed.large.count = width;
1250*38fd1498Szrj }
1251*38fd1498Szrj else
1252*38fd1498Szrj {
1253*38fd1498Szrj gcc_checking_assert (!s_info->positions_needed.large.bmap);
1254*38fd1498Szrj s_info->positions_needed.large.count = 1;
1255*38fd1498Szrj }
1256*38fd1498Szrj }
1257*38fd1498Szrj else
1258*38fd1498Szrj s_info->positions_needed.small_bitmask = HOST_WIDE_INT_0U;
1259*38fd1498Szrj }
1260*38fd1498Szrj
1261*38fd1498Szrj /* Return TRUE if any bytes from S_INFO store are needed. */
1262*38fd1498Szrj
1263*38fd1498Szrj static inline bool
any_positions_needed_p(store_info * s_info)1264*38fd1498Szrj any_positions_needed_p (store_info *s_info)
1265*38fd1498Szrj {
1266*38fd1498Szrj if (__builtin_expect (s_info->is_large, false))
1267*38fd1498Szrj {
1268*38fd1498Szrj HOST_WIDE_INT width;
1269*38fd1498Szrj if (s_info->width.is_constant (&width))
1270*38fd1498Szrj {
1271*38fd1498Szrj gcc_checking_assert (s_info->positions_needed.large.bmap);
1272*38fd1498Szrj return s_info->positions_needed.large.count < width;
1273*38fd1498Szrj }
1274*38fd1498Szrj else
1275*38fd1498Szrj {
1276*38fd1498Szrj gcc_checking_assert (!s_info->positions_needed.large.bmap);
1277*38fd1498Szrj return s_info->positions_needed.large.count == 0;
1278*38fd1498Szrj }
1279*38fd1498Szrj }
1280*38fd1498Szrj else
1281*38fd1498Szrj return (s_info->positions_needed.small_bitmask != HOST_WIDE_INT_0U);
1282*38fd1498Szrj }
1283*38fd1498Szrj
1284*38fd1498Szrj /* Return TRUE if all bytes START through START+WIDTH-1 from S_INFO
1285*38fd1498Szrj store are known to be needed. */
1286*38fd1498Szrj
1287*38fd1498Szrj static inline bool
all_positions_needed_p(store_info * s_info,poly_int64 start,poly_int64 width)1288*38fd1498Szrj all_positions_needed_p (store_info *s_info, poly_int64 start,
1289*38fd1498Szrj poly_int64 width)
1290*38fd1498Szrj {
1291*38fd1498Szrj gcc_assert (s_info->rhs);
1292*38fd1498Szrj if (!s_info->width.is_constant ())
1293*38fd1498Szrj {
1294*38fd1498Szrj gcc_assert (s_info->is_large
1295*38fd1498Szrj && !s_info->positions_needed.large.bmap);
1296*38fd1498Szrj return s_info->positions_needed.large.count == 0;
1297*38fd1498Szrj }
1298*38fd1498Szrj
1299*38fd1498Szrj /* Otherwise, if START and WIDTH are non-constant, we're asking about
1300*38fd1498Szrj a non-constant region of a constant-sized store. We can't say for
1301*38fd1498Szrj sure that all positions are needed. */
1302*38fd1498Szrj HOST_WIDE_INT const_start, const_width;
1303*38fd1498Szrj if (!start.is_constant (&const_start)
1304*38fd1498Szrj || !width.is_constant (&const_width))
1305*38fd1498Szrj return false;
1306*38fd1498Szrj
1307*38fd1498Szrj if (__builtin_expect (s_info->is_large, false))
1308*38fd1498Szrj {
1309*38fd1498Szrj for (HOST_WIDE_INT i = const_start; i < const_start + const_width; ++i)
1310*38fd1498Szrj if (bitmap_bit_p (s_info->positions_needed.large.bmap, i))
1311*38fd1498Szrj return false;
1312*38fd1498Szrj return true;
1313*38fd1498Szrj }
1314*38fd1498Szrj else
1315*38fd1498Szrj {
1316*38fd1498Szrj unsigned HOST_WIDE_INT mask
1317*38fd1498Szrj = lowpart_bitmask (const_width) << const_start;
1318*38fd1498Szrj return (s_info->positions_needed.small_bitmask & mask) == mask;
1319*38fd1498Szrj }
1320*38fd1498Szrj }
1321*38fd1498Szrj
1322*38fd1498Szrj
1323*38fd1498Szrj static rtx get_stored_val (store_info *, machine_mode, poly_int64,
1324*38fd1498Szrj poly_int64, basic_block, bool);
1325*38fd1498Szrj
1326*38fd1498Szrj
1327*38fd1498Szrj /* BODY is an instruction pattern that belongs to INSN. Return 1 if
1328*38fd1498Szrj there is a candidate store, after adding it to the appropriate
1329*38fd1498Szrj local store group if so. */
1330*38fd1498Szrj
1331*38fd1498Szrj static int
record_store(rtx body,bb_info_t bb_info)1332*38fd1498Szrj record_store (rtx body, bb_info_t bb_info)
1333*38fd1498Szrj {
1334*38fd1498Szrj rtx mem, rhs, const_rhs, mem_addr;
1335*38fd1498Szrj poly_int64 offset = 0;
1336*38fd1498Szrj poly_int64 width = 0;
1337*38fd1498Szrj insn_info_t insn_info = bb_info->last_insn;
1338*38fd1498Szrj store_info *store_info = NULL;
1339*38fd1498Szrj int group_id;
1340*38fd1498Szrj cselib_val *base = NULL;
1341*38fd1498Szrj insn_info_t ptr, last, redundant_reason;
1342*38fd1498Szrj bool store_is_unused;
1343*38fd1498Szrj
1344*38fd1498Szrj if (GET_CODE (body) != SET && GET_CODE (body) != CLOBBER)
1345*38fd1498Szrj return 0;
1346*38fd1498Szrj
1347*38fd1498Szrj mem = SET_DEST (body);
1348*38fd1498Szrj
1349*38fd1498Szrj /* If this is not used, then this cannot be used to keep the insn
1350*38fd1498Szrj from being deleted. On the other hand, it does provide something
1351*38fd1498Szrj that can be used to prove that another store is dead. */
1352*38fd1498Szrj store_is_unused
1353*38fd1498Szrj = (find_reg_note (insn_info->insn, REG_UNUSED, mem) != NULL);
1354*38fd1498Szrj
1355*38fd1498Szrj /* Check whether that value is a suitable memory location. */
1356*38fd1498Szrj if (!MEM_P (mem))
1357*38fd1498Szrj {
1358*38fd1498Szrj /* If the set or clobber is unused, then it does not effect our
1359*38fd1498Szrj ability to get rid of the entire insn. */
1360*38fd1498Szrj if (!store_is_unused)
1361*38fd1498Szrj insn_info->cannot_delete = true;
1362*38fd1498Szrj return 0;
1363*38fd1498Szrj }
1364*38fd1498Szrj
1365*38fd1498Szrj /* At this point we know mem is a mem. */
1366*38fd1498Szrj if (GET_MODE (mem) == BLKmode)
1367*38fd1498Szrj {
1368*38fd1498Szrj HOST_WIDE_INT const_size;
1369*38fd1498Szrj if (GET_CODE (XEXP (mem, 0)) == SCRATCH)
1370*38fd1498Szrj {
1371*38fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
1372*38fd1498Szrj fprintf (dump_file, " adding wild read for (clobber (mem:BLK (scratch))\n");
1373*38fd1498Szrj add_wild_read (bb_info);
1374*38fd1498Szrj insn_info->cannot_delete = true;
1375*38fd1498Szrj return 0;
1376*38fd1498Szrj }
1377*38fd1498Szrj /* Handle (set (mem:BLK (addr) [... S36 ...]) (const_int 0))
1378*38fd1498Szrj as memset (addr, 0, 36); */
1379*38fd1498Szrj else if (!MEM_SIZE_KNOWN_P (mem)
1380*38fd1498Szrj || maybe_le (MEM_SIZE (mem), 0)
1381*38fd1498Szrj /* This is a limit on the bitmap size, which is only relevant
1382*38fd1498Szrj for constant-sized MEMs. */
1383*38fd1498Szrj || (MEM_SIZE (mem).is_constant (&const_size)
1384*38fd1498Szrj && const_size > MAX_OFFSET)
1385*38fd1498Szrj || GET_CODE (body) != SET
1386*38fd1498Szrj || !CONST_INT_P (SET_SRC (body)))
1387*38fd1498Szrj {
1388*38fd1498Szrj if (!store_is_unused)
1389*38fd1498Szrj {
1390*38fd1498Szrj /* If the set or clobber is unused, then it does not effect our
1391*38fd1498Szrj ability to get rid of the entire insn. */
1392*38fd1498Szrj insn_info->cannot_delete = true;
1393*38fd1498Szrj clear_rhs_from_active_local_stores ();
1394*38fd1498Szrj }
1395*38fd1498Szrj return 0;
1396*38fd1498Szrj }
1397*38fd1498Szrj }
1398*38fd1498Szrj
1399*38fd1498Szrj /* We can still process a volatile mem, we just cannot delete it. */
1400*38fd1498Szrj if (MEM_VOLATILE_P (mem))
1401*38fd1498Szrj insn_info->cannot_delete = true;
1402*38fd1498Szrj
1403*38fd1498Szrj if (!canon_address (mem, &group_id, &offset, &base))
1404*38fd1498Szrj {
1405*38fd1498Szrj clear_rhs_from_active_local_stores ();
1406*38fd1498Szrj return 0;
1407*38fd1498Szrj }
1408*38fd1498Szrj
1409*38fd1498Szrj if (GET_MODE (mem) == BLKmode)
1410*38fd1498Szrj width = MEM_SIZE (mem);
1411*38fd1498Szrj else
1412*38fd1498Szrj width = GET_MODE_SIZE (GET_MODE (mem));
1413*38fd1498Szrj
1414*38fd1498Szrj if (!endpoint_representable_p (offset, width))
1415*38fd1498Szrj {
1416*38fd1498Szrj clear_rhs_from_active_local_stores ();
1417*38fd1498Szrj return 0;
1418*38fd1498Szrj }
1419*38fd1498Szrj
1420*38fd1498Szrj if (known_eq (width, 0))
1421*38fd1498Szrj return 0;
1422*38fd1498Szrj
1423*38fd1498Szrj if (group_id >= 0)
1424*38fd1498Szrj {
1425*38fd1498Szrj /* In the restrictive case where the base is a constant or the
1426*38fd1498Szrj frame pointer we can do global analysis. */
1427*38fd1498Szrj
1428*38fd1498Szrj group_info *group
1429*38fd1498Szrj = rtx_group_vec[group_id];
1430*38fd1498Szrj tree expr = MEM_EXPR (mem);
1431*38fd1498Szrj
1432*38fd1498Szrj store_info = rtx_store_info_pool.allocate ();
1433*38fd1498Szrj set_usage_bits (group, offset, width, expr);
1434*38fd1498Szrj
1435*38fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
1436*38fd1498Szrj {
1437*38fd1498Szrj fprintf (dump_file, " processing const base store gid=%d",
1438*38fd1498Szrj group_id);
1439*38fd1498Szrj print_range (dump_file, offset, width);
1440*38fd1498Szrj fprintf (dump_file, "\n");
1441*38fd1498Szrj }
1442*38fd1498Szrj }
1443*38fd1498Szrj else
1444*38fd1498Szrj {
1445*38fd1498Szrj if (may_be_sp_based_p (XEXP (mem, 0)))
1446*38fd1498Szrj insn_info->stack_pointer_based = true;
1447*38fd1498Szrj insn_info->contains_cselib_groups = true;
1448*38fd1498Szrj
1449*38fd1498Szrj store_info = cse_store_info_pool.allocate ();
1450*38fd1498Szrj group_id = -1;
1451*38fd1498Szrj
1452*38fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
1453*38fd1498Szrj {
1454*38fd1498Szrj fprintf (dump_file, " processing cselib store ");
1455*38fd1498Szrj print_range (dump_file, offset, width);
1456*38fd1498Szrj fprintf (dump_file, "\n");
1457*38fd1498Szrj }
1458*38fd1498Szrj }
1459*38fd1498Szrj
1460*38fd1498Szrj const_rhs = rhs = NULL_RTX;
1461*38fd1498Szrj if (GET_CODE (body) == SET
1462*38fd1498Szrj /* No place to keep the value after ra. */
1463*38fd1498Szrj && !reload_completed
1464*38fd1498Szrj && (REG_P (SET_SRC (body))
1465*38fd1498Szrj || GET_CODE (SET_SRC (body)) == SUBREG
1466*38fd1498Szrj || CONSTANT_P (SET_SRC (body)))
1467*38fd1498Szrj && !MEM_VOLATILE_P (mem)
1468*38fd1498Szrj /* Sometimes the store and reload is used for truncation and
1469*38fd1498Szrj rounding. */
1470*38fd1498Szrj && !(FLOAT_MODE_P (GET_MODE (mem)) && (flag_float_store)))
1471*38fd1498Szrj {
1472*38fd1498Szrj rhs = SET_SRC (body);
1473*38fd1498Szrj if (CONSTANT_P (rhs))
1474*38fd1498Szrj const_rhs = rhs;
1475*38fd1498Szrj else if (body == PATTERN (insn_info->insn))
1476*38fd1498Szrj {
1477*38fd1498Szrj rtx tem = find_reg_note (insn_info->insn, REG_EQUAL, NULL_RTX);
1478*38fd1498Szrj if (tem && CONSTANT_P (XEXP (tem, 0)))
1479*38fd1498Szrj const_rhs = XEXP (tem, 0);
1480*38fd1498Szrj }
1481*38fd1498Szrj if (const_rhs == NULL_RTX && REG_P (rhs))
1482*38fd1498Szrj {
1483*38fd1498Szrj rtx tem = cselib_expand_value_rtx (rhs, scratch, 5);
1484*38fd1498Szrj
1485*38fd1498Szrj if (tem && CONSTANT_P (tem))
1486*38fd1498Szrj const_rhs = tem;
1487*38fd1498Szrj }
1488*38fd1498Szrj }
1489*38fd1498Szrj
1490*38fd1498Szrj /* Check to see if this stores causes some other stores to be
1491*38fd1498Szrj dead. */
1492*38fd1498Szrj ptr = active_local_stores;
1493*38fd1498Szrj last = NULL;
1494*38fd1498Szrj redundant_reason = NULL;
1495*38fd1498Szrj mem = canon_rtx (mem);
1496*38fd1498Szrj
1497*38fd1498Szrj if (group_id < 0)
1498*38fd1498Szrj mem_addr = base->val_rtx;
1499*38fd1498Szrj else
1500*38fd1498Szrj {
1501*38fd1498Szrj group_info *group = rtx_group_vec[group_id];
1502*38fd1498Szrj mem_addr = group->canon_base_addr;
1503*38fd1498Szrj }
1504*38fd1498Szrj if (maybe_ne (offset, 0))
1505*38fd1498Szrj mem_addr = plus_constant (get_address_mode (mem), mem_addr, offset);
1506*38fd1498Szrj
1507*38fd1498Szrj while (ptr)
1508*38fd1498Szrj {
1509*38fd1498Szrj insn_info_t next = ptr->next_local_store;
1510*38fd1498Szrj struct store_info *s_info = ptr->store_rec;
1511*38fd1498Szrj bool del = true;
1512*38fd1498Szrj
1513*38fd1498Szrj /* Skip the clobbers. We delete the active insn if this insn
1514*38fd1498Szrj shadows the set. To have been put on the active list, it
1515*38fd1498Szrj has exactly on set. */
1516*38fd1498Szrj while (!s_info->is_set)
1517*38fd1498Szrj s_info = s_info->next;
1518*38fd1498Szrj
1519*38fd1498Szrj if (s_info->group_id == group_id && s_info->cse_base == base)
1520*38fd1498Szrj {
1521*38fd1498Szrj HOST_WIDE_INT i;
1522*38fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
1523*38fd1498Szrj {
1524*38fd1498Szrj fprintf (dump_file, " trying store in insn=%d gid=%d",
1525*38fd1498Szrj INSN_UID (ptr->insn), s_info->group_id);
1526*38fd1498Szrj print_range (dump_file, s_info->offset, s_info->width);
1527*38fd1498Szrj fprintf (dump_file, "\n");
1528*38fd1498Szrj }
1529*38fd1498Szrj
1530*38fd1498Szrj /* Even if PTR won't be eliminated as unneeded, if both
1531*38fd1498Szrj PTR and this insn store the same constant value, we might
1532*38fd1498Szrj eliminate this insn instead. */
1533*38fd1498Szrj if (s_info->const_rhs
1534*38fd1498Szrj && const_rhs
1535*38fd1498Szrj && known_subrange_p (offset, width,
1536*38fd1498Szrj s_info->offset, s_info->width)
1537*38fd1498Szrj && all_positions_needed_p (s_info, offset - s_info->offset,
1538*38fd1498Szrj width)
1539*38fd1498Szrj /* We can only remove the later store if the earlier aliases
1540*38fd1498Szrj at least all accesses the later one. */
1541*38fd1498Szrj && (MEM_ALIAS_SET (mem) == MEM_ALIAS_SET (s_info->mem)
1542*38fd1498Szrj || alias_set_subset_of (MEM_ALIAS_SET (mem),
1543*38fd1498Szrj MEM_ALIAS_SET (s_info->mem))))
1544*38fd1498Szrj {
1545*38fd1498Szrj if (GET_MODE (mem) == BLKmode)
1546*38fd1498Szrj {
1547*38fd1498Szrj if (GET_MODE (s_info->mem) == BLKmode
1548*38fd1498Szrj && s_info->const_rhs == const_rhs)
1549*38fd1498Szrj redundant_reason = ptr;
1550*38fd1498Szrj }
1551*38fd1498Szrj else if (s_info->const_rhs == const0_rtx
1552*38fd1498Szrj && const_rhs == const0_rtx)
1553*38fd1498Szrj redundant_reason = ptr;
1554*38fd1498Szrj else
1555*38fd1498Szrj {
1556*38fd1498Szrj rtx val;
1557*38fd1498Szrj start_sequence ();
1558*38fd1498Szrj val = get_stored_val (s_info, GET_MODE (mem), offset, width,
1559*38fd1498Szrj BLOCK_FOR_INSN (insn_info->insn),
1560*38fd1498Szrj true);
1561*38fd1498Szrj if (get_insns () != NULL)
1562*38fd1498Szrj val = NULL_RTX;
1563*38fd1498Szrj end_sequence ();
1564*38fd1498Szrj if (val && rtx_equal_p (val, const_rhs))
1565*38fd1498Szrj redundant_reason = ptr;
1566*38fd1498Szrj }
1567*38fd1498Szrj }
1568*38fd1498Szrj
1569*38fd1498Szrj HOST_WIDE_INT begin_unneeded, const_s_width, const_width;
1570*38fd1498Szrj if (known_subrange_p (s_info->offset, s_info->width, offset, width))
1571*38fd1498Szrj /* The new store touches every byte that S_INFO does. */
1572*38fd1498Szrj set_all_positions_unneeded (s_info);
1573*38fd1498Szrj else if ((offset - s_info->offset).is_constant (&begin_unneeded)
1574*38fd1498Szrj && s_info->width.is_constant (&const_s_width)
1575*38fd1498Szrj && width.is_constant (&const_width))
1576*38fd1498Szrj {
1577*38fd1498Szrj HOST_WIDE_INT end_unneeded = begin_unneeded + const_width;
1578*38fd1498Szrj begin_unneeded = MAX (begin_unneeded, 0);
1579*38fd1498Szrj end_unneeded = MIN (end_unneeded, const_s_width);
1580*38fd1498Szrj for (i = begin_unneeded; i < end_unneeded; ++i)
1581*38fd1498Szrj set_position_unneeded (s_info, i);
1582*38fd1498Szrj }
1583*38fd1498Szrj else
1584*38fd1498Szrj {
1585*38fd1498Szrj /* We don't know which parts of S_INFO are needed and
1586*38fd1498Szrj which aren't, so invalidate the RHS. */
1587*38fd1498Szrj s_info->rhs = NULL;
1588*38fd1498Szrj s_info->const_rhs = NULL;
1589*38fd1498Szrj }
1590*38fd1498Szrj }
1591*38fd1498Szrj else if (s_info->rhs)
1592*38fd1498Szrj /* Need to see if it is possible for this store to overwrite
1593*38fd1498Szrj the value of store_info. If it is, set the rhs to NULL to
1594*38fd1498Szrj keep it from being used to remove a load. */
1595*38fd1498Szrj {
1596*38fd1498Szrj if (canon_output_dependence (s_info->mem, true,
1597*38fd1498Szrj mem, GET_MODE (mem),
1598*38fd1498Szrj mem_addr))
1599*38fd1498Szrj {
1600*38fd1498Szrj s_info->rhs = NULL;
1601*38fd1498Szrj s_info->const_rhs = NULL;
1602*38fd1498Szrj }
1603*38fd1498Szrj }
1604*38fd1498Szrj
1605*38fd1498Szrj /* An insn can be deleted if every position of every one of
1606*38fd1498Szrj its s_infos is zero. */
1607*38fd1498Szrj if (any_positions_needed_p (s_info))
1608*38fd1498Szrj del = false;
1609*38fd1498Szrj
1610*38fd1498Szrj if (del)
1611*38fd1498Szrj {
1612*38fd1498Szrj insn_info_t insn_to_delete = ptr;
1613*38fd1498Szrj
1614*38fd1498Szrj active_local_stores_len--;
1615*38fd1498Szrj if (last)
1616*38fd1498Szrj last->next_local_store = ptr->next_local_store;
1617*38fd1498Szrj else
1618*38fd1498Szrj active_local_stores = ptr->next_local_store;
1619*38fd1498Szrj
1620*38fd1498Szrj if (!insn_to_delete->cannot_delete)
1621*38fd1498Szrj delete_dead_store_insn (insn_to_delete);
1622*38fd1498Szrj }
1623*38fd1498Szrj else
1624*38fd1498Szrj last = ptr;
1625*38fd1498Szrj
1626*38fd1498Szrj ptr = next;
1627*38fd1498Szrj }
1628*38fd1498Szrj
1629*38fd1498Szrj /* Finish filling in the store_info. */
1630*38fd1498Szrj store_info->next = insn_info->store_rec;
1631*38fd1498Szrj insn_info->store_rec = store_info;
1632*38fd1498Szrj store_info->mem = mem;
1633*38fd1498Szrj store_info->mem_addr = mem_addr;
1634*38fd1498Szrj store_info->cse_base = base;
1635*38fd1498Szrj HOST_WIDE_INT const_width;
1636*38fd1498Szrj if (!width.is_constant (&const_width))
1637*38fd1498Szrj {
1638*38fd1498Szrj store_info->is_large = true;
1639*38fd1498Szrj store_info->positions_needed.large.count = 0;
1640*38fd1498Szrj store_info->positions_needed.large.bmap = NULL;
1641*38fd1498Szrj }
1642*38fd1498Szrj else if (const_width > HOST_BITS_PER_WIDE_INT)
1643*38fd1498Szrj {
1644*38fd1498Szrj store_info->is_large = true;
1645*38fd1498Szrj store_info->positions_needed.large.count = 0;
1646*38fd1498Szrj store_info->positions_needed.large.bmap = BITMAP_ALLOC (&dse_bitmap_obstack);
1647*38fd1498Szrj }
1648*38fd1498Szrj else
1649*38fd1498Szrj {
1650*38fd1498Szrj store_info->is_large = false;
1651*38fd1498Szrj store_info->positions_needed.small_bitmask
1652*38fd1498Szrj = lowpart_bitmask (const_width);
1653*38fd1498Szrj }
1654*38fd1498Szrj store_info->group_id = group_id;
1655*38fd1498Szrj store_info->offset = offset;
1656*38fd1498Szrj store_info->width = width;
1657*38fd1498Szrj store_info->is_set = GET_CODE (body) == SET;
1658*38fd1498Szrj store_info->rhs = rhs;
1659*38fd1498Szrj store_info->const_rhs = const_rhs;
1660*38fd1498Szrj store_info->redundant_reason = redundant_reason;
1661*38fd1498Szrj
1662*38fd1498Szrj /* If this is a clobber, we return 0. We will only be able to
1663*38fd1498Szrj delete this insn if there is only one store USED store, but we
1664*38fd1498Szrj can use the clobber to delete other stores earlier. */
1665*38fd1498Szrj return store_info->is_set ? 1 : 0;
1666*38fd1498Szrj }
1667*38fd1498Szrj
1668*38fd1498Szrj
1669*38fd1498Szrj static void
dump_insn_info(const char * start,insn_info_t insn_info)1670*38fd1498Szrj dump_insn_info (const char * start, insn_info_t insn_info)
1671*38fd1498Szrj {
1672*38fd1498Szrj fprintf (dump_file, "%s insn=%d %s\n", start,
1673*38fd1498Szrj INSN_UID (insn_info->insn),
1674*38fd1498Szrj insn_info->store_rec ? "has store" : "naked");
1675*38fd1498Szrj }
1676*38fd1498Szrj
1677*38fd1498Szrj
1678*38fd1498Szrj /* If the modes are different and the value's source and target do not
1679*38fd1498Szrj line up, we need to extract the value from lower part of the rhs of
1680*38fd1498Szrj the store, shift it, and then put it into a form that can be shoved
1681*38fd1498Szrj into the read_insn. This function generates a right SHIFT of a
1682*38fd1498Szrj value that is at least ACCESS_SIZE bytes wide of READ_MODE. The
1683*38fd1498Szrj shift sequence is returned or NULL if we failed to find a
1684*38fd1498Szrj shift. */
1685*38fd1498Szrj
1686*38fd1498Szrj static rtx
find_shift_sequence(poly_int64 access_size,store_info * store_info,machine_mode read_mode,poly_int64 shift,bool speed,bool require_cst)1687*38fd1498Szrj find_shift_sequence (poly_int64 access_size,
1688*38fd1498Szrj store_info *store_info,
1689*38fd1498Szrj machine_mode read_mode,
1690*38fd1498Szrj poly_int64 shift, bool speed, bool require_cst)
1691*38fd1498Szrj {
1692*38fd1498Szrj machine_mode store_mode = GET_MODE (store_info->mem);
1693*38fd1498Szrj scalar_int_mode new_mode;
1694*38fd1498Szrj rtx read_reg = NULL;
1695*38fd1498Szrj
1696*38fd1498Szrj /* Some machines like the x86 have shift insns for each size of
1697*38fd1498Szrj operand. Other machines like the ppc or the ia-64 may only have
1698*38fd1498Szrj shift insns that shift values within 32 or 64 bit registers.
1699*38fd1498Szrj This loop tries to find the smallest shift insn that will right
1700*38fd1498Szrj justify the value we want to read but is available in one insn on
1701*38fd1498Szrj the machine. */
1702*38fd1498Szrj
1703*38fd1498Szrj opt_scalar_int_mode new_mode_iter;
1704*38fd1498Szrj FOR_EACH_MODE_FROM (new_mode_iter,
1705*38fd1498Szrj smallest_int_mode_for_size (access_size * BITS_PER_UNIT))
1706*38fd1498Szrj {
1707*38fd1498Szrj rtx target, new_reg, new_lhs;
1708*38fd1498Szrj rtx_insn *shift_seq, *insn;
1709*38fd1498Szrj int cost;
1710*38fd1498Szrj
1711*38fd1498Szrj new_mode = new_mode_iter.require ();
1712*38fd1498Szrj if (GET_MODE_BITSIZE (new_mode) > BITS_PER_WORD)
1713*38fd1498Szrj break;
1714*38fd1498Szrj
1715*38fd1498Szrj /* If a constant was stored into memory, try to simplify it here,
1716*38fd1498Szrj otherwise the cost of the shift might preclude this optimization
1717*38fd1498Szrj e.g. at -Os, even when no actual shift will be needed. */
1718*38fd1498Szrj if (store_info->const_rhs)
1719*38fd1498Szrj {
1720*38fd1498Szrj poly_uint64 byte = subreg_lowpart_offset (new_mode, store_mode);
1721*38fd1498Szrj rtx ret = simplify_subreg (new_mode, store_info->const_rhs,
1722*38fd1498Szrj store_mode, byte);
1723*38fd1498Szrj if (ret && CONSTANT_P (ret))
1724*38fd1498Szrj {
1725*38fd1498Szrj rtx shift_rtx = gen_int_shift_amount (new_mode, shift);
1726*38fd1498Szrj ret = simplify_const_binary_operation (LSHIFTRT, new_mode,
1727*38fd1498Szrj ret, shift_rtx);
1728*38fd1498Szrj if (ret && CONSTANT_P (ret))
1729*38fd1498Szrj {
1730*38fd1498Szrj byte = subreg_lowpart_offset (read_mode, new_mode);
1731*38fd1498Szrj ret = simplify_subreg (read_mode, ret, new_mode, byte);
1732*38fd1498Szrj if (ret && CONSTANT_P (ret)
1733*38fd1498Szrj && (set_src_cost (ret, read_mode, speed)
1734*38fd1498Szrj <= COSTS_N_INSNS (1)))
1735*38fd1498Szrj return ret;
1736*38fd1498Szrj }
1737*38fd1498Szrj }
1738*38fd1498Szrj }
1739*38fd1498Szrj
1740*38fd1498Szrj if (require_cst)
1741*38fd1498Szrj return NULL_RTX;
1742*38fd1498Szrj
1743*38fd1498Szrj /* Try a wider mode if truncating the store mode to NEW_MODE
1744*38fd1498Szrj requires a real instruction. */
1745*38fd1498Szrj if (maybe_lt (GET_MODE_SIZE (new_mode), GET_MODE_SIZE (store_mode))
1746*38fd1498Szrj && !TRULY_NOOP_TRUNCATION_MODES_P (new_mode, store_mode))
1747*38fd1498Szrj continue;
1748*38fd1498Szrj
1749*38fd1498Szrj /* Also try a wider mode if the necessary punning is either not
1750*38fd1498Szrj desirable or not possible. */
1751*38fd1498Szrj if (!CONSTANT_P (store_info->rhs)
1752*38fd1498Szrj && !targetm.modes_tieable_p (new_mode, store_mode))
1753*38fd1498Szrj continue;
1754*38fd1498Szrj
1755*38fd1498Szrj new_reg = gen_reg_rtx (new_mode);
1756*38fd1498Szrj
1757*38fd1498Szrj start_sequence ();
1758*38fd1498Szrj
1759*38fd1498Szrj /* In theory we could also check for an ashr. Ian Taylor knows
1760*38fd1498Szrj of one dsp where the cost of these two was not the same. But
1761*38fd1498Szrj this really is a rare case anyway. */
1762*38fd1498Szrj target = expand_binop (new_mode, lshr_optab, new_reg,
1763*38fd1498Szrj gen_int_shift_amount (new_mode, shift),
1764*38fd1498Szrj new_reg, 1, OPTAB_DIRECT);
1765*38fd1498Szrj
1766*38fd1498Szrj shift_seq = get_insns ();
1767*38fd1498Szrj end_sequence ();
1768*38fd1498Szrj
1769*38fd1498Szrj if (target != new_reg || shift_seq == NULL)
1770*38fd1498Szrj continue;
1771*38fd1498Szrj
1772*38fd1498Szrj cost = 0;
1773*38fd1498Szrj for (insn = shift_seq; insn != NULL_RTX; insn = NEXT_INSN (insn))
1774*38fd1498Szrj if (INSN_P (insn))
1775*38fd1498Szrj cost += insn_cost (insn, speed);
1776*38fd1498Szrj
1777*38fd1498Szrj /* The computation up to here is essentially independent
1778*38fd1498Szrj of the arguments and could be precomputed. It may
1779*38fd1498Szrj not be worth doing so. We could precompute if
1780*38fd1498Szrj worthwhile or at least cache the results. The result
1781*38fd1498Szrj technically depends on both SHIFT and ACCESS_SIZE,
1782*38fd1498Szrj but in practice the answer will depend only on ACCESS_SIZE. */
1783*38fd1498Szrj
1784*38fd1498Szrj if (cost > COSTS_N_INSNS (1))
1785*38fd1498Szrj continue;
1786*38fd1498Szrj
1787*38fd1498Szrj new_lhs = extract_low_bits (new_mode, store_mode,
1788*38fd1498Szrj copy_rtx (store_info->rhs));
1789*38fd1498Szrj if (new_lhs == NULL_RTX)
1790*38fd1498Szrj continue;
1791*38fd1498Szrj
1792*38fd1498Szrj /* We found an acceptable shift. Generate a move to
1793*38fd1498Szrj take the value from the store and put it into the
1794*38fd1498Szrj shift pseudo, then shift it, then generate another
1795*38fd1498Szrj move to put in into the target of the read. */
1796*38fd1498Szrj emit_move_insn (new_reg, new_lhs);
1797*38fd1498Szrj emit_insn (shift_seq);
1798*38fd1498Szrj read_reg = extract_low_bits (read_mode, new_mode, new_reg);
1799*38fd1498Szrj break;
1800*38fd1498Szrj }
1801*38fd1498Szrj
1802*38fd1498Szrj return read_reg;
1803*38fd1498Szrj }
1804*38fd1498Szrj
1805*38fd1498Szrj
1806*38fd1498Szrj /* Call back for note_stores to find the hard regs set or clobbered by
1807*38fd1498Szrj insn. Data is a bitmap of the hardregs set so far. */
1808*38fd1498Szrj
1809*38fd1498Szrj static void
look_for_hardregs(rtx x,const_rtx pat ATTRIBUTE_UNUSED,void * data)1810*38fd1498Szrj look_for_hardregs (rtx x, const_rtx pat ATTRIBUTE_UNUSED, void *data)
1811*38fd1498Szrj {
1812*38fd1498Szrj bitmap regs_set = (bitmap) data;
1813*38fd1498Szrj
1814*38fd1498Szrj if (REG_P (x)
1815*38fd1498Szrj && HARD_REGISTER_P (x))
1816*38fd1498Szrj bitmap_set_range (regs_set, REGNO (x), REG_NREGS (x));
1817*38fd1498Szrj }
1818*38fd1498Szrj
1819*38fd1498Szrj /* Helper function for replace_read and record_store.
1820*38fd1498Szrj Attempt to return a value of mode READ_MODE stored in STORE_INFO,
1821*38fd1498Szrj consisting of READ_WIDTH bytes starting from READ_OFFSET. Return NULL
1822*38fd1498Szrj if not successful. If REQUIRE_CST is true, return always constant. */
1823*38fd1498Szrj
1824*38fd1498Szrj static rtx
get_stored_val(store_info * store_info,machine_mode read_mode,poly_int64 read_offset,poly_int64 read_width,basic_block bb,bool require_cst)1825*38fd1498Szrj get_stored_val (store_info *store_info, machine_mode read_mode,
1826*38fd1498Szrj poly_int64 read_offset, poly_int64 read_width,
1827*38fd1498Szrj basic_block bb, bool require_cst)
1828*38fd1498Szrj {
1829*38fd1498Szrj machine_mode store_mode = GET_MODE (store_info->mem);
1830*38fd1498Szrj poly_int64 gap;
1831*38fd1498Szrj rtx read_reg;
1832*38fd1498Szrj
1833*38fd1498Szrj /* To get here the read is within the boundaries of the write so
1834*38fd1498Szrj shift will never be negative. Start out with the shift being in
1835*38fd1498Szrj bytes. */
1836*38fd1498Szrj if (store_mode == BLKmode)
1837*38fd1498Szrj gap = 0;
1838*38fd1498Szrj else if (BYTES_BIG_ENDIAN)
1839*38fd1498Szrj gap = ((store_info->offset + store_info->width)
1840*38fd1498Szrj - (read_offset + read_width));
1841*38fd1498Szrj else
1842*38fd1498Szrj gap = read_offset - store_info->offset;
1843*38fd1498Szrj
1844*38fd1498Szrj if (maybe_ne (gap, 0))
1845*38fd1498Szrj {
1846*38fd1498Szrj poly_int64 shift = gap * BITS_PER_UNIT;
1847*38fd1498Szrj poly_int64 access_size = GET_MODE_SIZE (read_mode) + gap;
1848*38fd1498Szrj read_reg = find_shift_sequence (access_size, store_info, read_mode,
1849*38fd1498Szrj shift, optimize_bb_for_speed_p (bb),
1850*38fd1498Szrj require_cst);
1851*38fd1498Szrj }
1852*38fd1498Szrj else if (store_mode == BLKmode)
1853*38fd1498Szrj {
1854*38fd1498Szrj /* The store is a memset (addr, const_val, const_size). */
1855*38fd1498Szrj gcc_assert (CONST_INT_P (store_info->rhs));
1856*38fd1498Szrj scalar_int_mode int_store_mode;
1857*38fd1498Szrj if (!int_mode_for_mode (read_mode).exists (&int_store_mode))
1858*38fd1498Szrj read_reg = NULL_RTX;
1859*38fd1498Szrj else if (store_info->rhs == const0_rtx)
1860*38fd1498Szrj read_reg = extract_low_bits (read_mode, int_store_mode, const0_rtx);
1861*38fd1498Szrj else if (GET_MODE_BITSIZE (int_store_mode) > HOST_BITS_PER_WIDE_INT
1862*38fd1498Szrj || BITS_PER_UNIT >= HOST_BITS_PER_WIDE_INT)
1863*38fd1498Szrj read_reg = NULL_RTX;
1864*38fd1498Szrj else
1865*38fd1498Szrj {
1866*38fd1498Szrj unsigned HOST_WIDE_INT c
1867*38fd1498Szrj = INTVAL (store_info->rhs)
1868*38fd1498Szrj & ((HOST_WIDE_INT_1 << BITS_PER_UNIT) - 1);
1869*38fd1498Szrj int shift = BITS_PER_UNIT;
1870*38fd1498Szrj while (shift < HOST_BITS_PER_WIDE_INT)
1871*38fd1498Szrj {
1872*38fd1498Szrj c |= (c << shift);
1873*38fd1498Szrj shift <<= 1;
1874*38fd1498Szrj }
1875*38fd1498Szrj read_reg = gen_int_mode (c, int_store_mode);
1876*38fd1498Szrj read_reg = extract_low_bits (read_mode, int_store_mode, read_reg);
1877*38fd1498Szrj }
1878*38fd1498Szrj }
1879*38fd1498Szrj else if (store_info->const_rhs
1880*38fd1498Szrj && (require_cst
1881*38fd1498Szrj || GET_MODE_CLASS (read_mode) != GET_MODE_CLASS (store_mode)))
1882*38fd1498Szrj read_reg = extract_low_bits (read_mode, store_mode,
1883*38fd1498Szrj copy_rtx (store_info->const_rhs));
1884*38fd1498Szrj else
1885*38fd1498Szrj read_reg = extract_low_bits (read_mode, store_mode,
1886*38fd1498Szrj copy_rtx (store_info->rhs));
1887*38fd1498Szrj if (require_cst && read_reg && !CONSTANT_P (read_reg))
1888*38fd1498Szrj read_reg = NULL_RTX;
1889*38fd1498Szrj return read_reg;
1890*38fd1498Szrj }
1891*38fd1498Szrj
1892*38fd1498Szrj /* Take a sequence of:
1893*38fd1498Szrj A <- r1
1894*38fd1498Szrj ...
1895*38fd1498Szrj ... <- A
1896*38fd1498Szrj
1897*38fd1498Szrj and change it into
1898*38fd1498Szrj r2 <- r1
1899*38fd1498Szrj A <- r1
1900*38fd1498Szrj ...
1901*38fd1498Szrj ... <- r2
1902*38fd1498Szrj
1903*38fd1498Szrj or
1904*38fd1498Szrj
1905*38fd1498Szrj r3 <- extract (r1)
1906*38fd1498Szrj r3 <- r3 >> shift
1907*38fd1498Szrj r2 <- extract (r3)
1908*38fd1498Szrj ... <- r2
1909*38fd1498Szrj
1910*38fd1498Szrj or
1911*38fd1498Szrj
1912*38fd1498Szrj r2 <- extract (r1)
1913*38fd1498Szrj ... <- r2
1914*38fd1498Szrj
1915*38fd1498Szrj Depending on the alignment and the mode of the store and
1916*38fd1498Szrj subsequent load.
1917*38fd1498Szrj
1918*38fd1498Szrj
1919*38fd1498Szrj The STORE_INFO and STORE_INSN are for the store and READ_INFO
1920*38fd1498Szrj and READ_INSN are for the read. Return true if the replacement
1921*38fd1498Szrj went ok. */
1922*38fd1498Szrj
1923*38fd1498Szrj static bool
replace_read(store_info * store_info,insn_info_t store_insn,read_info_t read_info,insn_info_t read_insn,rtx * loc,bitmap regs_live)1924*38fd1498Szrj replace_read (store_info *store_info, insn_info_t store_insn,
1925*38fd1498Szrj read_info_t read_info, insn_info_t read_insn, rtx *loc,
1926*38fd1498Szrj bitmap regs_live)
1927*38fd1498Szrj {
1928*38fd1498Szrj machine_mode store_mode = GET_MODE (store_info->mem);
1929*38fd1498Szrj machine_mode read_mode = GET_MODE (read_info->mem);
1930*38fd1498Szrj rtx_insn *insns, *this_insn;
1931*38fd1498Szrj rtx read_reg;
1932*38fd1498Szrj basic_block bb;
1933*38fd1498Szrj
1934*38fd1498Szrj if (!dbg_cnt (dse))
1935*38fd1498Szrj return false;
1936*38fd1498Szrj
1937*38fd1498Szrj /* Create a sequence of instructions to set up the read register.
1938*38fd1498Szrj This sequence goes immediately before the store and its result
1939*38fd1498Szrj is read by the load.
1940*38fd1498Szrj
1941*38fd1498Szrj We need to keep this in perspective. We are replacing a read
1942*38fd1498Szrj with a sequence of insns, but the read will almost certainly be
1943*38fd1498Szrj in cache, so it is not going to be an expensive one. Thus, we
1944*38fd1498Szrj are not willing to do a multi insn shift or worse a subroutine
1945*38fd1498Szrj call to get rid of the read. */
1946*38fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
1947*38fd1498Szrj fprintf (dump_file, "trying to replace %smode load in insn %d"
1948*38fd1498Szrj " from %smode store in insn %d\n",
1949*38fd1498Szrj GET_MODE_NAME (read_mode), INSN_UID (read_insn->insn),
1950*38fd1498Szrj GET_MODE_NAME (store_mode), INSN_UID (store_insn->insn));
1951*38fd1498Szrj start_sequence ();
1952*38fd1498Szrj bb = BLOCK_FOR_INSN (read_insn->insn);
1953*38fd1498Szrj read_reg = get_stored_val (store_info,
1954*38fd1498Szrj read_mode, read_info->offset, read_info->width,
1955*38fd1498Szrj bb, false);
1956*38fd1498Szrj if (read_reg == NULL_RTX)
1957*38fd1498Szrj {
1958*38fd1498Szrj end_sequence ();
1959*38fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
1960*38fd1498Szrj fprintf (dump_file, " -- could not extract bits of stored value\n");
1961*38fd1498Szrj return false;
1962*38fd1498Szrj }
1963*38fd1498Szrj /* Force the value into a new register so that it won't be clobbered
1964*38fd1498Szrj between the store and the load. */
1965*38fd1498Szrj read_reg = copy_to_mode_reg (read_mode, read_reg);
1966*38fd1498Szrj insns = get_insns ();
1967*38fd1498Szrj end_sequence ();
1968*38fd1498Szrj
1969*38fd1498Szrj if (insns != NULL_RTX)
1970*38fd1498Szrj {
1971*38fd1498Szrj /* Now we have to scan the set of new instructions to see if the
1972*38fd1498Szrj sequence contains and sets of hardregs that happened to be
1973*38fd1498Szrj live at this point. For instance, this can happen if one of
1974*38fd1498Szrj the insns sets the CC and the CC happened to be live at that
1975*38fd1498Szrj point. This does occasionally happen, see PR 37922. */
1976*38fd1498Szrj bitmap regs_set = BITMAP_ALLOC (®_obstack);
1977*38fd1498Szrj
1978*38fd1498Szrj for (this_insn = insns; this_insn != NULL_RTX; this_insn = NEXT_INSN (this_insn))
1979*38fd1498Szrj note_stores (PATTERN (this_insn), look_for_hardregs, regs_set);
1980*38fd1498Szrj
1981*38fd1498Szrj bitmap_and_into (regs_set, regs_live);
1982*38fd1498Szrj if (!bitmap_empty_p (regs_set))
1983*38fd1498Szrj {
1984*38fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
1985*38fd1498Szrj {
1986*38fd1498Szrj fprintf (dump_file,
1987*38fd1498Szrj "abandoning replacement because sequence clobbers live hardregs:");
1988*38fd1498Szrj df_print_regset (dump_file, regs_set);
1989*38fd1498Szrj }
1990*38fd1498Szrj
1991*38fd1498Szrj BITMAP_FREE (regs_set);
1992*38fd1498Szrj return false;
1993*38fd1498Szrj }
1994*38fd1498Szrj BITMAP_FREE (regs_set);
1995*38fd1498Szrj }
1996*38fd1498Szrj
1997*38fd1498Szrj if (validate_change (read_insn->insn, loc, read_reg, 0))
1998*38fd1498Szrj {
1999*38fd1498Szrj deferred_change *change = deferred_change_pool.allocate ();
2000*38fd1498Szrj
2001*38fd1498Szrj /* Insert this right before the store insn where it will be safe
2002*38fd1498Szrj from later insns that might change it before the read. */
2003*38fd1498Szrj emit_insn_before (insns, store_insn->insn);
2004*38fd1498Szrj
2005*38fd1498Szrj /* And now for the kludge part: cselib croaks if you just
2006*38fd1498Szrj return at this point. There are two reasons for this:
2007*38fd1498Szrj
2008*38fd1498Szrj 1) Cselib has an idea of how many pseudos there are and
2009*38fd1498Szrj that does not include the new ones we just added.
2010*38fd1498Szrj
2011*38fd1498Szrj 2) Cselib does not know about the move insn we added
2012*38fd1498Szrj above the store_info, and there is no way to tell it
2013*38fd1498Szrj about it, because it has "moved on".
2014*38fd1498Szrj
2015*38fd1498Szrj Problem (1) is fixable with a certain amount of engineering.
2016*38fd1498Szrj Problem (2) is requires starting the bb from scratch. This
2017*38fd1498Szrj could be expensive.
2018*38fd1498Szrj
2019*38fd1498Szrj So we are just going to have to lie. The move/extraction
2020*38fd1498Szrj insns are not really an issue, cselib did not see them. But
2021*38fd1498Szrj the use of the new pseudo read_insn is a real problem because
2022*38fd1498Szrj cselib has not scanned this insn. The way that we solve this
2023*38fd1498Szrj problem is that we are just going to put the mem back for now
2024*38fd1498Szrj and when we are finished with the block, we undo this. We
2025*38fd1498Szrj keep a table of mems to get rid of. At the end of the basic
2026*38fd1498Szrj block we can put them back. */
2027*38fd1498Szrj
2028*38fd1498Szrj *loc = read_info->mem;
2029*38fd1498Szrj change->next = deferred_change_list;
2030*38fd1498Szrj deferred_change_list = change;
2031*38fd1498Szrj change->loc = loc;
2032*38fd1498Szrj change->reg = read_reg;
2033*38fd1498Szrj
2034*38fd1498Szrj /* Get rid of the read_info, from the point of view of the
2035*38fd1498Szrj rest of dse, play like this read never happened. */
2036*38fd1498Szrj read_insn->read_rec = read_info->next;
2037*38fd1498Szrj read_info_type_pool.remove (read_info);
2038*38fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
2039*38fd1498Szrj {
2040*38fd1498Szrj fprintf (dump_file, " -- replaced the loaded MEM with ");
2041*38fd1498Szrj print_simple_rtl (dump_file, read_reg);
2042*38fd1498Szrj fprintf (dump_file, "\n");
2043*38fd1498Szrj }
2044*38fd1498Szrj return true;
2045*38fd1498Szrj }
2046*38fd1498Szrj else
2047*38fd1498Szrj {
2048*38fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
2049*38fd1498Szrj {
2050*38fd1498Szrj fprintf (dump_file, " -- replacing the loaded MEM with ");
2051*38fd1498Szrj print_simple_rtl (dump_file, read_reg);
2052*38fd1498Szrj fprintf (dump_file, " led to an invalid instruction\n");
2053*38fd1498Szrj }
2054*38fd1498Szrj return false;
2055*38fd1498Szrj }
2056*38fd1498Szrj }
2057*38fd1498Szrj
2058*38fd1498Szrj /* Check the address of MEM *LOC and kill any appropriate stores that may
2059*38fd1498Szrj be active. */
2060*38fd1498Szrj
2061*38fd1498Szrj static void
check_mem_read_rtx(rtx * loc,bb_info_t bb_info)2062*38fd1498Szrj check_mem_read_rtx (rtx *loc, bb_info_t bb_info)
2063*38fd1498Szrj {
2064*38fd1498Szrj rtx mem = *loc, mem_addr;
2065*38fd1498Szrj insn_info_t insn_info;
2066*38fd1498Szrj poly_int64 offset = 0;
2067*38fd1498Szrj poly_int64 width = 0;
2068*38fd1498Szrj cselib_val *base = NULL;
2069*38fd1498Szrj int group_id;
2070*38fd1498Szrj read_info_t read_info;
2071*38fd1498Szrj
2072*38fd1498Szrj insn_info = bb_info->last_insn;
2073*38fd1498Szrj
2074*38fd1498Szrj if ((MEM_ALIAS_SET (mem) == ALIAS_SET_MEMORY_BARRIER)
2075*38fd1498Szrj || (MEM_VOLATILE_P (mem)))
2076*38fd1498Szrj {
2077*38fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
2078*38fd1498Szrj fprintf (dump_file, " adding wild read, volatile or barrier.\n");
2079*38fd1498Szrj add_wild_read (bb_info);
2080*38fd1498Szrj insn_info->cannot_delete = true;
2081*38fd1498Szrj return;
2082*38fd1498Szrj }
2083*38fd1498Szrj
2084*38fd1498Szrj /* If it is reading readonly mem, then there can be no conflict with
2085*38fd1498Szrj another write. */
2086*38fd1498Szrj if (MEM_READONLY_P (mem))
2087*38fd1498Szrj return;
2088*38fd1498Szrj
2089*38fd1498Szrj if (!canon_address (mem, &group_id, &offset, &base))
2090*38fd1498Szrj {
2091*38fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
2092*38fd1498Szrj fprintf (dump_file, " adding wild read, canon_address failure.\n");
2093*38fd1498Szrj add_wild_read (bb_info);
2094*38fd1498Szrj return;
2095*38fd1498Szrj }
2096*38fd1498Szrj
2097*38fd1498Szrj if (GET_MODE (mem) == BLKmode)
2098*38fd1498Szrj width = -1;
2099*38fd1498Szrj else
2100*38fd1498Szrj width = GET_MODE_SIZE (GET_MODE (mem));
2101*38fd1498Szrj
2102*38fd1498Szrj if (!endpoint_representable_p (offset, known_eq (width, -1) ? 1 : width))
2103*38fd1498Szrj {
2104*38fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
2105*38fd1498Szrj fprintf (dump_file, " adding wild read, due to overflow.\n");
2106*38fd1498Szrj add_wild_read (bb_info);
2107*38fd1498Szrj return;
2108*38fd1498Szrj }
2109*38fd1498Szrj
2110*38fd1498Szrj read_info = read_info_type_pool.allocate ();
2111*38fd1498Szrj read_info->group_id = group_id;
2112*38fd1498Szrj read_info->mem = mem;
2113*38fd1498Szrj read_info->offset = offset;
2114*38fd1498Szrj read_info->width = width;
2115*38fd1498Szrj read_info->next = insn_info->read_rec;
2116*38fd1498Szrj insn_info->read_rec = read_info;
2117*38fd1498Szrj if (group_id < 0)
2118*38fd1498Szrj mem_addr = base->val_rtx;
2119*38fd1498Szrj else
2120*38fd1498Szrj {
2121*38fd1498Szrj group_info *group = rtx_group_vec[group_id];
2122*38fd1498Szrj mem_addr = group->canon_base_addr;
2123*38fd1498Szrj }
2124*38fd1498Szrj if (maybe_ne (offset, 0))
2125*38fd1498Szrj mem_addr = plus_constant (get_address_mode (mem), mem_addr, offset);
2126*38fd1498Szrj
2127*38fd1498Szrj if (group_id >= 0)
2128*38fd1498Szrj {
2129*38fd1498Szrj /* This is the restricted case where the base is a constant or
2130*38fd1498Szrj the frame pointer and offset is a constant. */
2131*38fd1498Szrj insn_info_t i_ptr = active_local_stores;
2132*38fd1498Szrj insn_info_t last = NULL;
2133*38fd1498Szrj
2134*38fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
2135*38fd1498Szrj {
2136*38fd1498Szrj if (!known_size_p (width))
2137*38fd1498Szrj fprintf (dump_file, " processing const load gid=%d[BLK]\n",
2138*38fd1498Szrj group_id);
2139*38fd1498Szrj else
2140*38fd1498Szrj {
2141*38fd1498Szrj fprintf (dump_file, " processing const load gid=%d", group_id);
2142*38fd1498Szrj print_range (dump_file, offset, width);
2143*38fd1498Szrj fprintf (dump_file, "\n");
2144*38fd1498Szrj }
2145*38fd1498Szrj }
2146*38fd1498Szrj
2147*38fd1498Szrj while (i_ptr)
2148*38fd1498Szrj {
2149*38fd1498Szrj bool remove = false;
2150*38fd1498Szrj store_info *store_info = i_ptr->store_rec;
2151*38fd1498Szrj
2152*38fd1498Szrj /* Skip the clobbers. */
2153*38fd1498Szrj while (!store_info->is_set)
2154*38fd1498Szrj store_info = store_info->next;
2155*38fd1498Szrj
2156*38fd1498Szrj /* There are three cases here. */
2157*38fd1498Szrj if (store_info->group_id < 0)
2158*38fd1498Szrj /* We have a cselib store followed by a read from a
2159*38fd1498Szrj const base. */
2160*38fd1498Szrj remove
2161*38fd1498Szrj = canon_true_dependence (store_info->mem,
2162*38fd1498Szrj GET_MODE (store_info->mem),
2163*38fd1498Szrj store_info->mem_addr,
2164*38fd1498Szrj mem, mem_addr);
2165*38fd1498Szrj
2166*38fd1498Szrj else if (group_id == store_info->group_id)
2167*38fd1498Szrj {
2168*38fd1498Szrj /* This is a block mode load. We may get lucky and
2169*38fd1498Szrj canon_true_dependence may save the day. */
2170*38fd1498Szrj if (!known_size_p (width))
2171*38fd1498Szrj remove
2172*38fd1498Szrj = canon_true_dependence (store_info->mem,
2173*38fd1498Szrj GET_MODE (store_info->mem),
2174*38fd1498Szrj store_info->mem_addr,
2175*38fd1498Szrj mem, mem_addr);
2176*38fd1498Szrj
2177*38fd1498Szrj /* If this read is just reading back something that we just
2178*38fd1498Szrj stored, rewrite the read. */
2179*38fd1498Szrj else
2180*38fd1498Szrj {
2181*38fd1498Szrj if (store_info->rhs
2182*38fd1498Szrj && known_subrange_p (offset, width, store_info->offset,
2183*38fd1498Szrj store_info->width)
2184*38fd1498Szrj && all_positions_needed_p (store_info,
2185*38fd1498Szrj offset - store_info->offset,
2186*38fd1498Szrj width)
2187*38fd1498Szrj && replace_read (store_info, i_ptr, read_info,
2188*38fd1498Szrj insn_info, loc, bb_info->regs_live))
2189*38fd1498Szrj return;
2190*38fd1498Szrj
2191*38fd1498Szrj /* The bases are the same, just see if the offsets
2192*38fd1498Szrj could overlap. */
2193*38fd1498Szrj if (ranges_maybe_overlap_p (offset, width,
2194*38fd1498Szrj store_info->offset,
2195*38fd1498Szrj store_info->width))
2196*38fd1498Szrj remove = true;
2197*38fd1498Szrj }
2198*38fd1498Szrj }
2199*38fd1498Szrj
2200*38fd1498Szrj /* else
2201*38fd1498Szrj The else case that is missing here is that the
2202*38fd1498Szrj bases are constant but different. There is nothing
2203*38fd1498Szrj to do here because there is no overlap. */
2204*38fd1498Szrj
2205*38fd1498Szrj if (remove)
2206*38fd1498Szrj {
2207*38fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
2208*38fd1498Szrj dump_insn_info ("removing from active", i_ptr);
2209*38fd1498Szrj
2210*38fd1498Szrj active_local_stores_len--;
2211*38fd1498Szrj if (last)
2212*38fd1498Szrj last->next_local_store = i_ptr->next_local_store;
2213*38fd1498Szrj else
2214*38fd1498Szrj active_local_stores = i_ptr->next_local_store;
2215*38fd1498Szrj }
2216*38fd1498Szrj else
2217*38fd1498Szrj last = i_ptr;
2218*38fd1498Szrj i_ptr = i_ptr->next_local_store;
2219*38fd1498Szrj }
2220*38fd1498Szrj }
2221*38fd1498Szrj else
2222*38fd1498Szrj {
2223*38fd1498Szrj insn_info_t i_ptr = active_local_stores;
2224*38fd1498Szrj insn_info_t last = NULL;
2225*38fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
2226*38fd1498Szrj {
2227*38fd1498Szrj fprintf (dump_file, " processing cselib load mem:");
2228*38fd1498Szrj print_inline_rtx (dump_file, mem, 0);
2229*38fd1498Szrj fprintf (dump_file, "\n");
2230*38fd1498Szrj }
2231*38fd1498Szrj
2232*38fd1498Szrj while (i_ptr)
2233*38fd1498Szrj {
2234*38fd1498Szrj bool remove = false;
2235*38fd1498Szrj store_info *store_info = i_ptr->store_rec;
2236*38fd1498Szrj
2237*38fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
2238*38fd1498Szrj fprintf (dump_file, " processing cselib load against insn %d\n",
2239*38fd1498Szrj INSN_UID (i_ptr->insn));
2240*38fd1498Szrj
2241*38fd1498Szrj /* Skip the clobbers. */
2242*38fd1498Szrj while (!store_info->is_set)
2243*38fd1498Szrj store_info = store_info->next;
2244*38fd1498Szrj
2245*38fd1498Szrj /* If this read is just reading back something that we just
2246*38fd1498Szrj stored, rewrite the read. */
2247*38fd1498Szrj if (store_info->rhs
2248*38fd1498Szrj && store_info->group_id == -1
2249*38fd1498Szrj && store_info->cse_base == base
2250*38fd1498Szrj && known_subrange_p (offset, width, store_info->offset,
2251*38fd1498Szrj store_info->width)
2252*38fd1498Szrj && all_positions_needed_p (store_info,
2253*38fd1498Szrj offset - store_info->offset, width)
2254*38fd1498Szrj && replace_read (store_info, i_ptr, read_info, insn_info, loc,
2255*38fd1498Szrj bb_info->regs_live))
2256*38fd1498Szrj return;
2257*38fd1498Szrj
2258*38fd1498Szrj remove = canon_true_dependence (store_info->mem,
2259*38fd1498Szrj GET_MODE (store_info->mem),
2260*38fd1498Szrj store_info->mem_addr,
2261*38fd1498Szrj mem, mem_addr);
2262*38fd1498Szrj
2263*38fd1498Szrj if (remove)
2264*38fd1498Szrj {
2265*38fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
2266*38fd1498Szrj dump_insn_info ("removing from active", i_ptr);
2267*38fd1498Szrj
2268*38fd1498Szrj active_local_stores_len--;
2269*38fd1498Szrj if (last)
2270*38fd1498Szrj last->next_local_store = i_ptr->next_local_store;
2271*38fd1498Szrj else
2272*38fd1498Szrj active_local_stores = i_ptr->next_local_store;
2273*38fd1498Szrj }
2274*38fd1498Szrj else
2275*38fd1498Szrj last = i_ptr;
2276*38fd1498Szrj i_ptr = i_ptr->next_local_store;
2277*38fd1498Szrj }
2278*38fd1498Szrj }
2279*38fd1498Szrj }
2280*38fd1498Szrj
2281*38fd1498Szrj /* A note_uses callback in which DATA points the INSN_INFO for
2282*38fd1498Szrj as check_mem_read_rtx. Nullify the pointer if i_m_r_m_r returns
2283*38fd1498Szrj true for any part of *LOC. */
2284*38fd1498Szrj
2285*38fd1498Szrj static void
check_mem_read_use(rtx * loc,void * data)2286*38fd1498Szrj check_mem_read_use (rtx *loc, void *data)
2287*38fd1498Szrj {
2288*38fd1498Szrj subrtx_ptr_iterator::array_type array;
2289*38fd1498Szrj FOR_EACH_SUBRTX_PTR (iter, array, loc, NONCONST)
2290*38fd1498Szrj {
2291*38fd1498Szrj rtx *loc = *iter;
2292*38fd1498Szrj if (MEM_P (*loc))
2293*38fd1498Szrj check_mem_read_rtx (loc, (bb_info_t) data);
2294*38fd1498Szrj }
2295*38fd1498Szrj }
2296*38fd1498Szrj
2297*38fd1498Szrj
2298*38fd1498Szrj /* Get arguments passed to CALL_INSN. Return TRUE if successful.
2299*38fd1498Szrj So far it only handles arguments passed in registers. */
2300*38fd1498Szrj
2301*38fd1498Szrj static bool
get_call_args(rtx call_insn,tree fn,rtx * args,int nargs)2302*38fd1498Szrj get_call_args (rtx call_insn, tree fn, rtx *args, int nargs)
2303*38fd1498Szrj {
2304*38fd1498Szrj CUMULATIVE_ARGS args_so_far_v;
2305*38fd1498Szrj cumulative_args_t args_so_far;
2306*38fd1498Szrj tree arg;
2307*38fd1498Szrj int idx;
2308*38fd1498Szrj
2309*38fd1498Szrj INIT_CUMULATIVE_ARGS (args_so_far_v, TREE_TYPE (fn), NULL_RTX, 0, 3);
2310*38fd1498Szrj args_so_far = pack_cumulative_args (&args_so_far_v);
2311*38fd1498Szrj
2312*38fd1498Szrj arg = TYPE_ARG_TYPES (TREE_TYPE (fn));
2313*38fd1498Szrj for (idx = 0;
2314*38fd1498Szrj arg != void_list_node && idx < nargs;
2315*38fd1498Szrj arg = TREE_CHAIN (arg), idx++)
2316*38fd1498Szrj {
2317*38fd1498Szrj scalar_int_mode mode;
2318*38fd1498Szrj rtx reg, link, tmp;
2319*38fd1498Szrj
2320*38fd1498Szrj if (!is_int_mode (TYPE_MODE (TREE_VALUE (arg)), &mode))
2321*38fd1498Szrj return false;
2322*38fd1498Szrj
2323*38fd1498Szrj reg = targetm.calls.function_arg (args_so_far, mode, NULL_TREE, true);
2324*38fd1498Szrj if (!reg || !REG_P (reg) || GET_MODE (reg) != mode)
2325*38fd1498Szrj return false;
2326*38fd1498Szrj
2327*38fd1498Szrj for (link = CALL_INSN_FUNCTION_USAGE (call_insn);
2328*38fd1498Szrj link;
2329*38fd1498Szrj link = XEXP (link, 1))
2330*38fd1498Szrj if (GET_CODE (XEXP (link, 0)) == USE)
2331*38fd1498Szrj {
2332*38fd1498Szrj scalar_int_mode arg_mode;
2333*38fd1498Szrj args[idx] = XEXP (XEXP (link, 0), 0);
2334*38fd1498Szrj if (REG_P (args[idx])
2335*38fd1498Szrj && REGNO (args[idx]) == REGNO (reg)
2336*38fd1498Szrj && (GET_MODE (args[idx]) == mode
2337*38fd1498Szrj || (is_int_mode (GET_MODE (args[idx]), &arg_mode)
2338*38fd1498Szrj && (GET_MODE_SIZE (arg_mode) <= UNITS_PER_WORD)
2339*38fd1498Szrj && (GET_MODE_SIZE (arg_mode) > GET_MODE_SIZE (mode)))))
2340*38fd1498Szrj break;
2341*38fd1498Szrj }
2342*38fd1498Szrj if (!link)
2343*38fd1498Szrj return false;
2344*38fd1498Szrj
2345*38fd1498Szrj tmp = cselib_expand_value_rtx (args[idx], scratch, 5);
2346*38fd1498Szrj if (GET_MODE (args[idx]) != mode)
2347*38fd1498Szrj {
2348*38fd1498Szrj if (!tmp || !CONST_INT_P (tmp))
2349*38fd1498Szrj return false;
2350*38fd1498Szrj tmp = gen_int_mode (INTVAL (tmp), mode);
2351*38fd1498Szrj }
2352*38fd1498Szrj if (tmp)
2353*38fd1498Szrj args[idx] = tmp;
2354*38fd1498Szrj
2355*38fd1498Szrj targetm.calls.function_arg_advance (args_so_far, mode, NULL_TREE, true);
2356*38fd1498Szrj }
2357*38fd1498Szrj if (arg != void_list_node || idx != nargs)
2358*38fd1498Szrj return false;
2359*38fd1498Szrj return true;
2360*38fd1498Szrj }
2361*38fd1498Szrj
2362*38fd1498Szrj /* Return a bitmap of the fixed registers contained in IN. */
2363*38fd1498Szrj
2364*38fd1498Szrj static bitmap
copy_fixed_regs(const_bitmap in)2365*38fd1498Szrj copy_fixed_regs (const_bitmap in)
2366*38fd1498Szrj {
2367*38fd1498Szrj bitmap ret;
2368*38fd1498Szrj
2369*38fd1498Szrj ret = ALLOC_REG_SET (NULL);
2370*38fd1498Szrj bitmap_and (ret, in, fixed_reg_set_regset);
2371*38fd1498Szrj return ret;
2372*38fd1498Szrj }
2373*38fd1498Szrj
2374*38fd1498Szrj /* Apply record_store to all candidate stores in INSN. Mark INSN
2375*38fd1498Szrj if some part of it is not a candidate store and assigns to a
2376*38fd1498Szrj non-register target. */
2377*38fd1498Szrj
2378*38fd1498Szrj static void
scan_insn(bb_info_t bb_info,rtx_insn * insn)2379*38fd1498Szrj scan_insn (bb_info_t bb_info, rtx_insn *insn)
2380*38fd1498Szrj {
2381*38fd1498Szrj rtx body;
2382*38fd1498Szrj insn_info_type *insn_info = insn_info_type_pool.allocate ();
2383*38fd1498Szrj int mems_found = 0;
2384*38fd1498Szrj memset (insn_info, 0, sizeof (struct insn_info_type));
2385*38fd1498Szrj
2386*38fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
2387*38fd1498Szrj fprintf (dump_file, "\n**scanning insn=%d\n",
2388*38fd1498Szrj INSN_UID (insn));
2389*38fd1498Szrj
2390*38fd1498Szrj insn_info->prev_insn = bb_info->last_insn;
2391*38fd1498Szrj insn_info->insn = insn;
2392*38fd1498Szrj bb_info->last_insn = insn_info;
2393*38fd1498Szrj
2394*38fd1498Szrj if (DEBUG_INSN_P (insn))
2395*38fd1498Szrj {
2396*38fd1498Szrj insn_info->cannot_delete = true;
2397*38fd1498Szrj return;
2398*38fd1498Szrj }
2399*38fd1498Szrj
2400*38fd1498Szrj /* Look at all of the uses in the insn. */
2401*38fd1498Szrj note_uses (&PATTERN (insn), check_mem_read_use, bb_info);
2402*38fd1498Szrj
2403*38fd1498Szrj if (CALL_P (insn))
2404*38fd1498Szrj {
2405*38fd1498Szrj bool const_call;
2406*38fd1498Szrj rtx call, sym;
2407*38fd1498Szrj tree memset_call = NULL_TREE;
2408*38fd1498Szrj
2409*38fd1498Szrj insn_info->cannot_delete = true;
2410*38fd1498Szrj
2411*38fd1498Szrj /* Const functions cannot do anything bad i.e. read memory,
2412*38fd1498Szrj however, they can read their parameters which may have
2413*38fd1498Szrj been pushed onto the stack.
2414*38fd1498Szrj memset and bzero don't read memory either. */
2415*38fd1498Szrj const_call = RTL_CONST_CALL_P (insn);
2416*38fd1498Szrj if (!const_call
2417*38fd1498Szrj && (call = get_call_rtx_from (insn))
2418*38fd1498Szrj && (sym = XEXP (XEXP (call, 0), 0))
2419*38fd1498Szrj && GET_CODE (sym) == SYMBOL_REF
2420*38fd1498Szrj && SYMBOL_REF_DECL (sym)
2421*38fd1498Szrj && TREE_CODE (SYMBOL_REF_DECL (sym)) == FUNCTION_DECL
2422*38fd1498Szrj && DECL_BUILT_IN_CLASS (SYMBOL_REF_DECL (sym)) == BUILT_IN_NORMAL
2423*38fd1498Szrj && DECL_FUNCTION_CODE (SYMBOL_REF_DECL (sym)) == BUILT_IN_MEMSET)
2424*38fd1498Szrj memset_call = SYMBOL_REF_DECL (sym);
2425*38fd1498Szrj
2426*38fd1498Szrj if (const_call || memset_call)
2427*38fd1498Szrj {
2428*38fd1498Szrj insn_info_t i_ptr = active_local_stores;
2429*38fd1498Szrj insn_info_t last = NULL;
2430*38fd1498Szrj
2431*38fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
2432*38fd1498Szrj fprintf (dump_file, "%s call %d\n",
2433*38fd1498Szrj const_call ? "const" : "memset", INSN_UID (insn));
2434*38fd1498Szrj
2435*38fd1498Szrj /* See the head comment of the frame_read field. */
2436*38fd1498Szrj if (reload_completed
2437*38fd1498Szrj /* Tail calls are storing their arguments using
2438*38fd1498Szrj arg pointer. If it is a frame pointer on the target,
2439*38fd1498Szrj even before reload we need to kill frame pointer based
2440*38fd1498Szrj stores. */
2441*38fd1498Szrj || (SIBLING_CALL_P (insn)
2442*38fd1498Szrj && HARD_FRAME_POINTER_IS_ARG_POINTER))
2443*38fd1498Szrj insn_info->frame_read = true;
2444*38fd1498Szrj
2445*38fd1498Szrj /* Loop over the active stores and remove those which are
2446*38fd1498Szrj killed by the const function call. */
2447*38fd1498Szrj while (i_ptr)
2448*38fd1498Szrj {
2449*38fd1498Szrj bool remove_store = false;
2450*38fd1498Szrj
2451*38fd1498Szrj /* The stack pointer based stores are always killed. */
2452*38fd1498Szrj if (i_ptr->stack_pointer_based)
2453*38fd1498Szrj remove_store = true;
2454*38fd1498Szrj
2455*38fd1498Szrj /* If the frame is read, the frame related stores are killed. */
2456*38fd1498Szrj else if (insn_info->frame_read)
2457*38fd1498Szrj {
2458*38fd1498Szrj store_info *store_info = i_ptr->store_rec;
2459*38fd1498Szrj
2460*38fd1498Szrj /* Skip the clobbers. */
2461*38fd1498Szrj while (!store_info->is_set)
2462*38fd1498Szrj store_info = store_info->next;
2463*38fd1498Szrj
2464*38fd1498Szrj if (store_info->group_id >= 0
2465*38fd1498Szrj && rtx_group_vec[store_info->group_id]->frame_related)
2466*38fd1498Szrj remove_store = true;
2467*38fd1498Szrj }
2468*38fd1498Szrj
2469*38fd1498Szrj if (remove_store)
2470*38fd1498Szrj {
2471*38fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
2472*38fd1498Szrj dump_insn_info ("removing from active", i_ptr);
2473*38fd1498Szrj
2474*38fd1498Szrj active_local_stores_len--;
2475*38fd1498Szrj if (last)
2476*38fd1498Szrj last->next_local_store = i_ptr->next_local_store;
2477*38fd1498Szrj else
2478*38fd1498Szrj active_local_stores = i_ptr->next_local_store;
2479*38fd1498Szrj }
2480*38fd1498Szrj else
2481*38fd1498Szrj last = i_ptr;
2482*38fd1498Szrj
2483*38fd1498Szrj i_ptr = i_ptr->next_local_store;
2484*38fd1498Szrj }
2485*38fd1498Szrj
2486*38fd1498Szrj if (memset_call)
2487*38fd1498Szrj {
2488*38fd1498Szrj rtx args[3];
2489*38fd1498Szrj if (get_call_args (insn, memset_call, args, 3)
2490*38fd1498Szrj && CONST_INT_P (args[1])
2491*38fd1498Szrj && CONST_INT_P (args[2])
2492*38fd1498Szrj && INTVAL (args[2]) > 0)
2493*38fd1498Szrj {
2494*38fd1498Szrj rtx mem = gen_rtx_MEM (BLKmode, args[0]);
2495*38fd1498Szrj set_mem_size (mem, INTVAL (args[2]));
2496*38fd1498Szrj body = gen_rtx_SET (mem, args[1]);
2497*38fd1498Szrj mems_found += record_store (body, bb_info);
2498*38fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
2499*38fd1498Szrj fprintf (dump_file, "handling memset as BLKmode store\n");
2500*38fd1498Szrj if (mems_found == 1)
2501*38fd1498Szrj {
2502*38fd1498Szrj if (active_local_stores_len++
2503*38fd1498Szrj >= PARAM_VALUE (PARAM_MAX_DSE_ACTIVE_LOCAL_STORES))
2504*38fd1498Szrj {
2505*38fd1498Szrj active_local_stores_len = 1;
2506*38fd1498Szrj active_local_stores = NULL;
2507*38fd1498Szrj }
2508*38fd1498Szrj insn_info->fixed_regs_live
2509*38fd1498Szrj = copy_fixed_regs (bb_info->regs_live);
2510*38fd1498Szrj insn_info->next_local_store = active_local_stores;
2511*38fd1498Szrj active_local_stores = insn_info;
2512*38fd1498Szrj }
2513*38fd1498Szrj }
2514*38fd1498Szrj else
2515*38fd1498Szrj clear_rhs_from_active_local_stores ();
2516*38fd1498Szrj }
2517*38fd1498Szrj }
2518*38fd1498Szrj else if (SIBLING_CALL_P (insn) && reload_completed)
2519*38fd1498Szrj /* Arguments for a sibling call that are pushed to memory are passed
2520*38fd1498Szrj using the incoming argument pointer of the current function. After
2521*38fd1498Szrj reload that might be (and likely is) frame pointer based. */
2522*38fd1498Szrj add_wild_read (bb_info);
2523*38fd1498Szrj else
2524*38fd1498Szrj /* Every other call, including pure functions, may read any memory
2525*38fd1498Szrj that is not relative to the frame. */
2526*38fd1498Szrj add_non_frame_wild_read (bb_info);
2527*38fd1498Szrj
2528*38fd1498Szrj return;
2529*38fd1498Szrj }
2530*38fd1498Szrj
2531*38fd1498Szrj /* Assuming that there are sets in these insns, we cannot delete
2532*38fd1498Szrj them. */
2533*38fd1498Szrj if ((GET_CODE (PATTERN (insn)) == CLOBBER)
2534*38fd1498Szrj || volatile_refs_p (PATTERN (insn))
2535*38fd1498Szrj || (!cfun->can_delete_dead_exceptions && !insn_nothrow_p (insn))
2536*38fd1498Szrj || (RTX_FRAME_RELATED_P (insn))
2537*38fd1498Szrj || find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX))
2538*38fd1498Szrj insn_info->cannot_delete = true;
2539*38fd1498Szrj
2540*38fd1498Szrj body = PATTERN (insn);
2541*38fd1498Szrj if (GET_CODE (body) == PARALLEL)
2542*38fd1498Szrj {
2543*38fd1498Szrj int i;
2544*38fd1498Szrj for (i = 0; i < XVECLEN (body, 0); i++)
2545*38fd1498Szrj mems_found += record_store (XVECEXP (body, 0, i), bb_info);
2546*38fd1498Szrj }
2547*38fd1498Szrj else
2548*38fd1498Szrj mems_found += record_store (body, bb_info);
2549*38fd1498Szrj
2550*38fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
2551*38fd1498Szrj fprintf (dump_file, "mems_found = %d, cannot_delete = %s\n",
2552*38fd1498Szrj mems_found, insn_info->cannot_delete ? "true" : "false");
2553*38fd1498Szrj
2554*38fd1498Szrj /* If we found some sets of mems, add it into the active_local_stores so
2555*38fd1498Szrj that it can be locally deleted if found dead or used for
2556*38fd1498Szrj replace_read and redundant constant store elimination. Otherwise mark
2557*38fd1498Szrj it as cannot delete. This simplifies the processing later. */
2558*38fd1498Szrj if (mems_found == 1)
2559*38fd1498Szrj {
2560*38fd1498Szrj if (active_local_stores_len++
2561*38fd1498Szrj >= PARAM_VALUE (PARAM_MAX_DSE_ACTIVE_LOCAL_STORES))
2562*38fd1498Szrj {
2563*38fd1498Szrj active_local_stores_len = 1;
2564*38fd1498Szrj active_local_stores = NULL;
2565*38fd1498Szrj }
2566*38fd1498Szrj insn_info->fixed_regs_live = copy_fixed_regs (bb_info->regs_live);
2567*38fd1498Szrj insn_info->next_local_store = active_local_stores;
2568*38fd1498Szrj active_local_stores = insn_info;
2569*38fd1498Szrj }
2570*38fd1498Szrj else
2571*38fd1498Szrj insn_info->cannot_delete = true;
2572*38fd1498Szrj }
2573*38fd1498Szrj
2574*38fd1498Szrj
2575*38fd1498Szrj /* Remove BASE from the set of active_local_stores. This is a
2576*38fd1498Szrj callback from cselib that is used to get rid of the stores in
2577*38fd1498Szrj active_local_stores. */
2578*38fd1498Szrj
2579*38fd1498Szrj static void
remove_useless_values(cselib_val * base)2580*38fd1498Szrj remove_useless_values (cselib_val *base)
2581*38fd1498Szrj {
2582*38fd1498Szrj insn_info_t insn_info = active_local_stores;
2583*38fd1498Szrj insn_info_t last = NULL;
2584*38fd1498Szrj
2585*38fd1498Szrj while (insn_info)
2586*38fd1498Szrj {
2587*38fd1498Szrj store_info *store_info = insn_info->store_rec;
2588*38fd1498Szrj bool del = false;
2589*38fd1498Szrj
2590*38fd1498Szrj /* If ANY of the store_infos match the cselib group that is
2591*38fd1498Szrj being deleted, then the insn can not be deleted. */
2592*38fd1498Szrj while (store_info)
2593*38fd1498Szrj {
2594*38fd1498Szrj if ((store_info->group_id == -1)
2595*38fd1498Szrj && (store_info->cse_base == base))
2596*38fd1498Szrj {
2597*38fd1498Szrj del = true;
2598*38fd1498Szrj break;
2599*38fd1498Szrj }
2600*38fd1498Szrj store_info = store_info->next;
2601*38fd1498Szrj }
2602*38fd1498Szrj
2603*38fd1498Szrj if (del)
2604*38fd1498Szrj {
2605*38fd1498Szrj active_local_stores_len--;
2606*38fd1498Szrj if (last)
2607*38fd1498Szrj last->next_local_store = insn_info->next_local_store;
2608*38fd1498Szrj else
2609*38fd1498Szrj active_local_stores = insn_info->next_local_store;
2610*38fd1498Szrj free_store_info (insn_info);
2611*38fd1498Szrj }
2612*38fd1498Szrj else
2613*38fd1498Szrj last = insn_info;
2614*38fd1498Szrj
2615*38fd1498Szrj insn_info = insn_info->next_local_store;
2616*38fd1498Szrj }
2617*38fd1498Szrj }
2618*38fd1498Szrj
2619*38fd1498Szrj
2620*38fd1498Szrj /* Do all of step 1. */
2621*38fd1498Szrj
2622*38fd1498Szrj static void
dse_step1(void)2623*38fd1498Szrj dse_step1 (void)
2624*38fd1498Szrj {
2625*38fd1498Szrj basic_block bb;
2626*38fd1498Szrj bitmap regs_live = BITMAP_ALLOC (®_obstack);
2627*38fd1498Szrj
2628*38fd1498Szrj cselib_init (0);
2629*38fd1498Szrj all_blocks = BITMAP_ALLOC (NULL);
2630*38fd1498Szrj bitmap_set_bit (all_blocks, ENTRY_BLOCK);
2631*38fd1498Szrj bitmap_set_bit (all_blocks, EXIT_BLOCK);
2632*38fd1498Szrj
2633*38fd1498Szrj FOR_ALL_BB_FN (bb, cfun)
2634*38fd1498Szrj {
2635*38fd1498Szrj insn_info_t ptr;
2636*38fd1498Szrj bb_info_t bb_info = dse_bb_info_type_pool.allocate ();
2637*38fd1498Szrj
2638*38fd1498Szrj memset (bb_info, 0, sizeof (dse_bb_info_type));
2639*38fd1498Szrj bitmap_set_bit (all_blocks, bb->index);
2640*38fd1498Szrj bb_info->regs_live = regs_live;
2641*38fd1498Szrj
2642*38fd1498Szrj bitmap_copy (regs_live, DF_LR_IN (bb));
2643*38fd1498Szrj df_simulate_initialize_forwards (bb, regs_live);
2644*38fd1498Szrj
2645*38fd1498Szrj bb_table[bb->index] = bb_info;
2646*38fd1498Szrj cselib_discard_hook = remove_useless_values;
2647*38fd1498Szrj
2648*38fd1498Szrj if (bb->index >= NUM_FIXED_BLOCKS)
2649*38fd1498Szrj {
2650*38fd1498Szrj rtx_insn *insn;
2651*38fd1498Szrj
2652*38fd1498Szrj active_local_stores = NULL;
2653*38fd1498Szrj active_local_stores_len = 0;
2654*38fd1498Szrj cselib_clear_table ();
2655*38fd1498Szrj
2656*38fd1498Szrj /* Scan the insns. */
2657*38fd1498Szrj FOR_BB_INSNS (bb, insn)
2658*38fd1498Szrj {
2659*38fd1498Szrj if (INSN_P (insn))
2660*38fd1498Szrj scan_insn (bb_info, insn);
2661*38fd1498Szrj cselib_process_insn (insn);
2662*38fd1498Szrj if (INSN_P (insn))
2663*38fd1498Szrj df_simulate_one_insn_forwards (bb, insn, regs_live);
2664*38fd1498Szrj }
2665*38fd1498Szrj
2666*38fd1498Szrj /* This is something of a hack, because the global algorithm
2667*38fd1498Szrj is supposed to take care of the case where stores go dead
2668*38fd1498Szrj at the end of the function. However, the global
2669*38fd1498Szrj algorithm must take a more conservative view of block
2670*38fd1498Szrj mode reads than the local alg does. So to get the case
2671*38fd1498Szrj where you have a store to the frame followed by a non
2672*38fd1498Szrj overlapping block more read, we look at the active local
2673*38fd1498Szrj stores at the end of the function and delete all of the
2674*38fd1498Szrj frame and spill based ones. */
2675*38fd1498Szrj if (stores_off_frame_dead_at_return
2676*38fd1498Szrj && (EDGE_COUNT (bb->succs) == 0
2677*38fd1498Szrj || (single_succ_p (bb)
2678*38fd1498Szrj && single_succ (bb) == EXIT_BLOCK_PTR_FOR_FN (cfun)
2679*38fd1498Szrj && ! crtl->calls_eh_return)))
2680*38fd1498Szrj {
2681*38fd1498Szrj insn_info_t i_ptr = active_local_stores;
2682*38fd1498Szrj while (i_ptr)
2683*38fd1498Szrj {
2684*38fd1498Szrj store_info *store_info = i_ptr->store_rec;
2685*38fd1498Szrj
2686*38fd1498Szrj /* Skip the clobbers. */
2687*38fd1498Szrj while (!store_info->is_set)
2688*38fd1498Szrj store_info = store_info->next;
2689*38fd1498Szrj if (store_info->group_id >= 0)
2690*38fd1498Szrj {
2691*38fd1498Szrj group_info *group = rtx_group_vec[store_info->group_id];
2692*38fd1498Szrj if (group->frame_related && !i_ptr->cannot_delete)
2693*38fd1498Szrj delete_dead_store_insn (i_ptr);
2694*38fd1498Szrj }
2695*38fd1498Szrj
2696*38fd1498Szrj i_ptr = i_ptr->next_local_store;
2697*38fd1498Szrj }
2698*38fd1498Szrj }
2699*38fd1498Szrj
2700*38fd1498Szrj /* Get rid of the loads that were discovered in
2701*38fd1498Szrj replace_read. Cselib is finished with this block. */
2702*38fd1498Szrj while (deferred_change_list)
2703*38fd1498Szrj {
2704*38fd1498Szrj deferred_change *next = deferred_change_list->next;
2705*38fd1498Szrj
2706*38fd1498Szrj /* There is no reason to validate this change. That was
2707*38fd1498Szrj done earlier. */
2708*38fd1498Szrj *deferred_change_list->loc = deferred_change_list->reg;
2709*38fd1498Szrj deferred_change_pool.remove (deferred_change_list);
2710*38fd1498Szrj deferred_change_list = next;
2711*38fd1498Szrj }
2712*38fd1498Szrj
2713*38fd1498Szrj /* Get rid of all of the cselib based store_infos in this
2714*38fd1498Szrj block and mark the containing insns as not being
2715*38fd1498Szrj deletable. */
2716*38fd1498Szrj ptr = bb_info->last_insn;
2717*38fd1498Szrj while (ptr)
2718*38fd1498Szrj {
2719*38fd1498Szrj if (ptr->contains_cselib_groups)
2720*38fd1498Szrj {
2721*38fd1498Szrj store_info *s_info = ptr->store_rec;
2722*38fd1498Szrj while (s_info && !s_info->is_set)
2723*38fd1498Szrj s_info = s_info->next;
2724*38fd1498Szrj if (s_info
2725*38fd1498Szrj && s_info->redundant_reason
2726*38fd1498Szrj && s_info->redundant_reason->insn
2727*38fd1498Szrj && !ptr->cannot_delete)
2728*38fd1498Szrj {
2729*38fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
2730*38fd1498Szrj fprintf (dump_file, "Locally deleting insn %d "
2731*38fd1498Szrj "because insn %d stores the "
2732*38fd1498Szrj "same value and couldn't be "
2733*38fd1498Szrj "eliminated\n",
2734*38fd1498Szrj INSN_UID (ptr->insn),
2735*38fd1498Szrj INSN_UID (s_info->redundant_reason->insn));
2736*38fd1498Szrj delete_dead_store_insn (ptr);
2737*38fd1498Szrj }
2738*38fd1498Szrj free_store_info (ptr);
2739*38fd1498Szrj }
2740*38fd1498Szrj else
2741*38fd1498Szrj {
2742*38fd1498Szrj store_info *s_info;
2743*38fd1498Szrj
2744*38fd1498Szrj /* Free at least positions_needed bitmaps. */
2745*38fd1498Szrj for (s_info = ptr->store_rec; s_info; s_info = s_info->next)
2746*38fd1498Szrj if (s_info->is_large)
2747*38fd1498Szrj {
2748*38fd1498Szrj BITMAP_FREE (s_info->positions_needed.large.bmap);
2749*38fd1498Szrj s_info->is_large = false;
2750*38fd1498Szrj }
2751*38fd1498Szrj }
2752*38fd1498Szrj ptr = ptr->prev_insn;
2753*38fd1498Szrj }
2754*38fd1498Szrj
2755*38fd1498Szrj cse_store_info_pool.release ();
2756*38fd1498Szrj }
2757*38fd1498Szrj bb_info->regs_live = NULL;
2758*38fd1498Szrj }
2759*38fd1498Szrj
2760*38fd1498Szrj BITMAP_FREE (regs_live);
2761*38fd1498Szrj cselib_finish ();
2762*38fd1498Szrj rtx_group_table->empty ();
2763*38fd1498Szrj }
2764*38fd1498Szrj
2765*38fd1498Szrj
2766*38fd1498Szrj /*----------------------------------------------------------------------------
2767*38fd1498Szrj Second step.
2768*38fd1498Szrj
2769*38fd1498Szrj Assign each byte position in the stores that we are going to
2770*38fd1498Szrj analyze globally to a position in the bitmaps. Returns true if
2771*38fd1498Szrj there are any bit positions assigned.
2772*38fd1498Szrj ----------------------------------------------------------------------------*/
2773*38fd1498Szrj
2774*38fd1498Szrj static void
dse_step2_init(void)2775*38fd1498Szrj dse_step2_init (void)
2776*38fd1498Szrj {
2777*38fd1498Szrj unsigned int i;
2778*38fd1498Szrj group_info *group;
2779*38fd1498Szrj
2780*38fd1498Szrj FOR_EACH_VEC_ELT (rtx_group_vec, i, group)
2781*38fd1498Szrj {
2782*38fd1498Szrj /* For all non stack related bases, we only consider a store to
2783*38fd1498Szrj be deletable if there are two or more stores for that
2784*38fd1498Szrj position. This is because it takes one store to make the
2785*38fd1498Szrj other store redundant. However, for the stores that are
2786*38fd1498Szrj stack related, we consider them if there is only one store
2787*38fd1498Szrj for the position. We do this because the stack related
2788*38fd1498Szrj stores can be deleted if their is no read between them and
2789*38fd1498Szrj the end of the function.
2790*38fd1498Szrj
2791*38fd1498Szrj To make this work in the current framework, we take the stack
2792*38fd1498Szrj related bases add all of the bits from store1 into store2.
2793*38fd1498Szrj This has the effect of making the eligible even if there is
2794*38fd1498Szrj only one store. */
2795*38fd1498Szrj
2796*38fd1498Szrj if (stores_off_frame_dead_at_return && group->frame_related)
2797*38fd1498Szrj {
2798*38fd1498Szrj bitmap_ior_into (group->store2_n, group->store1_n);
2799*38fd1498Szrj bitmap_ior_into (group->store2_p, group->store1_p);
2800*38fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
2801*38fd1498Szrj fprintf (dump_file, "group %d is frame related ", i);
2802*38fd1498Szrj }
2803*38fd1498Szrj
2804*38fd1498Szrj group->offset_map_size_n++;
2805*38fd1498Szrj group->offset_map_n = XOBNEWVEC (&dse_obstack, int,
2806*38fd1498Szrj group->offset_map_size_n);
2807*38fd1498Szrj group->offset_map_size_p++;
2808*38fd1498Szrj group->offset_map_p = XOBNEWVEC (&dse_obstack, int,
2809*38fd1498Szrj group->offset_map_size_p);
2810*38fd1498Szrj group->process_globally = false;
2811*38fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
2812*38fd1498Szrj {
2813*38fd1498Szrj fprintf (dump_file, "group %d(%d+%d): ", i,
2814*38fd1498Szrj (int)bitmap_count_bits (group->store2_n),
2815*38fd1498Szrj (int)bitmap_count_bits (group->store2_p));
2816*38fd1498Szrj bitmap_print (dump_file, group->store2_n, "n ", " ");
2817*38fd1498Szrj bitmap_print (dump_file, group->store2_p, "p ", "\n");
2818*38fd1498Szrj }
2819*38fd1498Szrj }
2820*38fd1498Szrj }
2821*38fd1498Szrj
2822*38fd1498Szrj
2823*38fd1498Szrj /* Init the offset tables. */
2824*38fd1498Szrj
2825*38fd1498Szrj static bool
dse_step2(void)2826*38fd1498Szrj dse_step2 (void)
2827*38fd1498Szrj {
2828*38fd1498Szrj unsigned int i;
2829*38fd1498Szrj group_info *group;
2830*38fd1498Szrj /* Position 0 is unused because 0 is used in the maps to mean
2831*38fd1498Szrj unused. */
2832*38fd1498Szrj current_position = 1;
2833*38fd1498Szrj FOR_EACH_VEC_ELT (rtx_group_vec, i, group)
2834*38fd1498Szrj {
2835*38fd1498Szrj bitmap_iterator bi;
2836*38fd1498Szrj unsigned int j;
2837*38fd1498Szrj
2838*38fd1498Szrj memset (group->offset_map_n, 0, sizeof (int) * group->offset_map_size_n);
2839*38fd1498Szrj memset (group->offset_map_p, 0, sizeof (int) * group->offset_map_size_p);
2840*38fd1498Szrj bitmap_clear (group->group_kill);
2841*38fd1498Szrj
2842*38fd1498Szrj EXECUTE_IF_SET_IN_BITMAP (group->store2_n, 0, j, bi)
2843*38fd1498Szrj {
2844*38fd1498Szrj bitmap_set_bit (group->group_kill, current_position);
2845*38fd1498Szrj if (bitmap_bit_p (group->escaped_n, j))
2846*38fd1498Szrj bitmap_set_bit (kill_on_calls, current_position);
2847*38fd1498Szrj group->offset_map_n[j] = current_position++;
2848*38fd1498Szrj group->process_globally = true;
2849*38fd1498Szrj }
2850*38fd1498Szrj EXECUTE_IF_SET_IN_BITMAP (group->store2_p, 0, j, bi)
2851*38fd1498Szrj {
2852*38fd1498Szrj bitmap_set_bit (group->group_kill, current_position);
2853*38fd1498Szrj if (bitmap_bit_p (group->escaped_p, j))
2854*38fd1498Szrj bitmap_set_bit (kill_on_calls, current_position);
2855*38fd1498Szrj group->offset_map_p[j] = current_position++;
2856*38fd1498Szrj group->process_globally = true;
2857*38fd1498Szrj }
2858*38fd1498Szrj }
2859*38fd1498Szrj return current_position != 1;
2860*38fd1498Szrj }
2861*38fd1498Szrj
2862*38fd1498Szrj
2863*38fd1498Szrj
2864*38fd1498Szrj /*----------------------------------------------------------------------------
2865*38fd1498Szrj Third step.
2866*38fd1498Szrj
2867*38fd1498Szrj Build the bit vectors for the transfer functions.
2868*38fd1498Szrj ----------------------------------------------------------------------------*/
2869*38fd1498Szrj
2870*38fd1498Szrj
2871*38fd1498Szrj /* Look up the bitmap index for OFFSET in GROUP_INFO. If it is not
2872*38fd1498Szrj there, return 0. */
2873*38fd1498Szrj
2874*38fd1498Szrj static int
get_bitmap_index(group_info * group_info,HOST_WIDE_INT offset)2875*38fd1498Szrj get_bitmap_index (group_info *group_info, HOST_WIDE_INT offset)
2876*38fd1498Szrj {
2877*38fd1498Szrj if (offset < 0)
2878*38fd1498Szrj {
2879*38fd1498Szrj HOST_WIDE_INT offset_p = -offset;
2880*38fd1498Szrj if (offset_p >= group_info->offset_map_size_n)
2881*38fd1498Szrj return 0;
2882*38fd1498Szrj return group_info->offset_map_n[offset_p];
2883*38fd1498Szrj }
2884*38fd1498Szrj else
2885*38fd1498Szrj {
2886*38fd1498Szrj if (offset >= group_info->offset_map_size_p)
2887*38fd1498Szrj return 0;
2888*38fd1498Szrj return group_info->offset_map_p[offset];
2889*38fd1498Szrj }
2890*38fd1498Szrj }
2891*38fd1498Szrj
2892*38fd1498Szrj
2893*38fd1498Szrj /* Process the STORE_INFOs into the bitmaps into GEN and KILL. KILL
2894*38fd1498Szrj may be NULL. */
2895*38fd1498Szrj
2896*38fd1498Szrj static void
scan_stores(store_info * store_info,bitmap gen,bitmap kill)2897*38fd1498Szrj scan_stores (store_info *store_info, bitmap gen, bitmap kill)
2898*38fd1498Szrj {
2899*38fd1498Szrj while (store_info)
2900*38fd1498Szrj {
2901*38fd1498Szrj HOST_WIDE_INT i, offset, width;
2902*38fd1498Szrj group_info *group_info
2903*38fd1498Szrj = rtx_group_vec[store_info->group_id];
2904*38fd1498Szrj /* We can (conservatively) ignore stores whose bounds aren't known;
2905*38fd1498Szrj they simply don't generate new global dse opportunities. */
2906*38fd1498Szrj if (group_info->process_globally
2907*38fd1498Szrj && store_info->offset.is_constant (&offset)
2908*38fd1498Szrj && store_info->width.is_constant (&width))
2909*38fd1498Szrj {
2910*38fd1498Szrj HOST_WIDE_INT end = offset + width;
2911*38fd1498Szrj for (i = offset; i < end; i++)
2912*38fd1498Szrj {
2913*38fd1498Szrj int index = get_bitmap_index (group_info, i);
2914*38fd1498Szrj if (index != 0)
2915*38fd1498Szrj {
2916*38fd1498Szrj bitmap_set_bit (gen, index);
2917*38fd1498Szrj if (kill)
2918*38fd1498Szrj bitmap_clear_bit (kill, index);
2919*38fd1498Szrj }
2920*38fd1498Szrj }
2921*38fd1498Szrj }
2922*38fd1498Szrj store_info = store_info->next;
2923*38fd1498Szrj }
2924*38fd1498Szrj }
2925*38fd1498Szrj
2926*38fd1498Szrj
2927*38fd1498Szrj /* Process the READ_INFOs into the bitmaps into GEN and KILL. KILL
2928*38fd1498Szrj may be NULL. */
2929*38fd1498Szrj
2930*38fd1498Szrj static void
scan_reads(insn_info_t insn_info,bitmap gen,bitmap kill)2931*38fd1498Szrj scan_reads (insn_info_t insn_info, bitmap gen, bitmap kill)
2932*38fd1498Szrj {
2933*38fd1498Szrj read_info_t read_info = insn_info->read_rec;
2934*38fd1498Szrj int i;
2935*38fd1498Szrj group_info *group;
2936*38fd1498Szrj
2937*38fd1498Szrj /* If this insn reads the frame, kill all the frame related stores. */
2938*38fd1498Szrj if (insn_info->frame_read)
2939*38fd1498Szrj {
2940*38fd1498Szrj FOR_EACH_VEC_ELT (rtx_group_vec, i, group)
2941*38fd1498Szrj if (group->process_globally && group->frame_related)
2942*38fd1498Szrj {
2943*38fd1498Szrj if (kill)
2944*38fd1498Szrj bitmap_ior_into (kill, group->group_kill);
2945*38fd1498Szrj bitmap_and_compl_into (gen, group->group_kill);
2946*38fd1498Szrj }
2947*38fd1498Szrj }
2948*38fd1498Szrj if (insn_info->non_frame_wild_read)
2949*38fd1498Szrj {
2950*38fd1498Szrj /* Kill all non-frame related stores. Kill all stores of variables that
2951*38fd1498Szrj escape. */
2952*38fd1498Szrj if (kill)
2953*38fd1498Szrj bitmap_ior_into (kill, kill_on_calls);
2954*38fd1498Szrj bitmap_and_compl_into (gen, kill_on_calls);
2955*38fd1498Szrj FOR_EACH_VEC_ELT (rtx_group_vec, i, group)
2956*38fd1498Szrj if (group->process_globally && !group->frame_related)
2957*38fd1498Szrj {
2958*38fd1498Szrj if (kill)
2959*38fd1498Szrj bitmap_ior_into (kill, group->group_kill);
2960*38fd1498Szrj bitmap_and_compl_into (gen, group->group_kill);
2961*38fd1498Szrj }
2962*38fd1498Szrj }
2963*38fd1498Szrj while (read_info)
2964*38fd1498Szrj {
2965*38fd1498Szrj FOR_EACH_VEC_ELT (rtx_group_vec, i, group)
2966*38fd1498Szrj {
2967*38fd1498Szrj if (group->process_globally)
2968*38fd1498Szrj {
2969*38fd1498Szrj if (i == read_info->group_id)
2970*38fd1498Szrj {
2971*38fd1498Szrj HOST_WIDE_INT offset, width;
2972*38fd1498Szrj /* Reads with non-constant size kill all DSE opportunities
2973*38fd1498Szrj in the group. */
2974*38fd1498Szrj if (!read_info->offset.is_constant (&offset)
2975*38fd1498Szrj || !read_info->width.is_constant (&width)
2976*38fd1498Szrj || !known_size_p (width))
2977*38fd1498Szrj {
2978*38fd1498Szrj /* Handle block mode reads. */
2979*38fd1498Szrj if (kill)
2980*38fd1498Szrj bitmap_ior_into (kill, group->group_kill);
2981*38fd1498Szrj bitmap_and_compl_into (gen, group->group_kill);
2982*38fd1498Szrj }
2983*38fd1498Szrj else
2984*38fd1498Szrj {
2985*38fd1498Szrj /* The groups are the same, just process the
2986*38fd1498Szrj offsets. */
2987*38fd1498Szrj HOST_WIDE_INT j;
2988*38fd1498Szrj HOST_WIDE_INT end = offset + width;
2989*38fd1498Szrj for (j = offset; j < end; j++)
2990*38fd1498Szrj {
2991*38fd1498Szrj int index = get_bitmap_index (group, j);
2992*38fd1498Szrj if (index != 0)
2993*38fd1498Szrj {
2994*38fd1498Szrj if (kill)
2995*38fd1498Szrj bitmap_set_bit (kill, index);
2996*38fd1498Szrj bitmap_clear_bit (gen, index);
2997*38fd1498Szrj }
2998*38fd1498Szrj }
2999*38fd1498Szrj }
3000*38fd1498Szrj }
3001*38fd1498Szrj else
3002*38fd1498Szrj {
3003*38fd1498Szrj /* The groups are different, if the alias sets
3004*38fd1498Szrj conflict, clear the entire group. We only need
3005*38fd1498Szrj to apply this test if the read_info is a cselib
3006*38fd1498Szrj read. Anything with a constant base cannot alias
3007*38fd1498Szrj something else with a different constant
3008*38fd1498Szrj base. */
3009*38fd1498Szrj if ((read_info->group_id < 0)
3010*38fd1498Szrj && canon_true_dependence (group->base_mem,
3011*38fd1498Szrj GET_MODE (group->base_mem),
3012*38fd1498Szrj group->canon_base_addr,
3013*38fd1498Szrj read_info->mem, NULL_RTX))
3014*38fd1498Szrj {
3015*38fd1498Szrj if (kill)
3016*38fd1498Szrj bitmap_ior_into (kill, group->group_kill);
3017*38fd1498Szrj bitmap_and_compl_into (gen, group->group_kill);
3018*38fd1498Szrj }
3019*38fd1498Szrj }
3020*38fd1498Szrj }
3021*38fd1498Szrj }
3022*38fd1498Szrj
3023*38fd1498Szrj read_info = read_info->next;
3024*38fd1498Szrj }
3025*38fd1498Szrj }
3026*38fd1498Szrj
3027*38fd1498Szrj
3028*38fd1498Szrj /* Return the insn in BB_INFO before the first wild read or if there
3029*38fd1498Szrj are no wild reads in the block, return the last insn. */
3030*38fd1498Szrj
3031*38fd1498Szrj static insn_info_t
find_insn_before_first_wild_read(bb_info_t bb_info)3032*38fd1498Szrj find_insn_before_first_wild_read (bb_info_t bb_info)
3033*38fd1498Szrj {
3034*38fd1498Szrj insn_info_t insn_info = bb_info->last_insn;
3035*38fd1498Szrj insn_info_t last_wild_read = NULL;
3036*38fd1498Szrj
3037*38fd1498Szrj while (insn_info)
3038*38fd1498Szrj {
3039*38fd1498Szrj if (insn_info->wild_read)
3040*38fd1498Szrj {
3041*38fd1498Szrj last_wild_read = insn_info->prev_insn;
3042*38fd1498Szrj /* Block starts with wild read. */
3043*38fd1498Szrj if (!last_wild_read)
3044*38fd1498Szrj return NULL;
3045*38fd1498Szrj }
3046*38fd1498Szrj
3047*38fd1498Szrj insn_info = insn_info->prev_insn;
3048*38fd1498Szrj }
3049*38fd1498Szrj
3050*38fd1498Szrj if (last_wild_read)
3051*38fd1498Szrj return last_wild_read;
3052*38fd1498Szrj else
3053*38fd1498Szrj return bb_info->last_insn;
3054*38fd1498Szrj }
3055*38fd1498Szrj
3056*38fd1498Szrj
3057*38fd1498Szrj /* Scan the insns in BB_INFO starting at PTR and going to the top of
3058*38fd1498Szrj the block in order to build the gen and kill sets for the block.
3059*38fd1498Szrj We start at ptr which may be the last insn in the block or may be
3060*38fd1498Szrj the first insn with a wild read. In the latter case we are able to
3061*38fd1498Szrj skip the rest of the block because it just does not matter:
3062*38fd1498Szrj anything that happens is hidden by the wild read. */
3063*38fd1498Szrj
3064*38fd1498Szrj static void
dse_step3_scan(basic_block bb)3065*38fd1498Szrj dse_step3_scan (basic_block bb)
3066*38fd1498Szrj {
3067*38fd1498Szrj bb_info_t bb_info = bb_table[bb->index];
3068*38fd1498Szrj insn_info_t insn_info;
3069*38fd1498Szrj
3070*38fd1498Szrj insn_info = find_insn_before_first_wild_read (bb_info);
3071*38fd1498Szrj
3072*38fd1498Szrj /* In the spill case or in the no_spill case if there is no wild
3073*38fd1498Szrj read in the block, we will need a kill set. */
3074*38fd1498Szrj if (insn_info == bb_info->last_insn)
3075*38fd1498Szrj {
3076*38fd1498Szrj if (bb_info->kill)
3077*38fd1498Szrj bitmap_clear (bb_info->kill);
3078*38fd1498Szrj else
3079*38fd1498Szrj bb_info->kill = BITMAP_ALLOC (&dse_bitmap_obstack);
3080*38fd1498Szrj }
3081*38fd1498Szrj else
3082*38fd1498Szrj if (bb_info->kill)
3083*38fd1498Szrj BITMAP_FREE (bb_info->kill);
3084*38fd1498Szrj
3085*38fd1498Szrj while (insn_info)
3086*38fd1498Szrj {
3087*38fd1498Szrj /* There may have been code deleted by the dce pass run before
3088*38fd1498Szrj this phase. */
3089*38fd1498Szrj if (insn_info->insn && INSN_P (insn_info->insn))
3090*38fd1498Szrj {
3091*38fd1498Szrj scan_stores (insn_info->store_rec, bb_info->gen, bb_info->kill);
3092*38fd1498Szrj scan_reads (insn_info, bb_info->gen, bb_info->kill);
3093*38fd1498Szrj }
3094*38fd1498Szrj
3095*38fd1498Szrj insn_info = insn_info->prev_insn;
3096*38fd1498Szrj }
3097*38fd1498Szrj }
3098*38fd1498Szrj
3099*38fd1498Szrj
3100*38fd1498Szrj /* Set the gen set of the exit block, and also any block with no
3101*38fd1498Szrj successors that does not have a wild read. */
3102*38fd1498Szrj
3103*38fd1498Szrj static void
dse_step3_exit_block_scan(bb_info_t bb_info)3104*38fd1498Szrj dse_step3_exit_block_scan (bb_info_t bb_info)
3105*38fd1498Szrj {
3106*38fd1498Szrj /* The gen set is all 0's for the exit block except for the
3107*38fd1498Szrj frame_pointer_group. */
3108*38fd1498Szrj
3109*38fd1498Szrj if (stores_off_frame_dead_at_return)
3110*38fd1498Szrj {
3111*38fd1498Szrj unsigned int i;
3112*38fd1498Szrj group_info *group;
3113*38fd1498Szrj
3114*38fd1498Szrj FOR_EACH_VEC_ELT (rtx_group_vec, i, group)
3115*38fd1498Szrj {
3116*38fd1498Szrj if (group->process_globally && group->frame_related)
3117*38fd1498Szrj bitmap_ior_into (bb_info->gen, group->group_kill);
3118*38fd1498Szrj }
3119*38fd1498Szrj }
3120*38fd1498Szrj }
3121*38fd1498Szrj
3122*38fd1498Szrj
3123*38fd1498Szrj /* Find all of the blocks that are not backwards reachable from the
3124*38fd1498Szrj exit block or any block with no successors (BB). These are the
3125*38fd1498Szrj infinite loops or infinite self loops. These blocks will still
3126*38fd1498Szrj have their bits set in UNREACHABLE_BLOCKS. */
3127*38fd1498Szrj
3128*38fd1498Szrj static void
mark_reachable_blocks(sbitmap unreachable_blocks,basic_block bb)3129*38fd1498Szrj mark_reachable_blocks (sbitmap unreachable_blocks, basic_block bb)
3130*38fd1498Szrj {
3131*38fd1498Szrj edge e;
3132*38fd1498Szrj edge_iterator ei;
3133*38fd1498Szrj
3134*38fd1498Szrj if (bitmap_bit_p (unreachable_blocks, bb->index))
3135*38fd1498Szrj {
3136*38fd1498Szrj bitmap_clear_bit (unreachable_blocks, bb->index);
3137*38fd1498Szrj FOR_EACH_EDGE (e, ei, bb->preds)
3138*38fd1498Szrj {
3139*38fd1498Szrj mark_reachable_blocks (unreachable_blocks, e->src);
3140*38fd1498Szrj }
3141*38fd1498Szrj }
3142*38fd1498Szrj }
3143*38fd1498Szrj
3144*38fd1498Szrj /* Build the transfer functions for the function. */
3145*38fd1498Szrj
3146*38fd1498Szrj static void
dse_step3()3147*38fd1498Szrj dse_step3 ()
3148*38fd1498Szrj {
3149*38fd1498Szrj basic_block bb;
3150*38fd1498Szrj sbitmap_iterator sbi;
3151*38fd1498Szrj bitmap all_ones = NULL;
3152*38fd1498Szrj unsigned int i;
3153*38fd1498Szrj
3154*38fd1498Szrj auto_sbitmap unreachable_blocks (last_basic_block_for_fn (cfun));
3155*38fd1498Szrj bitmap_ones (unreachable_blocks);
3156*38fd1498Szrj
3157*38fd1498Szrj FOR_ALL_BB_FN (bb, cfun)
3158*38fd1498Szrj {
3159*38fd1498Szrj bb_info_t bb_info = bb_table[bb->index];
3160*38fd1498Szrj if (bb_info->gen)
3161*38fd1498Szrj bitmap_clear (bb_info->gen);
3162*38fd1498Szrj else
3163*38fd1498Szrj bb_info->gen = BITMAP_ALLOC (&dse_bitmap_obstack);
3164*38fd1498Szrj
3165*38fd1498Szrj if (bb->index == ENTRY_BLOCK)
3166*38fd1498Szrj ;
3167*38fd1498Szrj else if (bb->index == EXIT_BLOCK)
3168*38fd1498Szrj dse_step3_exit_block_scan (bb_info);
3169*38fd1498Szrj else
3170*38fd1498Szrj dse_step3_scan (bb);
3171*38fd1498Szrj if (EDGE_COUNT (bb->succs) == 0)
3172*38fd1498Szrj mark_reachable_blocks (unreachable_blocks, bb);
3173*38fd1498Szrj
3174*38fd1498Szrj /* If this is the second time dataflow is run, delete the old
3175*38fd1498Szrj sets. */
3176*38fd1498Szrj if (bb_info->in)
3177*38fd1498Szrj BITMAP_FREE (bb_info->in);
3178*38fd1498Szrj if (bb_info->out)
3179*38fd1498Szrj BITMAP_FREE (bb_info->out);
3180*38fd1498Szrj }
3181*38fd1498Szrj
3182*38fd1498Szrj /* For any block in an infinite loop, we must initialize the out set
3183*38fd1498Szrj to all ones. This could be expensive, but almost never occurs in
3184*38fd1498Szrj practice. However, it is common in regression tests. */
3185*38fd1498Szrj EXECUTE_IF_SET_IN_BITMAP (unreachable_blocks, 0, i, sbi)
3186*38fd1498Szrj {
3187*38fd1498Szrj if (bitmap_bit_p (all_blocks, i))
3188*38fd1498Szrj {
3189*38fd1498Szrj bb_info_t bb_info = bb_table[i];
3190*38fd1498Szrj if (!all_ones)
3191*38fd1498Szrj {
3192*38fd1498Szrj unsigned int j;
3193*38fd1498Szrj group_info *group;
3194*38fd1498Szrj
3195*38fd1498Szrj all_ones = BITMAP_ALLOC (&dse_bitmap_obstack);
3196*38fd1498Szrj FOR_EACH_VEC_ELT (rtx_group_vec, j, group)
3197*38fd1498Szrj bitmap_ior_into (all_ones, group->group_kill);
3198*38fd1498Szrj }
3199*38fd1498Szrj if (!bb_info->out)
3200*38fd1498Szrj {
3201*38fd1498Szrj bb_info->out = BITMAP_ALLOC (&dse_bitmap_obstack);
3202*38fd1498Szrj bitmap_copy (bb_info->out, all_ones);
3203*38fd1498Szrj }
3204*38fd1498Szrj }
3205*38fd1498Szrj }
3206*38fd1498Szrj
3207*38fd1498Szrj if (all_ones)
3208*38fd1498Szrj BITMAP_FREE (all_ones);
3209*38fd1498Szrj }
3210*38fd1498Szrj
3211*38fd1498Szrj
3212*38fd1498Szrj
3213*38fd1498Szrj /*----------------------------------------------------------------------------
3214*38fd1498Szrj Fourth step.
3215*38fd1498Szrj
3216*38fd1498Szrj Solve the bitvector equations.
3217*38fd1498Szrj ----------------------------------------------------------------------------*/
3218*38fd1498Szrj
3219*38fd1498Szrj
3220*38fd1498Szrj /* Confluence function for blocks with no successors. Create an out
3221*38fd1498Szrj set from the gen set of the exit block. This block logically has
3222*38fd1498Szrj the exit block as a successor. */
3223*38fd1498Szrj
3224*38fd1498Szrj
3225*38fd1498Szrj
3226*38fd1498Szrj static void
dse_confluence_0(basic_block bb)3227*38fd1498Szrj dse_confluence_0 (basic_block bb)
3228*38fd1498Szrj {
3229*38fd1498Szrj bb_info_t bb_info = bb_table[bb->index];
3230*38fd1498Szrj
3231*38fd1498Szrj if (bb->index == EXIT_BLOCK)
3232*38fd1498Szrj return;
3233*38fd1498Szrj
3234*38fd1498Szrj if (!bb_info->out)
3235*38fd1498Szrj {
3236*38fd1498Szrj bb_info->out = BITMAP_ALLOC (&dse_bitmap_obstack);
3237*38fd1498Szrj bitmap_copy (bb_info->out, bb_table[EXIT_BLOCK]->gen);
3238*38fd1498Szrj }
3239*38fd1498Szrj }
3240*38fd1498Szrj
3241*38fd1498Szrj /* Propagate the information from the in set of the dest of E to the
3242*38fd1498Szrj out set of the src of E. If the various in or out sets are not
3243*38fd1498Szrj there, that means they are all ones. */
3244*38fd1498Szrj
3245*38fd1498Szrj static bool
dse_confluence_n(edge e)3246*38fd1498Szrj dse_confluence_n (edge e)
3247*38fd1498Szrj {
3248*38fd1498Szrj bb_info_t src_info = bb_table[e->src->index];
3249*38fd1498Szrj bb_info_t dest_info = bb_table[e->dest->index];
3250*38fd1498Szrj
3251*38fd1498Szrj if (dest_info->in)
3252*38fd1498Szrj {
3253*38fd1498Szrj if (src_info->out)
3254*38fd1498Szrj bitmap_and_into (src_info->out, dest_info->in);
3255*38fd1498Szrj else
3256*38fd1498Szrj {
3257*38fd1498Szrj src_info->out = BITMAP_ALLOC (&dse_bitmap_obstack);
3258*38fd1498Szrj bitmap_copy (src_info->out, dest_info->in);
3259*38fd1498Szrj }
3260*38fd1498Szrj }
3261*38fd1498Szrj return true;
3262*38fd1498Szrj }
3263*38fd1498Szrj
3264*38fd1498Szrj
3265*38fd1498Szrj /* Propagate the info from the out to the in set of BB_INDEX's basic
3266*38fd1498Szrj block. There are three cases:
3267*38fd1498Szrj
3268*38fd1498Szrj 1) The block has no kill set. In this case the kill set is all
3269*38fd1498Szrj ones. It does not matter what the out set of the block is, none of
3270*38fd1498Szrj the info can reach the top. The only thing that reaches the top is
3271*38fd1498Szrj the gen set and we just copy the set.
3272*38fd1498Szrj
3273*38fd1498Szrj 2) There is a kill set but no out set and bb has successors. In
3274*38fd1498Szrj this case we just return. Eventually an out set will be created and
3275*38fd1498Szrj it is better to wait than to create a set of ones.
3276*38fd1498Szrj
3277*38fd1498Szrj 3) There is both a kill and out set. We apply the obvious transfer
3278*38fd1498Szrj function.
3279*38fd1498Szrj */
3280*38fd1498Szrj
3281*38fd1498Szrj static bool
dse_transfer_function(int bb_index)3282*38fd1498Szrj dse_transfer_function (int bb_index)
3283*38fd1498Szrj {
3284*38fd1498Szrj bb_info_t bb_info = bb_table[bb_index];
3285*38fd1498Szrj
3286*38fd1498Szrj if (bb_info->kill)
3287*38fd1498Szrj {
3288*38fd1498Szrj if (bb_info->out)
3289*38fd1498Szrj {
3290*38fd1498Szrj /* Case 3 above. */
3291*38fd1498Szrj if (bb_info->in)
3292*38fd1498Szrj return bitmap_ior_and_compl (bb_info->in, bb_info->gen,
3293*38fd1498Szrj bb_info->out, bb_info->kill);
3294*38fd1498Szrj else
3295*38fd1498Szrj {
3296*38fd1498Szrj bb_info->in = BITMAP_ALLOC (&dse_bitmap_obstack);
3297*38fd1498Szrj bitmap_ior_and_compl (bb_info->in, bb_info->gen,
3298*38fd1498Szrj bb_info->out, bb_info->kill);
3299*38fd1498Szrj return true;
3300*38fd1498Szrj }
3301*38fd1498Szrj }
3302*38fd1498Szrj else
3303*38fd1498Szrj /* Case 2 above. */
3304*38fd1498Szrj return false;
3305*38fd1498Szrj }
3306*38fd1498Szrj else
3307*38fd1498Szrj {
3308*38fd1498Szrj /* Case 1 above. If there is already an in set, nothing
3309*38fd1498Szrj happens. */
3310*38fd1498Szrj if (bb_info->in)
3311*38fd1498Szrj return false;
3312*38fd1498Szrj else
3313*38fd1498Szrj {
3314*38fd1498Szrj bb_info->in = BITMAP_ALLOC (&dse_bitmap_obstack);
3315*38fd1498Szrj bitmap_copy (bb_info->in, bb_info->gen);
3316*38fd1498Szrj return true;
3317*38fd1498Szrj }
3318*38fd1498Szrj }
3319*38fd1498Szrj }
3320*38fd1498Szrj
3321*38fd1498Szrj /* Solve the dataflow equations. */
3322*38fd1498Szrj
3323*38fd1498Szrj static void
dse_step4(void)3324*38fd1498Szrj dse_step4 (void)
3325*38fd1498Szrj {
3326*38fd1498Szrj df_simple_dataflow (DF_BACKWARD, NULL, dse_confluence_0,
3327*38fd1498Szrj dse_confluence_n, dse_transfer_function,
3328*38fd1498Szrj all_blocks, df_get_postorder (DF_BACKWARD),
3329*38fd1498Szrj df_get_n_blocks (DF_BACKWARD));
3330*38fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
3331*38fd1498Szrj {
3332*38fd1498Szrj basic_block bb;
3333*38fd1498Szrj
3334*38fd1498Szrj fprintf (dump_file, "\n\n*** Global dataflow info after analysis.\n");
3335*38fd1498Szrj FOR_ALL_BB_FN (bb, cfun)
3336*38fd1498Szrj {
3337*38fd1498Szrj bb_info_t bb_info = bb_table[bb->index];
3338*38fd1498Szrj
3339*38fd1498Szrj df_print_bb_index (bb, dump_file);
3340*38fd1498Szrj if (bb_info->in)
3341*38fd1498Szrj bitmap_print (dump_file, bb_info->in, " in: ", "\n");
3342*38fd1498Szrj else
3343*38fd1498Szrj fprintf (dump_file, " in: *MISSING*\n");
3344*38fd1498Szrj if (bb_info->gen)
3345*38fd1498Szrj bitmap_print (dump_file, bb_info->gen, " gen: ", "\n");
3346*38fd1498Szrj else
3347*38fd1498Szrj fprintf (dump_file, " gen: *MISSING*\n");
3348*38fd1498Szrj if (bb_info->kill)
3349*38fd1498Szrj bitmap_print (dump_file, bb_info->kill, " kill: ", "\n");
3350*38fd1498Szrj else
3351*38fd1498Szrj fprintf (dump_file, " kill: *MISSING*\n");
3352*38fd1498Szrj if (bb_info->out)
3353*38fd1498Szrj bitmap_print (dump_file, bb_info->out, " out: ", "\n");
3354*38fd1498Szrj else
3355*38fd1498Szrj fprintf (dump_file, " out: *MISSING*\n\n");
3356*38fd1498Szrj }
3357*38fd1498Szrj }
3358*38fd1498Szrj }
3359*38fd1498Szrj
3360*38fd1498Szrj
3361*38fd1498Szrj
3362*38fd1498Szrj /*----------------------------------------------------------------------------
3363*38fd1498Szrj Fifth step.
3364*38fd1498Szrj
3365*38fd1498Szrj Delete the stores that can only be deleted using the global information.
3366*38fd1498Szrj ----------------------------------------------------------------------------*/
3367*38fd1498Szrj
3368*38fd1498Szrj
3369*38fd1498Szrj static void
dse_step5(void)3370*38fd1498Szrj dse_step5 (void)
3371*38fd1498Szrj {
3372*38fd1498Szrj basic_block bb;
3373*38fd1498Szrj FOR_EACH_BB_FN (bb, cfun)
3374*38fd1498Szrj {
3375*38fd1498Szrj bb_info_t bb_info = bb_table[bb->index];
3376*38fd1498Szrj insn_info_t insn_info = bb_info->last_insn;
3377*38fd1498Szrj bitmap v = bb_info->out;
3378*38fd1498Szrj
3379*38fd1498Szrj while (insn_info)
3380*38fd1498Szrj {
3381*38fd1498Szrj bool deleted = false;
3382*38fd1498Szrj if (dump_file && insn_info->insn)
3383*38fd1498Szrj {
3384*38fd1498Szrj fprintf (dump_file, "starting to process insn %d\n",
3385*38fd1498Szrj INSN_UID (insn_info->insn));
3386*38fd1498Szrj bitmap_print (dump_file, v, " v: ", "\n");
3387*38fd1498Szrj }
3388*38fd1498Szrj
3389*38fd1498Szrj /* There may have been code deleted by the dce pass run before
3390*38fd1498Szrj this phase. */
3391*38fd1498Szrj if (insn_info->insn
3392*38fd1498Szrj && INSN_P (insn_info->insn)
3393*38fd1498Szrj && (!insn_info->cannot_delete)
3394*38fd1498Szrj && (!bitmap_empty_p (v)))
3395*38fd1498Szrj {
3396*38fd1498Szrj store_info *store_info = insn_info->store_rec;
3397*38fd1498Szrj
3398*38fd1498Szrj /* Try to delete the current insn. */
3399*38fd1498Szrj deleted = true;
3400*38fd1498Szrj
3401*38fd1498Szrj /* Skip the clobbers. */
3402*38fd1498Szrj while (!store_info->is_set)
3403*38fd1498Szrj store_info = store_info->next;
3404*38fd1498Szrj
3405*38fd1498Szrj HOST_WIDE_INT i, offset, width;
3406*38fd1498Szrj group_info *group_info = rtx_group_vec[store_info->group_id];
3407*38fd1498Szrj
3408*38fd1498Szrj if (!store_info->offset.is_constant (&offset)
3409*38fd1498Szrj || !store_info->width.is_constant (&width))
3410*38fd1498Szrj deleted = false;
3411*38fd1498Szrj else
3412*38fd1498Szrj {
3413*38fd1498Szrj HOST_WIDE_INT end = offset + width;
3414*38fd1498Szrj for (i = offset; i < end; i++)
3415*38fd1498Szrj {
3416*38fd1498Szrj int index = get_bitmap_index (group_info, i);
3417*38fd1498Szrj
3418*38fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
3419*38fd1498Szrj fprintf (dump_file, "i = %d, index = %d\n",
3420*38fd1498Szrj (int) i, index);
3421*38fd1498Szrj if (index == 0 || !bitmap_bit_p (v, index))
3422*38fd1498Szrj {
3423*38fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
3424*38fd1498Szrj fprintf (dump_file, "failing at i = %d\n",
3425*38fd1498Szrj (int) i);
3426*38fd1498Szrj deleted = false;
3427*38fd1498Szrj break;
3428*38fd1498Szrj }
3429*38fd1498Szrj }
3430*38fd1498Szrj }
3431*38fd1498Szrj if (deleted)
3432*38fd1498Szrj {
3433*38fd1498Szrj if (dbg_cnt (dse)
3434*38fd1498Szrj && check_for_inc_dec_1 (insn_info))
3435*38fd1498Szrj {
3436*38fd1498Szrj delete_insn (insn_info->insn);
3437*38fd1498Szrj insn_info->insn = NULL;
3438*38fd1498Szrj globally_deleted++;
3439*38fd1498Szrj }
3440*38fd1498Szrj }
3441*38fd1498Szrj }
3442*38fd1498Szrj /* We do want to process the local info if the insn was
3443*38fd1498Szrj deleted. For instance, if the insn did a wild read, we
3444*38fd1498Szrj no longer need to trash the info. */
3445*38fd1498Szrj if (insn_info->insn
3446*38fd1498Szrj && INSN_P (insn_info->insn)
3447*38fd1498Szrj && (!deleted))
3448*38fd1498Szrj {
3449*38fd1498Szrj scan_stores (insn_info->store_rec, v, NULL);
3450*38fd1498Szrj if (insn_info->wild_read)
3451*38fd1498Szrj {
3452*38fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
3453*38fd1498Szrj fprintf (dump_file, "wild read\n");
3454*38fd1498Szrj bitmap_clear (v);
3455*38fd1498Szrj }
3456*38fd1498Szrj else if (insn_info->read_rec
3457*38fd1498Szrj || insn_info->non_frame_wild_read
3458*38fd1498Szrj || insn_info->frame_read)
3459*38fd1498Szrj {
3460*38fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
3461*38fd1498Szrj {
3462*38fd1498Szrj if (!insn_info->non_frame_wild_read
3463*38fd1498Szrj && !insn_info->frame_read)
3464*38fd1498Szrj fprintf (dump_file, "regular read\n");
3465*38fd1498Szrj if (insn_info->non_frame_wild_read)
3466*38fd1498Szrj fprintf (dump_file, "non-frame wild read\n");
3467*38fd1498Szrj if (insn_info->frame_read)
3468*38fd1498Szrj fprintf (dump_file, "frame read\n");
3469*38fd1498Szrj }
3470*38fd1498Szrj scan_reads (insn_info, v, NULL);
3471*38fd1498Szrj }
3472*38fd1498Szrj }
3473*38fd1498Szrj
3474*38fd1498Szrj insn_info = insn_info->prev_insn;
3475*38fd1498Szrj }
3476*38fd1498Szrj }
3477*38fd1498Szrj }
3478*38fd1498Szrj
3479*38fd1498Szrj
3480*38fd1498Szrj
3481*38fd1498Szrj /*----------------------------------------------------------------------------
3482*38fd1498Szrj Sixth step.
3483*38fd1498Szrj
3484*38fd1498Szrj Delete stores made redundant by earlier stores (which store the same
3485*38fd1498Szrj value) that couldn't be eliminated.
3486*38fd1498Szrj ----------------------------------------------------------------------------*/
3487*38fd1498Szrj
3488*38fd1498Szrj static void
dse_step6(void)3489*38fd1498Szrj dse_step6 (void)
3490*38fd1498Szrj {
3491*38fd1498Szrj basic_block bb;
3492*38fd1498Szrj
3493*38fd1498Szrj FOR_ALL_BB_FN (bb, cfun)
3494*38fd1498Szrj {
3495*38fd1498Szrj bb_info_t bb_info = bb_table[bb->index];
3496*38fd1498Szrj insn_info_t insn_info = bb_info->last_insn;
3497*38fd1498Szrj
3498*38fd1498Szrj while (insn_info)
3499*38fd1498Szrj {
3500*38fd1498Szrj /* There may have been code deleted by the dce pass run before
3501*38fd1498Szrj this phase. */
3502*38fd1498Szrj if (insn_info->insn
3503*38fd1498Szrj && INSN_P (insn_info->insn)
3504*38fd1498Szrj && !insn_info->cannot_delete)
3505*38fd1498Szrj {
3506*38fd1498Szrj store_info *s_info = insn_info->store_rec;
3507*38fd1498Szrj
3508*38fd1498Szrj while (s_info && !s_info->is_set)
3509*38fd1498Szrj s_info = s_info->next;
3510*38fd1498Szrj if (s_info
3511*38fd1498Szrj && s_info->redundant_reason
3512*38fd1498Szrj && s_info->redundant_reason->insn
3513*38fd1498Szrj && INSN_P (s_info->redundant_reason->insn))
3514*38fd1498Szrj {
3515*38fd1498Szrj rtx_insn *rinsn = s_info->redundant_reason->insn;
3516*38fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
3517*38fd1498Szrj fprintf (dump_file, "Locally deleting insn %d "
3518*38fd1498Szrj "because insn %d stores the "
3519*38fd1498Szrj "same value and couldn't be "
3520*38fd1498Szrj "eliminated\n",
3521*38fd1498Szrj INSN_UID (insn_info->insn),
3522*38fd1498Szrj INSN_UID (rinsn));
3523*38fd1498Szrj delete_dead_store_insn (insn_info);
3524*38fd1498Szrj }
3525*38fd1498Szrj }
3526*38fd1498Szrj insn_info = insn_info->prev_insn;
3527*38fd1498Szrj }
3528*38fd1498Szrj }
3529*38fd1498Szrj }
3530*38fd1498Szrj
3531*38fd1498Szrj /*----------------------------------------------------------------------------
3532*38fd1498Szrj Seventh step.
3533*38fd1498Szrj
3534*38fd1498Szrj Destroy everything left standing.
3535*38fd1498Szrj ----------------------------------------------------------------------------*/
3536*38fd1498Szrj
3537*38fd1498Szrj static void
dse_step7(void)3538*38fd1498Szrj dse_step7 (void)
3539*38fd1498Szrj {
3540*38fd1498Szrj bitmap_obstack_release (&dse_bitmap_obstack);
3541*38fd1498Szrj obstack_free (&dse_obstack, NULL);
3542*38fd1498Szrj
3543*38fd1498Szrj end_alias_analysis ();
3544*38fd1498Szrj free (bb_table);
3545*38fd1498Szrj delete rtx_group_table;
3546*38fd1498Szrj rtx_group_table = NULL;
3547*38fd1498Szrj rtx_group_vec.release ();
3548*38fd1498Szrj BITMAP_FREE (all_blocks);
3549*38fd1498Szrj BITMAP_FREE (scratch);
3550*38fd1498Szrj
3551*38fd1498Szrj rtx_store_info_pool.release ();
3552*38fd1498Szrj read_info_type_pool.release ();
3553*38fd1498Szrj insn_info_type_pool.release ();
3554*38fd1498Szrj dse_bb_info_type_pool.release ();
3555*38fd1498Szrj group_info_pool.release ();
3556*38fd1498Szrj deferred_change_pool.release ();
3557*38fd1498Szrj }
3558*38fd1498Szrj
3559*38fd1498Szrj
3560*38fd1498Szrj /* -------------------------------------------------------------------------
3561*38fd1498Szrj DSE
3562*38fd1498Szrj ------------------------------------------------------------------------- */
3563*38fd1498Szrj
3564*38fd1498Szrj /* Callback for running pass_rtl_dse. */
3565*38fd1498Szrj
3566*38fd1498Szrj static unsigned int
rest_of_handle_dse(void)3567*38fd1498Szrj rest_of_handle_dse (void)
3568*38fd1498Szrj {
3569*38fd1498Szrj df_set_flags (DF_DEFER_INSN_RESCAN);
3570*38fd1498Szrj
3571*38fd1498Szrj /* Need the notes since we must track live hardregs in the forwards
3572*38fd1498Szrj direction. */
3573*38fd1498Szrj df_note_add_problem ();
3574*38fd1498Szrj df_analyze ();
3575*38fd1498Szrj
3576*38fd1498Szrj dse_step0 ();
3577*38fd1498Szrj dse_step1 ();
3578*38fd1498Szrj dse_step2_init ();
3579*38fd1498Szrj if (dse_step2 ())
3580*38fd1498Szrj {
3581*38fd1498Szrj df_set_flags (DF_LR_RUN_DCE);
3582*38fd1498Szrj df_analyze ();
3583*38fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
3584*38fd1498Szrj fprintf (dump_file, "doing global processing\n");
3585*38fd1498Szrj dse_step3 ();
3586*38fd1498Szrj dse_step4 ();
3587*38fd1498Szrj dse_step5 ();
3588*38fd1498Szrj }
3589*38fd1498Szrj
3590*38fd1498Szrj dse_step6 ();
3591*38fd1498Szrj dse_step7 ();
3592*38fd1498Szrj
3593*38fd1498Szrj if (dump_file)
3594*38fd1498Szrj fprintf (dump_file, "dse: local deletions = %d, global deletions = %d\n",
3595*38fd1498Szrj locally_deleted, globally_deleted);
3596*38fd1498Szrj
3597*38fd1498Szrj /* DSE can eliminate potentially-trapping MEMs.
3598*38fd1498Szrj Remove any EH edges associated with them. */
3599*38fd1498Szrj if ((locally_deleted || globally_deleted)
3600*38fd1498Szrj && cfun->can_throw_non_call_exceptions
3601*38fd1498Szrj && purge_all_dead_edges ())
3602*38fd1498Szrj cleanup_cfg (0);
3603*38fd1498Szrj
3604*38fd1498Szrj return 0;
3605*38fd1498Szrj }
3606*38fd1498Szrj
3607*38fd1498Szrj namespace {
3608*38fd1498Szrj
3609*38fd1498Szrj const pass_data pass_data_rtl_dse1 =
3610*38fd1498Szrj {
3611*38fd1498Szrj RTL_PASS, /* type */
3612*38fd1498Szrj "dse1", /* name */
3613*38fd1498Szrj OPTGROUP_NONE, /* optinfo_flags */
3614*38fd1498Szrj TV_DSE1, /* tv_id */
3615*38fd1498Szrj 0, /* properties_required */
3616*38fd1498Szrj 0, /* properties_provided */
3617*38fd1498Szrj 0, /* properties_destroyed */
3618*38fd1498Szrj 0, /* todo_flags_start */
3619*38fd1498Szrj TODO_df_finish, /* todo_flags_finish */
3620*38fd1498Szrj };
3621*38fd1498Szrj
3622*38fd1498Szrj class pass_rtl_dse1 : public rtl_opt_pass
3623*38fd1498Szrj {
3624*38fd1498Szrj public:
pass_rtl_dse1(gcc::context * ctxt)3625*38fd1498Szrj pass_rtl_dse1 (gcc::context *ctxt)
3626*38fd1498Szrj : rtl_opt_pass (pass_data_rtl_dse1, ctxt)
3627*38fd1498Szrj {}
3628*38fd1498Szrj
3629*38fd1498Szrj /* opt_pass methods: */
gate(function *)3630*38fd1498Szrj virtual bool gate (function *)
3631*38fd1498Szrj {
3632*38fd1498Szrj return optimize > 0 && flag_dse && dbg_cnt (dse1);
3633*38fd1498Szrj }
3634*38fd1498Szrj
execute(function *)3635*38fd1498Szrj virtual unsigned int execute (function *) { return rest_of_handle_dse (); }
3636*38fd1498Szrj
3637*38fd1498Szrj }; // class pass_rtl_dse1
3638*38fd1498Szrj
3639*38fd1498Szrj } // anon namespace
3640*38fd1498Szrj
3641*38fd1498Szrj rtl_opt_pass *
make_pass_rtl_dse1(gcc::context * ctxt)3642*38fd1498Szrj make_pass_rtl_dse1 (gcc::context *ctxt)
3643*38fd1498Szrj {
3644*38fd1498Szrj return new pass_rtl_dse1 (ctxt);
3645*38fd1498Szrj }
3646*38fd1498Szrj
3647*38fd1498Szrj namespace {
3648*38fd1498Szrj
3649*38fd1498Szrj const pass_data pass_data_rtl_dse2 =
3650*38fd1498Szrj {
3651*38fd1498Szrj RTL_PASS, /* type */
3652*38fd1498Szrj "dse2", /* name */
3653*38fd1498Szrj OPTGROUP_NONE, /* optinfo_flags */
3654*38fd1498Szrj TV_DSE2, /* tv_id */
3655*38fd1498Szrj 0, /* properties_required */
3656*38fd1498Szrj 0, /* properties_provided */
3657*38fd1498Szrj 0, /* properties_destroyed */
3658*38fd1498Szrj 0, /* todo_flags_start */
3659*38fd1498Szrj TODO_df_finish, /* todo_flags_finish */
3660*38fd1498Szrj };
3661*38fd1498Szrj
3662*38fd1498Szrj class pass_rtl_dse2 : public rtl_opt_pass
3663*38fd1498Szrj {
3664*38fd1498Szrj public:
pass_rtl_dse2(gcc::context * ctxt)3665*38fd1498Szrj pass_rtl_dse2 (gcc::context *ctxt)
3666*38fd1498Szrj : rtl_opt_pass (pass_data_rtl_dse2, ctxt)
3667*38fd1498Szrj {}
3668*38fd1498Szrj
3669*38fd1498Szrj /* opt_pass methods: */
gate(function *)3670*38fd1498Szrj virtual bool gate (function *)
3671*38fd1498Szrj {
3672*38fd1498Szrj return optimize > 0 && flag_dse && dbg_cnt (dse2);
3673*38fd1498Szrj }
3674*38fd1498Szrj
execute(function *)3675*38fd1498Szrj virtual unsigned int execute (function *) { return rest_of_handle_dse (); }
3676*38fd1498Szrj
3677*38fd1498Szrj }; // class pass_rtl_dse2
3678*38fd1498Szrj
3679*38fd1498Szrj } // anon namespace
3680*38fd1498Szrj
3681*38fd1498Szrj rtl_opt_pass *
make_pass_rtl_dse2(gcc::context * ctxt)3682*38fd1498Szrj make_pass_rtl_dse2 (gcc::context *ctxt)
3683*38fd1498Szrj {
3684*38fd1498Szrj return new pass_rtl_dse2 (ctxt);
3685*38fd1498Szrj }
3686