xref: /netbsd-src/external/gpl3/gcc/dist/gcc/tree-ssa-dse.cc (revision 0a3071956a3a9fdebdbf7f338cf2d439b45fc728)
1 /* Dead and redundant store elimination
2    Copyright (C) 2004-2022 Free Software Foundation, Inc.
3 
4 This file is part of GCC.
5 
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
10 
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 GNU General Public License for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3.  If not see
18 <http://www.gnu.org/licenses/>.  */
19 
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "backend.h"
24 #include "rtl.h"
25 #include "tree.h"
26 #include "gimple.h"
27 #include "tree-pass.h"
28 #include "ssa.h"
29 #include "gimple-pretty-print.h"
30 #include "fold-const.h"
31 #include "gimple-iterator.h"
32 #include "tree-cfg.h"
33 #include "tree-dfa.h"
34 #include "tree-cfgcleanup.h"
35 #include "alias.h"
36 #include "tree-ssa-loop.h"
37 #include "tree-ssa-dse.h"
38 #include "builtins.h"
39 #include "gimple-fold.h"
40 #include "gimplify.h"
41 #include "tree-eh.h"
42 #include "cfganal.h"
43 #include "cgraph.h"
44 #include "ipa-modref-tree.h"
45 #include "ipa-modref.h"
46 #include "target.h"
47 #include "tree-ssa-loop-niter.h"
48 
49 /* This file implements dead store elimination.
50 
51    A dead store is a store into a memory location which will later be
52    overwritten by another store without any intervening loads.  In this
53    case the earlier store can be deleted or trimmed if the store
54    was partially dead.
55 
56    A redundant store is a store into a memory location which stores
57    the exact same value as a prior store to the same memory location.
58    While this can often be handled by dead store elimination, removing
59    the redundant store is often better than removing or trimming the
60    dead store.
61 
62    In our SSA + virtual operand world we use immediate uses of virtual
63    operands to detect these cases.  If a store's virtual definition
64    is used precisely once by a later store to the same location which
65    post dominates the first store, then the first store is dead.  If
66    the data stored is the same, then the second store is redundant.
67 
68    The single use of the store's virtual definition ensures that
69    there are no intervening aliased loads and the requirement that
70    the second store post dominates the first ensures that if the earlier
71    store executes, then the later stores will execute before the function
72    exits.
73 
74    It may help to think of this as first moving the earlier store to
75    the point immediately before the later store.  Again, the single
76    use of the virtual definition and the post-dominance relationship
77    ensure that such movement would be safe.  Clearly if there are
78    back-to-back stores, then the second makes the first dead.  If
79    the second store stores the same value, then the second store is
80    redundant.
81 
82    Reviewing section 10.7.2 in Morgan's "Building an Optimizing Compiler"
83    may also help in understanding this code since it discusses the
84    relationship between dead store and redundant load elimination.  In
85    fact, they are the same transformation applied to different views of
86    the CFG.  */
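
/* Editorial illustration (not part of the original sources): a minimal C
   sketch of the two cases described above.

     void
     f (int *p)
     {
       *p = 1;   /* dead: overwritten below with no intervening load of *p  */
       *p = 2;
       *p = 2;   /* redundant: stores the same value as the prior store  */
     }

   The first store is dead because a later store to the same location
   post-dominates it with no intervening use; the third store is redundant
   because it writes the value already stored by the second.  */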
87 
88 static void delete_dead_or_redundant_call (gimple_stmt_iterator *, const char *);
89 
90 /* Bitmap of blocks that have had EH statements cleaned.  We should
91    remove their dead edges eventually.  */
92 static bitmap need_eh_cleanup;
93 static bitmap need_ab_cleanup;
94 
95 /* STMT is a statement that may write into memory.  Analyze it and
96    initialize WRITE to describe how STMT affects memory.  When
97    MAY_DEF_OK is true then the function initializes WRITE to what
98    the stmt may define.
99 
100    Return TRUE if the statement was analyzed, FALSE otherwise.
101 
102    It is always safe to return FALSE.  But typically better optimization
103    can be achieved by analyzing more statements.  */
104 
105 static bool
106 initialize_ao_ref_for_dse (gimple *stmt, ao_ref *write, bool may_def_ok = false)
107 {
108   /* It's advantageous to handle certain mem* functions.  */
109   if (gimple_call_builtin_p (stmt, BUILT_IN_NORMAL))
110     {
111       switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
112 	{
113 	case BUILT_IN_MEMCPY:
114 	case BUILT_IN_MEMMOVE:
115 	case BUILT_IN_MEMSET:
116 	case BUILT_IN_MEMCPY_CHK:
117 	case BUILT_IN_MEMMOVE_CHK:
118 	case BUILT_IN_MEMSET_CHK:
119 	case BUILT_IN_STRNCPY:
120 	case BUILT_IN_STRNCPY_CHK:
121 	  {
122 	    tree size = gimple_call_arg (stmt, 2);
123 	    tree ptr = gimple_call_arg (stmt, 0);
124 	    ao_ref_init_from_ptr_and_size (write, ptr, size);
125 	    return true;
126 	  }
127 
128 	/* A calloc call can never be dead, but it can make
129 	   subsequent stores redundant if they store 0 into
130 	   the same memory locations.  */
131 	case BUILT_IN_CALLOC:
132 	  {
133 	    tree nelem = gimple_call_arg (stmt, 0);
134 	    tree selem = gimple_call_arg (stmt, 1);
135 	    tree lhs;
136 	    if (TREE_CODE (nelem) == INTEGER_CST
137 		&& TREE_CODE (selem) == INTEGER_CST
138 		&& (lhs = gimple_call_lhs (stmt)) != NULL_TREE)
139 	      {
140 		tree size = fold_build2 (MULT_EXPR, TREE_TYPE (nelem),
141 					 nelem, selem);
142 		ao_ref_init_from_ptr_and_size (write, lhs, size);
143 		return true;
144 	      }
145 	  }
146 
147 	default:
148 	  break;
149 	}
150     }
151   else if (tree lhs = gimple_get_lhs (stmt))
152     {
153       if (TREE_CODE (lhs) != SSA_NAME
154 	  && (may_def_ok || !stmt_could_throw_p (cfun, stmt)))
155 	{
156 	  ao_ref_init (write, lhs);
157 	  return true;
158 	}
159     }
160   return false;
161 }
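
/* Editorial sketch of the statements the analysis above accepts (assumed
   typical GIMPLE shapes, not an exhaustive list):

     memset (p, 0, n);     -> WRITE covers the N bytes at *P
     memcpy (d, s, n);     -> WRITE covers the N bytes at *D
     q = calloc (4, 8);    -> WRITE covers 32 bytes at *Q (constant args only)
     a.f = 1;              -> WRITE initialized from the non-SSA lhs
     x_1 = y_2 + 1;        -> rejected: the lhs is an SSA name, no memory def  */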
162 
163 /* Given REF from the alias oracle, return TRUE if it is a valid
164    kill memory reference for dead store elimination, false otherwise.
165 
166    In particular, the reference must have a known base, known maximum
167    size, start at a byte offset and have a size that is one or more
168    bytes.  */
169 
170 static bool
171 valid_ao_ref_kill_for_dse (ao_ref *ref)
172 {
173   return (ao_ref_base (ref)
174 	  && known_size_p (ref->max_size)
175 	  && maybe_ne (ref->size, 0)
176 	  && known_eq (ref->max_size, ref->size)
177 	  && known_ge (ref->offset, 0));
178 }
179 
180 /* Given REF from the alias oracle, return TRUE if it is a valid
181    load or store memory reference for dead store elimination, false otherwise.
182 
183    Unlike valid_ao_ref_kill_for_dse, we can accept writes where max_size
184    is not the same as size, since we can conservatively handle the larger range.  */
185 
186 static bool
187 valid_ao_ref_for_dse (ao_ref *ref)
188 {
189   return (ao_ref_base (ref)
190 	  && known_size_p (ref->max_size)
191 	  && known_ge (ref->offset, 0));
192 }
193 
194 /* Initialize OFFSET and SIZE to a range known to contain REF
195    where the boundaries are divisible by BITS_PER_UNIT (but still in bits).
196    Return false if this is impossible.  */
197 
198 static bool
199 get_byte_aligned_range_containing_ref (ao_ref *ref, poly_int64 *offset,
200 				       HOST_WIDE_INT *size)
201 {
202   if (!known_size_p (ref->max_size))
203     return false;
204   *offset = aligned_lower_bound (ref->offset, BITS_PER_UNIT);
205   poly_int64 end = aligned_upper_bound (ref->offset + ref->max_size,
206 					BITS_PER_UNIT);
207   return (end - *offset).is_constant (size);
208 }
209 
210 /* Initialize OFFSET and SIZE to a range known to be contained in REF
211    where the boundaries are divisible by BITS_PER_UNIT (but still in bits).
212    Return false if this is impossible.  */
213 
214 static bool
215 get_byte_aligned_range_contained_in_ref (ao_ref *ref, poly_int64 *offset,
216 					 HOST_WIDE_INT *size)
217 {
218   if (!known_size_p (ref->size)
219       || !known_eq (ref->size, ref->max_size))
220     return false;
221   *offset = aligned_upper_bound (ref->offset, BITS_PER_UNIT);
222   poly_int64 end = aligned_lower_bound (ref->offset + ref->max_size,
223 					BITS_PER_UNIT);
224   /* For bit accesses we can get -1 here, and a zero-sized kill is not
225      useful either.  */
226   if (!known_gt (end, *offset))
227     return false;
228   return (end - *offset).is_constant (size);
229 }
230 
231 /* Compute the byte range (returned in RET_OFFSET and RET_SIZE) for access COPY
232    inside REF.  If KILL is true, then COPY represents a kill and the byte range
233    needs to be fully contained in the bit range given by COPY.  If KILL is false
234    then the byte range returned must contain the range of COPY.  */
235 
236 static bool
237 get_byte_range (ao_ref *copy, ao_ref *ref, bool kill,
238 		HOST_WIDE_INT *ret_offset, HOST_WIDE_INT *ret_size)
239 {
240   HOST_WIDE_INT copy_size, ref_size;
241   poly_int64 copy_offset, ref_offset;
242   HOST_WIDE_INT diff;
243 
244   /* First translate from bits to bytes, rounding to bigger or smaller ranges
245      as needed.  Kills always need to be rounded to smaller ranges, while
246      uses and stores round to larger ranges.  */
247   if (kill)
248     {
249       if (!get_byte_aligned_range_contained_in_ref (copy, &copy_offset,
250 						    &copy_size))
251 	return false;
252     }
253   else
254     {
255       if (!get_byte_aligned_range_containing_ref (copy, &copy_offset,
256 						  &copy_size))
257 	return false;
258     }
259 
260   if (!get_byte_aligned_range_containing_ref (ref, &ref_offset, &ref_size)
261       || !ordered_p (copy_offset, ref_offset))
262     return false;
263 
264   /* Switch sizes from bits to bytes so we do not need to care about
265      overflows.  Offset calculation needs to stay in bits until we compute
266      the difference and can switch to HOST_WIDE_INT.  */
267   copy_size /= BITS_PER_UNIT;
268   ref_size /= BITS_PER_UNIT;
269 
270   /* If COPY starts before REF, then reset the beginning of
271      COPY to match REF and decrease the size of COPY by the
272      number of bytes removed from COPY.  */
273   if (maybe_lt (copy_offset, ref_offset))
274     {
275       if (!(ref_offset - copy_offset).is_constant (&diff)
276 	  || copy_size < diff / BITS_PER_UNIT)
277 	return false;
278       copy_size -= diff / BITS_PER_UNIT;
279       copy_offset = ref_offset;
280     }
281 
282   if (!(copy_offset - ref_offset).is_constant (&diff)
283       || ref_size <= diff / BITS_PER_UNIT)
284     return false;
285 
286   /* If COPY extends beyond REF, chop off its size appropriately.  */
287   HOST_WIDE_INT limit = ref_size - diff / BITS_PER_UNIT;
288 
289   if (copy_size > limit)
290     copy_size = limit;
291   *ret_size = copy_size;
292   if (!(copy_offset - ref_offset).is_constant (ret_offset))
293     return false;
294   *ret_offset /= BITS_PER_UNIT;
295   return true;
296 }
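
/* Editorial worked example: consider an access whose bit range relative to
   its base is [4, 28).  Rounded inward, as required for kills, this becomes
   the byte range [1, 3); rounded outward, as required for uses and for the
   tracked store, it becomes [0, 4).  get_byte_range then clips the rounded
   COPY range to the rounded REF range and returns the result in bytes
   relative to REF's first byte.  */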
297 
298 /* Update LIVE_BYTES, which tracks REF, for a write described by WRITE:
299    verify that the two have the same base memory address and that the write
300    has a known size and overlaps with REF.  */
301 static void
302 clear_live_bytes_for_ref (sbitmap live_bytes, ao_ref *ref, ao_ref *write)
303 {
304   HOST_WIDE_INT start, size;
305 
306   if (valid_ao_ref_kill_for_dse (write)
307       && operand_equal_p (write->base, ref->base, OEP_ADDRESS_OF)
308       && get_byte_range (write, ref, true, &start, &size))
309     bitmap_clear_range (live_bytes, start, size);
310 }
311 
312 /* Clear any bytes written by STMT from the bitmap LIVE_BYTES.  The base
313    address written by STMT must match the one found in REF, which must
314    have its base address previously initialized.
315 
316    This routine must be conservative.  If we don't know the offset or
317    actual size written, assume nothing was written.  */
318 
319 static void
320 clear_bytes_written_by (sbitmap live_bytes, gimple *stmt, ao_ref *ref)
321 {
322   ao_ref write;
323 
324   if (gcall *call = dyn_cast <gcall *> (stmt))
325     {
326       bool interposed;
327       modref_summary *summary = get_modref_function_summary (call, &interposed);
328 
329       if (summary && !interposed)
330 	for (auto kill : summary->kills)
331 	  if (kill.get_ao_ref (as_a <gcall *> (stmt), &write))
332 	    clear_live_bytes_for_ref (live_bytes, ref, &write);
333     }
334   if (!initialize_ao_ref_for_dse (stmt, &write))
335     return;
336 
337   clear_live_bytes_for_ref (live_bytes, ref, &write);
338 }
339 
340 /* REF is a memory write.  Extract relevant information from it and
341    initialize the LIVE_BYTES bitmap.  If successful, return TRUE.
342    Otherwise return FALSE.  */
343 
344 static bool
345 setup_live_bytes_from_ref (ao_ref *ref, sbitmap live_bytes)
346 {
347   HOST_WIDE_INT const_size;
348   if (valid_ao_ref_for_dse (ref)
349       && ((aligned_upper_bound (ref->offset + ref->max_size, BITS_PER_UNIT)
350 	   - aligned_lower_bound (ref->offset,
351 				  BITS_PER_UNIT)).is_constant (&const_size))
352       && (const_size / BITS_PER_UNIT <= param_dse_max_object_size)
353       && const_size > 1)
354     {
355       bitmap_clear (live_bytes);
356       bitmap_set_range (live_bytes, 0, const_size / BITS_PER_UNIT);
357       return true;
358     }
359   return false;
360 }
361 
362 /* Compute the number of stored bytes that we can trim from the head and
363    tail of REF.  LIVE is the bitmap of stores to REF that are still live.
364 
365    Store the number of bytes trimmed from the head and tail in TRIM_HEAD
366    and TRIM_TAIL respectively.
367 
368    STMT is the statement being trimmed and is used for debugging dump
369    output only.  */
370 
371 static void
372 compute_trims (ao_ref *ref, sbitmap live, int *trim_head, int *trim_tail,
373 	       gimple *stmt)
374 {
375   *trim_head = 0;
376   *trim_tail = 0;
377 
378   /* We use bitmaps biased such that ref->offset is contained in bit zero and
379      the bitmap extends through ref->max_size, so we know that in the original
380      bitmap bits 0 .. ref->max_size were true.  But we need to check that this
381      covers the bytes of REF exactly.  */
382   const unsigned int align = known_alignment (ref->offset);
383   if ((align > 0 && align < BITS_PER_UNIT)
384       || !known_eq (ref->size, ref->max_size))
385     return;
386 
387   /* Now identify how much, if any of the tail we can chop off.  */
388   HOST_WIDE_INT const_size;
389   int last_live = bitmap_last_set_bit (live);
390   if (ref->size.is_constant (&const_size))
391     {
392       int last_orig = (const_size / BITS_PER_UNIT) - 1;
393       /* We can leave inconvenient amounts on the tail as
394 	 residual handling in mem* and str* functions is usually
395 	 reasonably efficient.  */
396       *trim_tail = last_orig - last_live;
397 
398       /* But don't trim away out of bounds accesses, as this defeats
399 	 proper warnings.
400 
401 	 We could have a type with no TYPE_SIZE_UNIT or we could have a VLA
402 	 where TYPE_SIZE_UNIT is not a constant.  */
403       if (*trim_tail
404 	  && TYPE_SIZE_UNIT (TREE_TYPE (ref->base))
405 	  && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (ref->base))) == INTEGER_CST
406 	  && compare_tree_int (TYPE_SIZE_UNIT (TREE_TYPE (ref->base)),
407 			       last_orig) <= 0)
408 	*trim_tail = 0;
409     }
410 
411   /* Identify how much, if any of the head we can chop off.  */
412   int first_orig = 0;
413   int first_live = bitmap_first_set_bit (live);
414   *trim_head = first_live - first_orig;
415 
416   /* If REF is aligned, try to maintain this alignment if it reduces
417      the number of (power-of-two sized aligned) writes to memory.  */
418   unsigned int align_bits;
419   unsigned HOST_WIDE_INT bitpos;
420   if ((*trim_head || *trim_tail)
421       && last_live - first_live >= 2
422       && ao_ref_alignment (ref, &align_bits, &bitpos)
423       && align_bits >= 32
424       && bitpos == 0
425       && align_bits % BITS_PER_UNIT == 0)
426     {
427       unsigned int align_units = align_bits / BITS_PER_UNIT;
428       if (align_units > 16)
429 	align_units = 16;
430       while ((first_live | (align_units - 1)) > (unsigned int)last_live)
431 	align_units >>= 1;
432 
433       if (*trim_head)
434 	{
435 	  unsigned int pos = first_live & (align_units - 1);
436 	  for (unsigned int i = 1; i <= align_units; i <<= 1)
437 	    {
438 	      unsigned int mask = ~(i - 1);
439 	      unsigned int bytes = align_units - (pos & mask);
440 	      if (wi::popcount (bytes) <= 1)
441 		{
442 		  *trim_head &= mask;
443 		  break;
444 		}
445 	    }
446 	}
447 
448       if (*trim_tail)
449 	{
450 	  unsigned int pos = last_live & (align_units - 1);
451 	  for (unsigned int i = 1; i <= align_units; i <<= 1)
452 	    {
453 	      int mask = i - 1;
454 	      unsigned int bytes = (pos | mask) + 1;
455 	      if ((last_live | mask) > (last_live + *trim_tail))
456 		break;
457 	      if (wi::popcount (bytes) <= 1)
458 		{
459 		  unsigned int extra = (last_live | mask) - last_live;
460 		  *trim_tail -= extra;
461 		  break;
462 		}
463 	    }
464 	}
465     }
466 
467   if ((*trim_head || *trim_tail) && dump_file && (dump_flags & TDF_DETAILS))
468     {
469       fprintf (dump_file, "  Trimming statement (head = %d, tail = %d): ",
470 	       *trim_head, *trim_tail);
471       print_gimple_stmt (dump_file, stmt, 0, dump_flags);
472       fprintf (dump_file, "\n");
473     }
474 }
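
/* Editorial example: for a 16-byte store whose LIVE bitmap has only bytes
   4..15 set, the code above computes trim_head = 4 and trim_tail = 0.  If
   instead bytes 5..15 were live and REF were 8-byte aligned, the head trim
   would be rounded down from 5 to 4 so that the remaining store still begins
   on an aligned boundary.  */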
475 
476 /* STMT initializes an object from COMPLEX_CST where one or more of the bytes
477    written may be dead stores.  REF is a representation of the memory written.
478    LIVE is the bitmap of stores to REF that are still live.
479 
480    Attempt to rewrite STMT so that only the real or the imaginary part of the
481    object is actually stored.  */
482 
483 static void
484 maybe_trim_complex_store (ao_ref *ref, sbitmap live, gimple *stmt)
485 {
486   int trim_head, trim_tail;
487   compute_trims (ref, live, &trim_head, &trim_tail, stmt);
488 
489   /* The amount of data trimmed from the head or tail must be at
490      least half the size of the object to ensure we're trimming
491      the entire real or imaginary half.  By writing things this
492      way we avoid more O(n) bitmap operations.  */
493   if (known_ge (trim_tail * 2 * BITS_PER_UNIT, ref->size))
494     {
495       /* TREE_REALPART is live */
496       tree x = TREE_REALPART (gimple_assign_rhs1 (stmt));
497       tree y = gimple_assign_lhs (stmt);
498       y = build1 (REALPART_EXPR, TREE_TYPE (x), y);
499       gimple_assign_set_lhs (stmt, y);
500       gimple_assign_set_rhs1 (stmt, x);
501     }
502   else if (known_ge (trim_head * 2 * BITS_PER_UNIT, ref->size))
503     {
504       /* TREE_IMAGPART is live */
505       tree x = TREE_IMAGPART (gimple_assign_rhs1 (stmt));
506       tree y = gimple_assign_lhs (stmt);
507       y = build1 (IMAGPART_EXPR, TREE_TYPE (x), y);
508       gimple_assign_set_lhs (stmt, y);
509       gimple_assign_set_rhs1 (stmt, x);
510     }
511 
512   /* Other cases indicate parts of both the real and imag subobjects
513      are live.  We do not try to optimize those cases.  */
514 }
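
/* Editorial example: given roughly

     _Complex double c;
     c = __builtin_complex (1.0, 2.0);
     __imag c = 3.0;

   the tail (imaginary) half of the first store is dead, so the code above
   rewrites it to store only the real part, roughly REALPART_EXPR <c> = 1.0.  */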
515 
516 /* STMT initializes an object using a CONSTRUCTOR where one or more of the
517    bytes written are dead stores.  REF is a representation of the memory
518    written.  LIVE is the bitmap of stores to REF that are still live.
519 
520    Attempt to rewrite STMT so that it writes fewer memory locations.
521 
522    The most common case for getting here is a CONSTRUCTOR with no elements
523    being used to zero initialize an object.  We do not try to handle other
524    cases as those would force us to fully cover the object with the
525    CONSTRUCTOR node except for the components that are dead.  */
526 
527 static void
528 maybe_trim_constructor_store (ao_ref *ref, sbitmap live, gimple *stmt)
529 {
530   tree ctor = gimple_assign_rhs1 (stmt);
531 
532   /* This is the only case we currently handle.  It actually seems to
533      catch most cases of actual interest.  */
534   gcc_assert (CONSTRUCTOR_NELTS (ctor) == 0);
535 
536   int head_trim = 0;
537   int tail_trim = 0;
538   compute_trims (ref, live, &head_trim, &tail_trim, stmt);
539 
540   /* Now we want to replace the constructor initializer
541      with memset (object + head_trim, 0, size - head_trim - tail_trim).  */
542   if (head_trim || tail_trim)
543     {
544       /* We want &lhs for the MEM_REF expression.  */
545       tree lhs_addr = build_fold_addr_expr (gimple_assign_lhs (stmt));
546 
547       if (! is_gimple_min_invariant (lhs_addr))
548 	return;
549 
550       /* The number of bytes for the new constructor.  */
551       poly_int64 ref_bytes = exact_div (ref->size, BITS_PER_UNIT);
552       poly_int64 count = ref_bytes - head_trim - tail_trim;
553 
554       /* And the new type for the CONSTRUCTOR.  Essentially it's just
555 	 a char array large enough to cover the non-trimmed parts of
556 	 the original CONSTRUCTOR.  Note we want explicit bounds here
557 	 so that we know how many bytes to clear when expanding the
558 	 CONSTRUCTOR.  */
559       tree type = build_array_type_nelts (char_type_node, count);
560 
561       /* Build a suitable alias type rather than using alias set zero
562 	 to avoid pessimizing.  */
563       tree alias_type = reference_alias_ptr_type (gimple_assign_lhs (stmt));
564 
565       /* Build a MEM_REF representing the whole accessed area, starting
566 	 at the first byte not trimmed.  */
567       tree exp = fold_build2 (MEM_REF, type, lhs_addr,
568 			      build_int_cst (alias_type, head_trim));
569 
570       /* Now update STMT with a new RHS and LHS.  */
571       gimple_assign_set_lhs (stmt, exp);
572       gimple_assign_set_rhs1 (stmt, build_constructor (type, NULL));
573     }
574 }
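
/* Editorial example: if the empty-CONSTRUCTOR store in

     struct S { char buf[32]; } s = { };

   has its first 8 bytes made dead by later stores while the rest stays live,
   it can be rewritten to clear only the tail, roughly

     MEM <char[24]> [&s + 8] = { };

   i.e. a char-array MEM_REF starting at the first byte that is not trimmed.  */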
575 
576 /* STMT is a memcpy, memmove or memset.  Decrement the number of bytes
577    copied/set by DECREMENT.  */
578 static void
579 decrement_count (gimple *stmt, int decrement)
580 {
581   tree *countp = gimple_call_arg_ptr (stmt, 2);
582   gcc_assert (TREE_CODE (*countp) == INTEGER_CST);
583   *countp = wide_int_to_tree (TREE_TYPE (*countp), (TREE_INT_CST_LOW (*countp)
584 						    - decrement));
585 }
586 
587 static void
588 increment_start_addr (gimple *stmt, tree *where, int increment)
589 {
590   if (tree lhs = gimple_call_lhs (stmt))
591     if (where == gimple_call_arg_ptr (stmt, 0))
592       {
593 	gassign *newop = gimple_build_assign (lhs, unshare_expr (*where));
594 	gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
595 	gsi_insert_after (&gsi, newop, GSI_SAME_STMT);
596 	gimple_call_set_lhs (stmt, NULL_TREE);
597 	update_stmt (stmt);
598       }
599 
600   if (TREE_CODE (*where) == SSA_NAME)
601     {
602       tree tem = make_ssa_name (TREE_TYPE (*where));
603       gassign *newop
604 	= gimple_build_assign (tem, POINTER_PLUS_EXPR, *where,
605 			       build_int_cst (sizetype, increment));
606       gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
607       gsi_insert_before (&gsi, newop, GSI_SAME_STMT);
608       *where = tem;
609       update_stmt (stmt);
610       return;
611     }
612 
613   *where = build_fold_addr_expr (fold_build2 (MEM_REF, char_type_node,
614 					      *where,
615 					      build_int_cst (ptr_type_node,
616 							     increment)));
617 }
618 
619 /* STMT is a builtin call that writes into the memory REF.  LIVE is the
620    bitmap of bytes of REF that are still live; the rest are dead and need
621    not be stored.  Try to rewrite STMT to reduce the data it actually writes.
622 
623    Right now we only support trimming from the head or the tail of the
624    memory region.  In theory we could split the mem* call, but it's
625    likely of marginal value.  */
626 
627 static void
628 maybe_trim_memstar_call (ao_ref *ref, sbitmap live, gimple *stmt)
629 {
630   int head_trim, tail_trim;
631   switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
632     {
633     case BUILT_IN_STRNCPY:
634     case BUILT_IN_STRNCPY_CHK:
635       compute_trims (ref, live, &head_trim, &tail_trim, stmt);
636       if (head_trim)
637 	{
638 	  /* Head trimming of strncpy is only possible if we can
639 	     prove all bytes we would trim are non-zero (or we could
640 	     turn the strncpy into memset if there must be zero
641 	     among the head trimmed bytes).  If we don't know anything
642 	     about those bytes, the presence or absence of '\0' bytes
643 	     in there will affect whether it acts for the non-trimmed
644 	     bytes as memset or memcpy/strncpy.  */
645 	  c_strlen_data lendata = { };
646 	  int orig_head_trim = head_trim;
647 	  tree srcstr = gimple_call_arg (stmt, 1);
648 	  if (!get_range_strlen (srcstr, &lendata, /*eltsize=*/1)
649 	      || !tree_fits_uhwi_p (lendata.minlen))
650 	    head_trim = 0;
651 	  else if (tree_to_uhwi (lendata.minlen) < (unsigned) head_trim)
652 	    {
653 	      head_trim = tree_to_uhwi (lendata.minlen);
654 	      if ((orig_head_trim & (UNITS_PER_WORD - 1)) == 0)
655 		head_trim &= ~(UNITS_PER_WORD - 1);
656 	    }
657 	  if (orig_head_trim != head_trim
658 	      && dump_file
659 	      && (dump_flags & TDF_DETAILS))
660 	    fprintf (dump_file,
661 		     "  Adjusting strncpy trimming to (head = %d,"
662 		     " tail = %d)\n", head_trim, tail_trim);
663 	}
664       goto do_memcpy;
665 
666     case BUILT_IN_MEMCPY:
667     case BUILT_IN_MEMMOVE:
668     case BUILT_IN_MEMCPY_CHK:
669     case BUILT_IN_MEMMOVE_CHK:
670       compute_trims (ref, live, &head_trim, &tail_trim, stmt);
671 
672     do_memcpy:
673       /* Tail trimming is easy, we can just reduce the count.  */
674       if (tail_trim)
675 	decrement_count (stmt, tail_trim);
676 
677       /* Head trimming requires adjusting all the arguments.  */
678       if (head_trim)
679 	{
680 	  /* For __*_chk need to adjust also the last argument.  */
681 	  if (gimple_call_num_args (stmt) == 4)
682 	    {
683 	      tree size = gimple_call_arg (stmt, 3);
684 	      if (!tree_fits_uhwi_p (size))
685 		break;
686 	      if (!integer_all_onesp (size))
687 		{
688 		  unsigned HOST_WIDE_INT sz = tree_to_uhwi (size);
689 		  if (sz < (unsigned) head_trim)
690 		    break;
691 		  tree arg = wide_int_to_tree (TREE_TYPE (size),
692 					       sz - head_trim);
693 		  gimple_call_set_arg (stmt, 3, arg);
694 		}
695 	    }
696 	  tree *dst = gimple_call_arg_ptr (stmt, 0);
697 	  increment_start_addr (stmt, dst, head_trim);
698 	  tree *src = gimple_call_arg_ptr (stmt, 1);
699 	  increment_start_addr (stmt, src, head_trim);
700 	  decrement_count (stmt, head_trim);
701 	}
702       break;
703 
704     case BUILT_IN_MEMSET:
705     case BUILT_IN_MEMSET_CHK:
706       compute_trims (ref, live, &head_trim, &tail_trim, stmt);
707 
708       /* Tail trimming is easy, we can just reduce the count.  */
709       if (tail_trim)
710 	decrement_count (stmt, tail_trim);
711 
712       /* Head trimming requires adjusting all the arguments.  */
713       if (head_trim)
714 	{
715 	  /* For __*_chk need to adjust also the last argument.  */
716 	  if (gimple_call_num_args (stmt) == 4)
717 	    {
718 	      tree size = gimple_call_arg (stmt, 3);
719 	      if (!tree_fits_uhwi_p (size))
720 		break;
721 	      if (!integer_all_onesp (size))
722 		{
723 		  unsigned HOST_WIDE_INT sz = tree_to_uhwi (size);
724 		  if (sz < (unsigned) head_trim)
725 		    break;
726 		  tree arg = wide_int_to_tree (TREE_TYPE (size),
727 					       sz - head_trim);
728 		  gimple_call_set_arg (stmt, 3, arg);
729 		}
730 	    }
731 	  tree *dst = gimple_call_arg_ptr (stmt, 0);
732 	  increment_start_addr (stmt, dst, head_trim);
733 	  decrement_count (stmt, head_trim);
734 	}
735       break;
736 
737     default:
738       break;
739     }
740 }
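
/* Editorial example: if later stores make the first 8 and the last 4 bytes
   written by

     memcpy (d, s, 32);

   dead, the call can be narrowed to roughly

     memcpy (d + 8, s + 8, 20);

   i.e. both start addresses advance by the head trim and the count shrinks by
   head and tail trim together.  For strncpy the head trim is further limited
   by what is known about the length of the source string.  */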
741 
742 /* STMT is a memory write where one or more bytes written are dead stores.
743    REF is a representation of the memory written.  LIVE is the bitmap of
744    stores to REF that are still live.
745 
746    Attempt to rewrite STMT so that it writes fewer memory locations.  Right
747    now we only support trimming at the start or end of the memory region.
748    It's not clear how much there is to be gained by trimming from the middle
749    of the region.  */
750 
751 static void
752 maybe_trim_partially_dead_store (ao_ref *ref, sbitmap live, gimple *stmt)
753 {
754   if (is_gimple_assign (stmt)
755       && TREE_CODE (gimple_assign_lhs (stmt)) != TARGET_MEM_REF)
756     {
757       switch (gimple_assign_rhs_code (stmt))
758 	{
759 	case CONSTRUCTOR:
760 	  maybe_trim_constructor_store (ref, live, stmt);
761 	  break;
762 	case COMPLEX_CST:
763 	  maybe_trim_complex_store (ref, live, stmt);
764 	  break;
765 	default:
766 	  break;
767 	}
768     }
769 }
770 
771 /* Return TRUE if USE_REF reads bytes from LIVE, where LIVE is
772    derived from REF, a write reference.
773 
774    While this routine may modify USE_REF, callers should not rely on
775    any such modification being visible to them.  */
776 
777 static bool
778 live_bytes_read (ao_ref *use_ref, ao_ref *ref, sbitmap live)
779 {
780   /* We have already verified that USE_REF and REF hit the same object.
781      Now verify that there's actually an overlap between USE_REF and REF.  */
782   HOST_WIDE_INT start, size;
783   if (get_byte_range (use_ref, ref, false, &start, &size))
784     {
785       /* If USE_REF covers all of REF, then it will hit one or more
786 	 live bytes.   This avoids useless iteration over the bitmap
787 	 below.  */
788       if (start == 0 && known_eq (size * 8, ref->size))
789 	return true;
790 
791       /* Now check if any of the remaining bits in use_ref are set in LIVE.  */
792       return bitmap_bit_in_range_p (live, start, (start + size - 1));
793     }
794   return true;
795 }
796 
797 /* Callback for dse_classify_store calling for_each_index.  Verify that
798    indices are invariant in the loop with backedge PHI in basic-block DATA.  */
799 
800 static bool
801 check_name (tree, tree *idx, void *data)
802 {
803   basic_block phi_bb = (basic_block) data;
804   if (TREE_CODE (*idx) == SSA_NAME
805       && !SSA_NAME_IS_DEFAULT_DEF (*idx)
806       && dominated_by_p (CDI_DOMINATORS, gimple_bb (SSA_NAME_DEF_STMT (*idx)),
807 			 phi_bb))
808     return false;
809   return true;
810 }
811 
812 /* STMT stores the value 0 into one or more memory locations
813    (via memset, empty constructor, calloc call, etc).
814 
815    See if there is a subsequent store of the value 0 to one
816    or more of the same memory location(s).  If so, the subsequent
817    store is redundant and can be removed.
818 
819    The subsequent stores could be via memset, empty constructors,
820    simple MEM stores, etc.  */
821 
822 static void
823 dse_optimize_redundant_stores (gimple *stmt)
824 {
825   int cnt = 0;
826 
827   /* TBAA state of STMT, if it is a call it is effectively alias-set zero.  */
828   alias_set_type earlier_set = 0;
829   alias_set_type earlier_base_set = 0;
830   if (is_gimple_assign (stmt))
831     {
832       ao_ref lhs_ref;
833       ao_ref_init (&lhs_ref, gimple_assign_lhs (stmt));
834       earlier_set = ao_ref_alias_set (&lhs_ref);
835       earlier_base_set = ao_ref_base_alias_set (&lhs_ref);
836     }
837 
838   /* We could do something fairly complex and look through PHIs
839      like DSE_CLASSIFY_STORE, but it doesn't seem to be worth
840      the effort.
841 
842      Look at all the immediate uses of the VDEF (which are obviously
843      dominated by STMT).   See if one or more stores 0 into the same
844      memory locations as STMT; if so, remove the immediate use statements.  */
845   tree defvar = gimple_vdef (stmt);
846   imm_use_iterator ui;
847   gimple *use_stmt;
848   FOR_EACH_IMM_USE_STMT (use_stmt, ui, defvar)
849     {
850       /* Limit stmt walking.  */
851       if (++cnt > param_dse_max_alias_queries_per_store)
852 	break;
853 
854       /* If USE_STMT stores 0 into one or more of the same locations
855 	 as STMT and STMT would kill USE_STMT, then we can just remove
856 	 USE_STMT.  */
857       tree fndecl;
858       if ((is_gimple_assign (use_stmt)
859 	   && gimple_vdef (use_stmt)
860 	   && (gimple_assign_single_p (use_stmt)
861 	       && initializer_zerop (gimple_assign_rhs1 (use_stmt))))
862 	  || (gimple_call_builtin_p (use_stmt, BUILT_IN_NORMAL)
863 	      && (fndecl = gimple_call_fndecl (use_stmt)) != NULL
864 	      && (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_MEMSET
865 		  || DECL_FUNCTION_CODE (fndecl) == BUILT_IN_MEMSET_CHK)
866 	      && integer_zerop (gimple_call_arg (use_stmt, 1))))
867 	{
868 	  ao_ref write;
869 
870 	  if (!initialize_ao_ref_for_dse (use_stmt, &write))
871 	    break;
872 
873 	  if (valid_ao_ref_for_dse (&write)
874 	      && stmt_kills_ref_p (stmt, &write))
875 	    {
876 	      gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
877 	      if (is_gimple_assign (use_stmt))
878 		{
879 		  ao_ref lhs_ref;
880 		  ao_ref_init (&lhs_ref, gimple_assign_lhs (use_stmt));
881 		  if ((earlier_set == ao_ref_alias_set (&lhs_ref)
882 		       || alias_set_subset_of (ao_ref_alias_set (&lhs_ref),
883 					       earlier_set))
884 		      && (earlier_base_set == ao_ref_base_alias_set (&lhs_ref)
885 			  || alias_set_subset_of
886 			       (ao_ref_base_alias_set (&lhs_ref),
887 						  earlier_base_set)))
888 		    delete_dead_or_redundant_assignment (&gsi, "redundant",
889 							 need_eh_cleanup,
890 							 need_ab_cleanup);
891 		}
892 	      else if (is_gimple_call (use_stmt))
893 		{
894 		  if ((earlier_set == 0
895 		       || alias_set_subset_of (0, earlier_set))
896 		      && (earlier_base_set == 0
897 			  || alias_set_subset_of (0, earlier_base_set)))
898 		  delete_dead_or_redundant_call (&gsi, "redundant");
899 		}
900 	      else
901 		gcc_unreachable ();
902 	    }
903 	}
904     }
905 }
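
/* Editorial example of the redundant-store case handled above:

     int *p = calloc (4, 8);   /* already zero-fills all 32 bytes  */
     memset (p, 0, 16);        /* redundant zero store, removed here  */
     p[0] = 0;                 /* likewise redundant if the TBAA checks pass  */

   Only zero stores that STMT is known to kill are removed, and only when
   their alias-set information is compatible with that of STMT.  */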
906 
907 /* A helper of dse_optimize_stmt.
908    Given a GIMPLE_ASSIGN in STMT that writes to REF, classify it
909    according to downstream uses and defs.  Sets *BY_CLOBBER_P to true
910    if only clobber statements influenced the classification result.
911    Returns the classification.  */
912 
913 dse_store_status
914 dse_classify_store (ao_ref *ref, gimple *stmt,
915 		    bool byte_tracking_enabled, sbitmap live_bytes,
916 		    bool *by_clobber_p, tree stop_at_vuse)
917 {
918   gimple *temp;
919   int cnt = 0;
920   auto_bitmap visited;
921 
922   if (by_clobber_p)
923     *by_clobber_p = true;
924 
925   /* Find the first dominated statement that clobbers (part of) the
926      memory stmt stores to, with no intermediate statement that may use
927      part of that memory.  That is, find a store that may
928      prove stmt to be a dead store.  */
929   temp = stmt;
930   do
931     {
932       gimple *use_stmt;
933       imm_use_iterator ui;
934       bool fail = false;
935       tree defvar;
936 
937       if (gimple_code (temp) == GIMPLE_PHI)
938 	{
939 	  defvar = PHI_RESULT (temp);
940 	  bitmap_set_bit (visited, SSA_NAME_VERSION (defvar));
941 	}
942       else
943 	defvar = gimple_vdef (temp);
944 
945       /* If we're instructed to stop walking at region boundary, do so.  */
946       if (defvar == stop_at_vuse)
947 	return DSE_STORE_LIVE;
948 
949       auto_vec<gimple *, 10> defs;
950       gimple *first_phi_def = NULL;
951       gimple *last_phi_def = NULL;
952       FOR_EACH_IMM_USE_STMT (use_stmt, ui, defvar)
953 	{
954 	  /* Limit stmt walking.  */
955 	  if (++cnt > param_dse_max_alias_queries_per_store)
956 	    {
957 	      fail = true;
958 	      break;
959 	    }
960 
961 	  /* In simple cases we can look through PHI nodes, but we
962 	     have to be careful with loops and with memory references
963 	     containing operands that are also operands of PHI nodes.
964 	     See gcc.c-torture/execute/20051110-*.c.  */
965 	  if (gimple_code (use_stmt) == GIMPLE_PHI)
966 	    {
967 	      /* If we already visited this PHI ignore it for further
968 		 processing.  */
969 	      if (!bitmap_bit_p (visited,
970 				 SSA_NAME_VERSION (PHI_RESULT (use_stmt))))
971 		{
972 		  /* If we visit this PHI by following a backedge then we have
973 		     to make sure ref->ref only refers to SSA names that are
974 		     invariant with respect to the loop represented by this
975 		     PHI node.  */
976 		  if (dominated_by_p (CDI_DOMINATORS, gimple_bb (stmt),
977 				      gimple_bb (use_stmt))
978 		      && !for_each_index (ref->ref ? &ref->ref : &ref->base,
979 					  check_name, gimple_bb (use_stmt)))
980 		    return DSE_STORE_LIVE;
981 		  defs.safe_push (use_stmt);
982 		  if (!first_phi_def)
983 		    first_phi_def = use_stmt;
984 		  last_phi_def = use_stmt;
985 		}
986 	    }
987 	  /* If the statement is a use the store is not dead.  */
988 	  else if (ref_maybe_used_by_stmt_p (use_stmt, ref))
989 	    {
990 	      /* Handle common cases where we can easily build an ao_ref
991 		 structure for USE_STMT and in doing so we find that the
992 		 references hit non-live bytes and thus can be ignored.
993 
994 		 TODO: We can also use modref summary to handle calls.  */
995 	      if (byte_tracking_enabled
996 		  && is_gimple_assign (use_stmt))
997 		{
998 		  ao_ref use_ref;
999 		  ao_ref_init (&use_ref, gimple_assign_rhs1 (use_stmt));
1000 		  if (valid_ao_ref_for_dse (&use_ref)
1001 		      && operand_equal_p (use_ref.base, ref->base,
1002 					  OEP_ADDRESS_OF)
1003 		      && !live_bytes_read (&use_ref, ref, live_bytes))
1004 		    {
1005 		      /* If this is a store, remember it as we possibly
1006 			 need to walk the defs uses.  */
1007 		      if (gimple_vdef (use_stmt))
1008 			defs.safe_push (use_stmt);
1009 		      continue;
1010 		    }
1011 		}
1012 
1013 	      fail = true;
1014 	      break;
1015 	    }
1016 	  /* We have visited ourselves already so ignore STMT for the
1017 	     purpose of chaining.  */
1018 	  else if (use_stmt == stmt)
1019 	    ;
1020 	  /* If this is a store, remember it as we possibly need to walk the
1021 	     defs uses.  */
1022 	  else if (gimple_vdef (use_stmt))
1023 	    defs.safe_push (use_stmt);
1024 	}
1025 
1026       if (fail)
1027 	{
1028 	  /* STMT might be partially dead and we may be able to reduce
1029 	     how many memory locations it stores into.  */
1030 	  if (byte_tracking_enabled && !gimple_clobber_p (stmt))
1031 	    return DSE_STORE_MAYBE_PARTIAL_DEAD;
1032 	  return DSE_STORE_LIVE;
1033 	}
1034 
1035       /* If we didn't find any definition this means the store is dead
1036          if it isn't a store to globally reachable memory.  In this case
1037 	 just pretend the stmt makes itself dead.  Otherwise fail.  */
1038       if (defs.is_empty ())
1039 	{
1040 	  if (ref_may_alias_global_p (ref, false))
1041 	    return DSE_STORE_LIVE;
1042 
1043 	  if (by_clobber_p)
1044 	    *by_clobber_p = false;
1045 	  return DSE_STORE_DEAD;
1046 	}
1047 
1048       /* Process defs and remove those we need not process further.  */
1049       for (unsigned i = 0; i < defs.length ();)
1050 	{
1051 	  gimple *def = defs[i];
1052 	  gimple *use_stmt;
1053 	  use_operand_p use_p;
1054 	  tree vdef = (gimple_code (def) == GIMPLE_PHI
1055 		       ? gimple_phi_result (def) : gimple_vdef (def));
1056 	  /* If the path to check starts with a kill we do not need to
1057 	     process it further.
1058 	     ???  With byte tracking we need only kill the bytes currently
1059 	     live.  */
1060 	  if (stmt_kills_ref_p (def, ref))
1061 	    {
1062 	      if (by_clobber_p && !gimple_clobber_p (def))
1063 		*by_clobber_p = false;
1064 	      defs.unordered_remove (i);
1065 	    }
1066 	  /* If the path ends here we do not need to process it further.
1067 	     This for example happens with calls to noreturn functions.  */
1068 	  else if (has_zero_uses (vdef))
1069 	    {
1070 	      /* But if the store is to global memory it is definitely
1071 		 not dead.  */
1072 	      if (ref_may_alias_global_p (ref, false))
1073 		return DSE_STORE_LIVE;
1074 	      defs.unordered_remove (i);
1075 	    }
1076 	  /* In addition to kills we can remove defs whose only use
1077 	     is another def in defs.  That can only ever be PHIs of which
1078 	     we track two for simplicity reasons, the first and last in
1079 	     {first,last}_phi_def (we fail for multiple PHIs anyways).
1080 	     We can also ignore defs that feed only into
1081 	     already visited PHIs.  */
1082 	  else if (single_imm_use (vdef, &use_p, &use_stmt)
1083 		   && (use_stmt == first_phi_def
1084 		       || use_stmt == last_phi_def
1085 		       || (gimple_code (use_stmt) == GIMPLE_PHI
1086 			   && bitmap_bit_p (visited,
1087 					    SSA_NAME_VERSION
1088 					      (PHI_RESULT (use_stmt))))))
1089 	    defs.unordered_remove (i);
1090 	  else
1091 	    ++i;
1092 	}
1093 
1094       /* If all defs kill the ref we are done.  */
1095       if (defs.is_empty ())
1096 	return DSE_STORE_DEAD;
1097       /* If more than one def survives fail.  */
1098       if (defs.length () > 1)
1099 	{
1100 	  /* STMT might be partially dead and we may be able to reduce
1101 	     how many memory locations it stores into.  */
1102 	  if (byte_tracking_enabled && !gimple_clobber_p (stmt))
1103 	    return DSE_STORE_MAYBE_PARTIAL_DEAD;
1104 	  return DSE_STORE_LIVE;
1105 	}
1106       temp = defs[0];
1107 
1108       /* Track partial kills.  */
1109       if (byte_tracking_enabled)
1110 	{
1111 	  clear_bytes_written_by (live_bytes, temp, ref);
1112 	  if (bitmap_empty_p (live_bytes))
1113 	    {
1114 	      if (by_clobber_p && !gimple_clobber_p (temp))
1115 		*by_clobber_p = false;
1116 	      return DSE_STORE_DEAD;
1117 	    }
1118 	}
1119     }
1120   /* Continue walking until there are no more live bytes.  */
1121   while (1);
1122 }
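
/* Editorial example of classification: in

     *p = 1;          /* the store being classified  */
     if (cond)
       *p = 2;
     else
       *p = 3;

   every use of the first store's virtual definition is a store that kills the
   reference, so the walk above returns DSE_STORE_DEAD.  If one path instead
   read *p before overwriting it, the result would be DSE_STORE_LIVE, or
   DSE_STORE_MAYBE_PARTIAL_DEAD when byte tracking shows that only part of the
   original store survives.  */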
1123 
1124 
1125 /* Delete a dead or redundant call at GSI, which is a mem* call of some kind.  */
1126 static void
1127 delete_dead_or_redundant_call (gimple_stmt_iterator *gsi, const char *type)
1128 {
1129   gimple *stmt = gsi_stmt (*gsi);
1130   if (dump_file && (dump_flags & TDF_DETAILS))
1131     {
1132       fprintf (dump_file, "  Deleted %s call: ", type);
1133       print_gimple_stmt (dump_file, stmt, 0, dump_flags);
1134       fprintf (dump_file, "\n");
1135     }
1136 
1137   basic_block bb = gimple_bb (stmt);
1138   tree lhs = gimple_call_lhs (stmt);
1139   if (lhs)
1140     {
1141       tree ptr = gimple_call_arg (stmt, 0);
1142       gimple *new_stmt = gimple_build_assign (lhs, ptr);
1143       unlink_stmt_vdef (stmt);
1144       if (gsi_replace (gsi, new_stmt, true))
1145 	bitmap_set_bit (need_eh_cleanup, bb->index);
1146     }
1147   else
1148     {
1149       /* Then we need to fix the operand of the consuming stmt.  */
1150       unlink_stmt_vdef (stmt);
1151 
1152       /* Remove the dead store.  */
1153       if (gsi_remove (gsi, true))
1154 	bitmap_set_bit (need_eh_cleanup, bb->index);
1155       release_defs (stmt);
1156     }
1157 }
1158 
1159 /* Delete a dead or redundant store at GSI, which is a gimple assignment.  */
1160 
1161 void
1162 delete_dead_or_redundant_assignment (gimple_stmt_iterator *gsi,
1163 				     const char *type,
1164 				     bitmap need_eh_cleanup,
1165 				     bitmap need_ab_cleanup)
1166 {
1167   gimple *stmt = gsi_stmt (*gsi);
1168   if (dump_file && (dump_flags & TDF_DETAILS))
1169     {
1170       fprintf (dump_file, "  Deleted %s store: ", type);
1171       print_gimple_stmt (dump_file, stmt, 0, dump_flags);
1172       fprintf (dump_file, "\n");
1173     }
1174 
1175   /* Then we need to fix the operand of the consuming stmt.  */
1176   unlink_stmt_vdef (stmt);
1177 
1178   /* Remove the dead store.  */
1179   basic_block bb = gimple_bb (stmt);
1180   if (need_ab_cleanup && stmt_can_make_abnormal_goto (stmt))
1181     bitmap_set_bit (need_ab_cleanup, bb->index);
1182   if (gsi_remove (gsi, true) && need_eh_cleanup)
1183     bitmap_set_bit (need_eh_cleanup, bb->index);
1184 
1185   /* And release any SSA_NAMEs set in this statement back to the
1186      SSA_NAME manager.  */
1187   release_defs (stmt);
1188 }
1189 
1190 /* Try to prove, using the modref summary, that all memory written to by a call
1191    is dead, and if so remove the call.  Assume that if the return value is
1192    written to memory it has already been proved to be dead.  */
1193 
1194 static bool
1195 dse_optimize_call (gimple_stmt_iterator *gsi, sbitmap live_bytes)
1196 {
1197   gcall *stmt = dyn_cast <gcall *> (gsi_stmt (*gsi));
1198 
1199   if (!stmt)
1200     return false;
1201 
1202   tree callee = gimple_call_fndecl (stmt);
1203 
1204   if (!callee)
1205     return false;
1206 
1207   /* Pure/const functions are optimized by normal DCE
1208      or handled as a store above.  */
1209   int flags = gimple_call_flags (stmt);
1210   if ((flags & (ECF_PURE|ECF_CONST|ECF_NOVOPS))
1211       && !(flags & (ECF_LOOPING_CONST_OR_PURE)))
1212     return false;
1213 
1214   cgraph_node *node = cgraph_node::get (callee);
1215   if (!node)
1216     return false;
1217 
1218   if (stmt_could_throw_p (cfun, stmt)
1219       && !cfun->can_delete_dead_exceptions)
1220     return false;
1221 
1222   /* If return value is used the call is not dead.  */
1223   tree lhs = gimple_call_lhs (stmt);
1224   if (lhs && TREE_CODE (lhs) == SSA_NAME)
1225     {
1226       imm_use_iterator ui;
1227       gimple *use_stmt;
1228       FOR_EACH_IMM_USE_STMT (use_stmt, ui, lhs)
1229 	if (!is_gimple_debug (use_stmt))
1230 	  return false;
1231     }
1232 
1233   /* Verify that there are no side-effects except for return value
1234      and memory writes tracked by modref.  */
1235   modref_summary *summary = get_modref_function_summary (node);
1236   if (!summary || !summary->try_dse)
1237     return false;
1238 
1239   bool by_clobber_p = false;
1240 
1241   /* Walk all memory writes and verify that they are dead.  */
1242   for (auto base_node : summary->stores->bases)
1243     for (auto ref_node : base_node->refs)
1244       for (auto access_node : ref_node->accesses)
1245 	{
1246 	  tree arg = access_node.get_call_arg (stmt);
1247 
1248 	  if (!arg || !POINTER_TYPE_P (TREE_TYPE (arg)))
1249 	    return false;
1250 
1251 	  if (integer_zerop (arg)
1252 	      && !targetm.addr_space.zero_address_valid
1253 		    (TYPE_ADDR_SPACE (TREE_TYPE (arg))))
1254 	    continue;
1255 
1256 	  ao_ref ref;
1257 
1258 	  if (!access_node.get_ao_ref (stmt, &ref))
1259 	    return false;
1260 	  ref.ref_alias_set = ref_node->ref;
1261 	  ref.base_alias_set = base_node->base;
1262 
1263 	  bool byte_tracking_enabled
1264 	      = setup_live_bytes_from_ref (&ref, live_bytes);
1265 	  enum dse_store_status store_status;
1266 
1267 	  store_status = dse_classify_store (&ref, stmt,
1268 					     byte_tracking_enabled,
1269 					     live_bytes, &by_clobber_p);
1270 	  if (store_status != DSE_STORE_DEAD)
1271 	    return false;
1272 	}
1273   delete_dead_or_redundant_assignment (gsi, "dead", need_eh_cleanup,
1274 				       need_ab_cleanup);
1275   return true;
1276 }
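
/* Editorial example (assuming IPA modref has produced a summary with try_dse
   set for the callee): in

     static void set42 (int *p) { *p = 42; }
     ...
     set42 (&x);
     x = 0;

   the only memory the call writes is x, which the following store kills, so
   the call itself can be deleted by the code above.  */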
1277 
1278 /* Attempt to eliminate dead stores in the statement referenced by BSI.
1279 
1280    A dead store is a store into a memory location which will later be
1281    overwritten by another store without any intervening loads.  In this
1282    case the earlier store can be deleted.
1283 
1284    In our SSA + virtual operand world we use immediate uses of virtual
1285    operands to detect dead stores.  If a store's virtual definition
1286    is used precisely once by a later store to the same location which
1287    post dominates the first store, then the first store is dead.  */
1288 
1289 static void
1290 dse_optimize_stmt (function *fun, gimple_stmt_iterator *gsi, sbitmap live_bytes)
1291 {
1292   gimple *stmt = gsi_stmt (*gsi);
1293 
1294   /* Don't return early on *this_2(D) ={v} {CLOBBER}.  */
1295   if (gimple_has_volatile_ops (stmt)
1296       && (!gimple_clobber_p (stmt)
1297 	  || TREE_CODE (gimple_assign_lhs (stmt)) != MEM_REF))
1298     return;
1299 
1300   ao_ref ref;
1301   /* If this is not a store we can still remove a dead call using the
1302      modref summary.  Note we specifically allow ref to be initialized
1303      to a conservative may-def since we are looking for follow-up stores
1304      to kill all of it.  */
1305   if (!initialize_ao_ref_for_dse (stmt, &ref, true))
1306     {
1307       dse_optimize_call (gsi, live_bytes);
1308       return;
1309     }
1310 
1311   /* We know we have virtual definitions.  We can handle assignments and
1312      some builtin calls.  */
1313   if (gimple_call_builtin_p (stmt, BUILT_IN_NORMAL))
1314     {
1315       tree fndecl = gimple_call_fndecl (stmt);
1316       switch (DECL_FUNCTION_CODE (fndecl))
1317 	{
1318 	case BUILT_IN_MEMCPY:
1319 	case BUILT_IN_MEMMOVE:
1320 	case BUILT_IN_STRNCPY:
1321 	case BUILT_IN_MEMSET:
1322 	case BUILT_IN_MEMCPY_CHK:
1323 	case BUILT_IN_MEMMOVE_CHK:
1324 	case BUILT_IN_STRNCPY_CHK:
1325 	case BUILT_IN_MEMSET_CHK:
1326 	  {
1327 	    /* Occasionally calls with an explicit length of zero
1328 	       show up in the IL.  It's pointless to do analysis
1329 	       on them, they're trivially dead.  */
1330 	    tree size = gimple_call_arg (stmt, 2);
1331 	    if (integer_zerop (size))
1332 	      {
1333 		delete_dead_or_redundant_call (gsi, "dead");
1334 		return;
1335 	      }
1336 
1337 	    /* If this is a memset call that initializes an object
1338 	       to zero, it may be redundant with an earlier memset
1339 	       or empty CONSTRUCTOR of a larger object.  */
1340 	    if ((DECL_FUNCTION_CODE (fndecl) == BUILT_IN_MEMSET
1341 		 || DECL_FUNCTION_CODE (fndecl) == BUILT_IN_MEMSET_CHK)
1342 		&& integer_zerop (gimple_call_arg (stmt, 1)))
1343 	      dse_optimize_redundant_stores (stmt);
1344 
1345 	    enum dse_store_status store_status;
1346 	    bool byte_tracking_enabled
1347 	      = setup_live_bytes_from_ref (&ref, live_bytes);
1348 	    store_status = dse_classify_store (&ref, stmt,
1349 					       byte_tracking_enabled,
1350 					       live_bytes);
1351 	    if (store_status == DSE_STORE_LIVE)
1352 	      return;
1353 
1354 	    if (store_status == DSE_STORE_MAYBE_PARTIAL_DEAD)
1355 	      {
1356 		maybe_trim_memstar_call (&ref, live_bytes, stmt);
1357 		return;
1358 	      }
1359 
1360 	    if (store_status == DSE_STORE_DEAD)
1361 	      delete_dead_or_redundant_call (gsi, "dead");
1362 	    return;
1363 	  }
1364 
1365 	case BUILT_IN_CALLOC:
1366 	  /* We already know the arguments are integer constants.  */
1367 	  dse_optimize_redundant_stores (stmt);
1368 	  return;
1369 
1370 	default:
1371 	  return;
1372 	}
1373     }
1374 
1375   bool by_clobber_p = false;
1376 
1377   /* Check if this statement stores zero to a memory location,
1378      and if there is a subsequent store of zero to the same
1379      memory location.  If so, remove the subsequent store.  */
1380   if (gimple_assign_single_p (stmt)
1381       && initializer_zerop (gimple_assign_rhs1 (stmt)))
1382     dse_optimize_redundant_stores (stmt);
1383 
1384   /* Self-assignments are zombies.  */
1385   if (is_gimple_assign (stmt)
1386       && operand_equal_p (gimple_assign_rhs1 (stmt),
1387 			  gimple_assign_lhs (stmt), 0))
1388     ;
1389   else
1390     {
1391       bool byte_tracking_enabled
1392 	  = setup_live_bytes_from_ref (&ref, live_bytes);
1393       enum dse_store_status store_status;
1394       store_status = dse_classify_store (&ref, stmt,
1395 					 byte_tracking_enabled,
1396 					 live_bytes, &by_clobber_p);
1397       if (store_status == DSE_STORE_LIVE)
1398 	return;
1399 
1400       if (store_status == DSE_STORE_MAYBE_PARTIAL_DEAD)
1401 	{
1402 	  maybe_trim_partially_dead_store (&ref, live_bytes, stmt);
1403 	  return;
1404 	}
1405     }
1406 
1407   /* Now we know that stmt is a dead store or a removable self-assignment.  */
1408 
1409   /* But only remove *this_2(D) ={v} {CLOBBER} if killed by
1410      another clobber stmt.  */
1411   if (gimple_clobber_p (stmt)
1412       && !by_clobber_p)
1413     return;
1414 
1415   if (is_gimple_call (stmt)
1416       && (gimple_has_side_effects (stmt)
1417 	  || (stmt_could_throw_p (fun, stmt)
1418 	      && !fun->can_delete_dead_exceptions)))
1419     {
1420       /* See if we can remove the complete call.  */
1421       if (dse_optimize_call (gsi, live_bytes))
1422 	return;
1423       /* Make sure we do not remove a return slot we cannot reconstruct
1424 	 later.  */
1425       if (gimple_call_return_slot_opt_p (as_a <gcall *>(stmt))
1426 	  && (TREE_ADDRESSABLE (TREE_TYPE (gimple_call_fntype (stmt)))
1427 	      || !poly_int_tree_p
1428 		    (TYPE_SIZE (TREE_TYPE (gimple_call_fntype (stmt))))))
1429 	return;
1430       if (dump_file && (dump_flags & TDF_DETAILS))
1431 	{
1432 	  fprintf (dump_file, "  Deleted dead store in call LHS: ");
1433 	  print_gimple_stmt (dump_file, stmt, 0, dump_flags);
1434 	  fprintf (dump_file, "\n");
1435 	}
1436       gimple_call_set_lhs (stmt, NULL_TREE);
1437       update_stmt (stmt);
1438     }
1439   else
1440     delete_dead_or_redundant_assignment (gsi, "dead", need_eh_cleanup,
1441 					 need_ab_cleanup);
1442 }
1443 
1444 namespace {
1445 
1446 const pass_data pass_data_dse =
1447 {
1448   GIMPLE_PASS, /* type */
1449   "dse", /* name */
1450   OPTGROUP_NONE, /* optinfo_flags */
1451   TV_TREE_DSE, /* tv_id */
1452   ( PROP_cfg | PROP_ssa ), /* properties_required */
1453   0, /* properties_provided */
1454   0, /* properties_destroyed */
1455   0, /* todo_flags_start */
1456   0, /* todo_flags_finish */
1457 };
1458 
1459 class pass_dse : public gimple_opt_pass
1460 {
1461 public:
1462   pass_dse (gcc::context *ctxt)
1463     : gimple_opt_pass (pass_data_dse, ctxt)
1464   {}
1465 
1466   /* opt_pass methods: */
1467   opt_pass * clone () { return new pass_dse (m_ctxt); }
1468   virtual bool gate (function *) { return flag_tree_dse != 0; }
1469   virtual unsigned int execute (function *);
1470 
1471 }; // class pass_dse
1472 
1473 unsigned int
1474 pass_dse::execute (function *fun)
1475 {
1476   unsigned todo = 0;
1477   bool released_def = false;
1478 
1479   need_eh_cleanup = BITMAP_ALLOC (NULL);
1480   need_ab_cleanup = BITMAP_ALLOC (NULL);
1481   auto_sbitmap live_bytes (param_dse_max_object_size);
1482 
1483   renumber_gimple_stmt_uids (fun);
1484 
1485   calculate_dominance_info (CDI_DOMINATORS);
1486 
1487   /* Dead store elimination is fundamentally a reverse program order walk.  */
1488   int *rpo = XNEWVEC (int, n_basic_blocks_for_fn (fun) - NUM_FIXED_BLOCKS);
1489   int n = pre_and_rev_post_order_compute_fn (fun, NULL, rpo, false);
1490   for (int i = n; i != 0; --i)
1491     {
1492       basic_block bb = BASIC_BLOCK_FOR_FN (fun, rpo[i-1]);
1493       gimple_stmt_iterator gsi;
1494 
1495       for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi);)
1496 	{
1497 	  gimple *stmt = gsi_stmt (gsi);
1498 
1499 	  if (gimple_vdef (stmt))
1500 	    dse_optimize_stmt (fun, &gsi, live_bytes);
1501 	  else if (def_operand_p
1502 		     def_p = single_ssa_def_operand (stmt, SSA_OP_DEF))
1503 	    {
1504 	      /* When we remove dead stores make sure to also delete trivially
1505 		 dead SSA defs.  */
1506 	      if (has_zero_uses (DEF_FROM_PTR (def_p))
1507 		  && !gimple_has_side_effects (stmt)
1508 		  && !is_ctrl_altering_stmt (stmt)
1509 		  && (!stmt_could_throw_p (fun, stmt)
1510 		      || fun->can_delete_dead_exceptions))
1511 		{
1512 		  if (dump_file && (dump_flags & TDF_DETAILS))
1513 		    {
1514 		      fprintf (dump_file, "  Deleted trivially dead stmt: ");
1515 		      print_gimple_stmt (dump_file, stmt, 0, dump_flags);
1516 		      fprintf (dump_file, "\n");
1517 		    }
1518 		  if (gsi_remove (&gsi, true) && need_eh_cleanup)
1519 		    bitmap_set_bit (need_eh_cleanup, bb->index);
1520 		  release_defs (stmt);
1521 		  released_def = true;
1522 		}
1523 	    }
1524 	  if (gsi_end_p (gsi))
1525 	    gsi = gsi_last_bb (bb);
1526 	  else
1527 	    gsi_prev (&gsi);
1528 	}
1529       bool removed_phi = false;
1530       for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (si);)
1531 	{
1532 	  gphi *phi = si.phi ();
1533 	  if (has_zero_uses (gimple_phi_result (phi)))
1534 	    {
1535 	      if (dump_file && (dump_flags & TDF_DETAILS))
1536 		{
1537 		  fprintf (dump_file, "  Deleted trivially dead PHI: ");
1538 		  print_gimple_stmt (dump_file, phi, 0, dump_flags);
1539 		  fprintf (dump_file, "\n");
1540 		}
1541 	      remove_phi_node (&si, true);
1542 	      removed_phi = true;
1543 	      released_def = true;
1544 	    }
1545 	  else
1546 	    gsi_next (&si);
1547 	}
1548       if (removed_phi && gimple_seq_empty_p (phi_nodes (bb)))
1549 	todo |= TODO_cleanup_cfg;
1550     }
1551   free (rpo);
1552 
1553   /* Removal of stores may make some EH edges dead.  Purge such edges from
1554      the CFG as needed.  */
1555   if (!bitmap_empty_p (need_eh_cleanup))
1556     {
1557       gimple_purge_all_dead_eh_edges (need_eh_cleanup);
1558       todo |= TODO_cleanup_cfg;
1559     }
1560   if (!bitmap_empty_p (need_ab_cleanup))
1561     {
1562       gimple_purge_all_dead_abnormal_call_edges (need_ab_cleanup);
1563       todo |= TODO_cleanup_cfg;
1564     }
1565 
1566   BITMAP_FREE (need_eh_cleanup);
1567   BITMAP_FREE (need_ab_cleanup);
1568 
1569   if (released_def)
1570     free_numbers_of_iterations_estimates (fun);
1571 
1572   return todo;
1573 }
1574 
1575 } // anon namespace
1576 
1577 gimple_opt_pass *
1578 make_pass_dse (gcc::context *ctxt)
1579 {
1580   return new pass_dse (ctxt);
1581 }
1582