xref: /netbsd-src/external/gpl3/gcc/dist/gcc/fwprop.cc (revision 0a3071956a3a9fdebdbf7f338cf2d439b45fc728)
1 /* RTL-based forward propagation pass for GNU compiler.
2    Copyright (C) 2005-2022 Free Software Foundation, Inc.
3    Contributed by Paolo Bonzini and Steven Bosscher.
4 
5 This file is part of GCC.
6 
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11 
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3.  If not see
19 <http://www.gnu.org/licenses/>.  */
20 
21 #define INCLUDE_ALGORITHM
22 #define INCLUDE_FUNCTIONAL
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "backend.h"
27 #include "rtl.h"
28 #include "rtlanal.h"
29 #include "df.h"
30 #include "rtl-ssa.h"
31 
32 #include "predict.h"
33 #include "cfgrtl.h"
34 #include "cfgcleanup.h"
35 #include "cfgloop.h"
36 #include "tree-pass.h"
37 #include "rtl-iter.h"
38 #include "target.h"
39 
40 /* This pass does simple forward propagation and simplification when an
41    operand of an insn can only come from a single def.  This pass uses
42    RTL SSA, so it is global.  However, we only do limited analysis of
43    available expressions.
44 
45    1) The pass tries to propagate the source of the def into the use,
46    and checks if the result is independent of the substituted value.
47    For example, the high word of a (zero_extend:DI (reg:SI M)) is always
48    zero, independent of the source register.
49 
50    In particular, we propagate constants into the use site.  Sometimes
51    RTL expansion did not put the constant in the same insn on purpose,
52    to satisfy a predicate, and the result will fail to be recognized;
53    but this happens rarely and in this case we can still create a
54    REG_EQUAL note.  For multi-word operations, this
55 
56       (set (subreg:SI (reg:DI 120) 0) (const_int 0))
57       (set (subreg:SI (reg:DI 120) 4) (const_int -1))
58       (set (subreg:SI (reg:DI 122) 0)
59 	 (ior:SI (subreg:SI (reg:DI 119) 0) (subreg:SI (reg:DI 120) 0)))
60       (set (subreg:SI (reg:DI 122) 4)
61 	 (ior:SI (subreg:SI (reg:DI 119) 4) (subreg:SI (reg:DI 120) 4)))
62 
63    can be simplified to the much simpler
64 
65       (set (subreg:SI (reg:DI 122) 0) (subreg:SI (reg:DI 119)))
66       (set (subreg:SI (reg:DI 122) 4) (const_int -1))
67 
68    This particular propagation is also effective at putting together
69    complex addressing modes.  We are more aggressive inside MEMs, in
70    that all definitions are propagated if the use is in a MEM; if the
71    result is a valid memory address we check address_cost to decide
72    whether the substitution is worthwhile.
73 
74    2) The pass propagates register copies.  This is not as effective as
75    the copy propagation done by CSE's canon_reg, which works by walking
76    the instruction chain, it can help the other transformations.
77 
78    We should consider removing this optimization, and instead reorder the
79    RTL passes, because GCSE does this transformation too.  With some luck,
80    the CSE pass at the end of rest_of_handle_gcse could also go away.
81 
82    3) The pass looks for paradoxical subregs that are actually unnecessary.
83    Things like this:
84 
85      (set (reg:QI 120) (subreg:QI (reg:SI 118) 0))
86      (set (reg:QI 121) (subreg:QI (reg:SI 119) 0))
87      (set (reg:SI 122) (plus:SI (subreg:SI (reg:QI 120) 0)
88 				(subreg:SI (reg:QI 121) 0)))
89 
90    are very common on machines that can only do word-sized operations.
91    For each use of a paradoxical subreg (subreg:WIDER (reg:NARROW N) 0),
92    if it has a single def and it is (subreg:NARROW (reg:WIDE M) 0),
93    we can replace the paradoxical subreg with simply (reg:WIDE M).  The
94    above will simplify this to
95 
96      (set (reg:QI 120) (subreg:QI (reg:SI 118) 0))
97      (set (reg:QI 121) (subreg:QI (reg:SI 119) 0))
98      (set (reg:SI 122) (plus:SI (reg:SI 118) (reg:SI 119)))
99 
100    where the first two insns are now dead.  */
101 
102 using namespace rtl_ssa;
103 
104 static int num_changes;
105 
106 /* Do not try to replace constant addresses or addresses of local and
107    argument slots.  These MEM expressions are made only once and inserted
108    in many instructions, as well as being used to control symbol table
109    output.  It is not safe to clobber them.
110 
111    There are some uncommon cases where the address is already in a register
112    for some reason, but we cannot take advantage of that because we have
113    no easy way to unshare the MEM.  In addition, looking up all stack
114    addresses is costly.  */
115 
116 static bool
can_simplify_addr(rtx addr)117 can_simplify_addr (rtx addr)
118 {
119   rtx reg;
120 
121   if (CONSTANT_ADDRESS_P (addr))
122     return false;
123 
124   if (GET_CODE (addr) == PLUS)
125     reg = XEXP (addr, 0);
126   else
127     reg = addr;
128 
129   return (!REG_P (reg)
130 	  || (REGNO (reg) != FRAME_POINTER_REGNUM
131 	      && REGNO (reg) != HARD_FRAME_POINTER_REGNUM
132 	      && REGNO (reg) != ARG_POINTER_REGNUM));
133 }
134 
135 /* MEM is the result of an address simplification, and temporarily
136    undoing changes OLD_NUM_CHANGES onwards restores the original address.
137    Return whether it is good to use the new address instead of the
138    old one.  INSN is the containing instruction.  */
139 
140 static bool
should_replace_address(int old_num_changes,rtx mem,rtx_insn * insn)141 should_replace_address (int old_num_changes, rtx mem, rtx_insn *insn)
142 {
143   int gain;
144 
145   /* Prefer the new address if it is less expensive.  */
146   bool speed = optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn));
147   temporarily_undo_changes (old_num_changes);
148   gain = address_cost (XEXP (mem, 0), GET_MODE (mem),
149 		       MEM_ADDR_SPACE (mem), speed);
150   redo_changes (old_num_changes);
151   gain -= address_cost (XEXP (mem, 0), GET_MODE (mem),
152 			MEM_ADDR_SPACE (mem), speed);
153 
154   /* If the addresses have equivalent cost, prefer the new address
155      if it has the highest `set_src_cost'.  That has the potential of
156      eliminating the most insns without additional costs, and it
157      is the same that cse.cc used to do.  */
158   if (gain == 0)
159     {
160       gain = set_src_cost (XEXP (mem, 0), VOIDmode, speed);
161       temporarily_undo_changes (old_num_changes);
162       gain -= set_src_cost (XEXP (mem, 0), VOIDmode, speed);
163       redo_changes (old_num_changes);
164     }
165 
166   return (gain > 0);
167 }
168 
169 
170 namespace
171 {
172   class fwprop_propagation : public insn_propagation
173   {
174   public:
175     static const uint16_t CHANGED_MEM = FIRST_SPARE_RESULT;
176     static const uint16_t CONSTANT = FIRST_SPARE_RESULT << 1;
177     static const uint16_t PROFITABLE = FIRST_SPARE_RESULT << 2;
178 
179     fwprop_propagation (insn_info *, set_info *, rtx, rtx);
180 
changed_mem_p() const181     bool changed_mem_p () const { return result_flags & CHANGED_MEM; }
182     bool folded_to_constants_p () const;
183     bool profitable_p () const;
184 
185     bool check_mem (int, rtx) final override;
186     void note_simplification (int, uint16_t, rtx, rtx) final override;
187     uint16_t classify_result (rtx, rtx);
188 
189   private:
190     const bool single_use_p;
191     const bool single_ebb_p;
192   };
193 }
194 
195 /* Prepare to replace FROM with TO in USE_INSN.  */
196 
fwprop_propagation(insn_info * use_insn,set_info * def,rtx from,rtx to)197 fwprop_propagation::fwprop_propagation (insn_info *use_insn,
198 					set_info *def, rtx from, rtx to)
199   : insn_propagation (use_insn->rtl (), from, to),
200     single_use_p (def->single_nondebug_use ()),
201     single_ebb_p (use_insn->ebb () == def->ebb ())
202 {
203   should_check_mems = true;
204   should_note_simplifications = true;
205 }
206 
207 /* MEM is the result of an address simplification, and temporarily
208    undoing changes OLD_NUM_CHANGES onwards restores the original address.
209    Return true if the propagation should continue, false if it has failed.  */
210 
211 bool
check_mem(int old_num_changes,rtx mem)212 fwprop_propagation::check_mem (int old_num_changes, rtx mem)
213 {
214   if (!memory_address_addr_space_p (GET_MODE (mem), XEXP (mem, 0),
215 				    MEM_ADDR_SPACE (mem)))
216     {
217       failure_reason = "would create an invalid MEM";
218       return false;
219     }
220 
221   temporarily_undo_changes (old_num_changes);
222   bool can_simplify = can_simplify_addr (XEXP (mem, 0));
223   redo_changes (old_num_changes);
224   if (!can_simplify)
225     {
226       failure_reason = "would replace a frame address";
227       return false;
228     }
229 
230   /* Copy propagations are always ok.  Otherwise check the costs.  */
231   if (!(REG_P (from) && REG_P (to))
232       && !should_replace_address (old_num_changes, mem, insn))
233     {
234       failure_reason = "would increase the cost of a MEM";
235       return false;
236     }
237 
238   result_flags |= CHANGED_MEM;
239   return true;
240 }
241 
242 /* OLDX has been simplified to NEWX.  Describe the change in terms of
243    result_flags.  */
244 
245 uint16_t
classify_result(rtx old_rtx,rtx new_rtx)246 fwprop_propagation::classify_result (rtx old_rtx, rtx new_rtx)
247 {
248   if (CONSTANT_P (new_rtx))
249     {
250       /* If OLD_RTX is a LO_SUM, then it presumably exists for a reason,
251 	 and NEW_RTX is likely not a legitimate address.  We want it to
252 	 disappear if it is invalid.
253 
254 	 ??? Using the mode of the LO_SUM as the mode of the address
255 	 seems odd, but it was what the pre-SSA code did.  */
256       if (GET_CODE (old_rtx) == LO_SUM
257 	  && !memory_address_p (GET_MODE (old_rtx), new_rtx))
258 	return CONSTANT;
259       return CONSTANT | PROFITABLE;
260     }
261 
262   /* Allow replacements that simplify operations on a vector or complex
263      value to a component.  The most prominent case is
264      (subreg ([vec_]concat ...)).   */
265   if (REG_P (new_rtx)
266       && !HARD_REGISTER_P (new_rtx)
267       && (VECTOR_MODE_P (GET_MODE (from))
268 	  || COMPLEX_MODE_P (GET_MODE (from)))
269       && GET_MODE (new_rtx) == GET_MODE_INNER (GET_MODE (from)))
270     return PROFITABLE;
271 
272   /* Allow (subreg (mem)) -> (mem) simplifications with the following
273      exceptions:
274      1) Propagating (mem)s into multiple uses is not profitable.
275      2) Propagating (mem)s across EBBs may not be profitable if the source EBB
276 	runs less frequently.
277      3) Propagating (mem)s into paradoxical (subreg)s is not profitable.
278      4) Creating new (mem/v)s is not correct, since DCE will not remove the old
279 	ones.  */
280   if (single_use_p
281       && single_ebb_p
282       && SUBREG_P (old_rtx)
283       && !paradoxical_subreg_p (old_rtx)
284       && MEM_P (new_rtx)
285       && !MEM_VOLATILE_P (new_rtx))
286     return PROFITABLE;
287 
288   return 0;
289 }
290 
291 /* Record that OLD_RTX has been simplified to NEW_RTX.  OLD_NUM_CHANGES
292    is the number of unrelated changes that had been made before processing
293    OLD_RTX and its subrtxes.  OLD_RESULT_FLAGS is the value that result_flags
294    had at that point.  */
295 
296 void
note_simplification(int old_num_changes,uint16_t old_result_flags,rtx old_rtx,rtx new_rtx)297 fwprop_propagation::note_simplification (int old_num_changes,
298 					 uint16_t old_result_flags,
299 					 rtx old_rtx, rtx new_rtx)
300 {
301   result_flags &= ~(CONSTANT | PROFITABLE);
302   uint16_t new_flags = classify_result (old_rtx, new_rtx);
303   if (old_num_changes)
304     new_flags &= old_result_flags;
305   result_flags |= new_flags;
306 }
307 
308 /* Return true if all substitutions eventually folded to constants.  */
309 
310 bool
folded_to_constants_p() const311 fwprop_propagation::folded_to_constants_p () const
312 {
313   /* If we're propagating a HIGH, require it to be folded with a
314      partnering LO_SUM.  For example, a REG_EQUAL note with a register
315      replaced by an unfolded HIGH is not useful.  */
316   if (CONSTANT_P (to) && GET_CODE (to) != HIGH)
317     return true;
318   return !(result_flags & UNSIMPLIFIED) && (result_flags & CONSTANT);
319 }
320 
321 
322 /* Return true if it is worth keeping the result of the propagation,
323    false if it would increase the complexity of the pattern too much.  */
324 
325 bool
profitable_p() const326 fwprop_propagation::profitable_p () const
327 {
328   if (changed_mem_p ())
329     return true;
330 
331   if (!(result_flags & UNSIMPLIFIED)
332       && (result_flags & PROFITABLE))
333     return true;
334 
335   if (REG_P (to))
336     return true;
337 
338   if (GET_CODE (to) == SUBREG
339       && REG_P (SUBREG_REG (to))
340       && !paradoxical_subreg_p (to))
341     return true;
342 
343   if (CONSTANT_P (to))
344     return true;
345 
346   return false;
347 }
348 
349 /* Check that X has a single def.  */
350 
351 static bool
reg_single_def_p(rtx x)352 reg_single_def_p (rtx x)
353 {
354   return REG_P (x) && crtl->ssa->single_dominating_def (REGNO (x));
355 }
356 
357 /* Try to substitute (set DEST SRC), which defines DEF, into note NOTE of
358    USE_INSN.  Return the number of substitutions on success, otherwise return
359    -1 and leave USE_INSN unchanged.
360 
361    If REQUIRE_CONSTANT is true, require all substituted occurrences of SRC
362    to fold to a constant, so that the note does not use any more registers
363    than it did previously.  If REQUIRE_CONSTANT is false, also allow the
364    substitution if it's something we'd normally allow for the main
365    instruction pattern.  */
366 
367 static int
try_fwprop_subst_note(insn_info * use_insn,set_info * def,rtx note,rtx dest,rtx src,bool require_constant)368 try_fwprop_subst_note (insn_info *use_insn, set_info *def,
369 		       rtx note, rtx dest, rtx src, bool require_constant)
370 {
371   rtx_insn *use_rtl = use_insn->rtl ();
372   insn_info *def_insn = def->insn ();
373 
374   insn_change_watermark watermark;
375   fwprop_propagation prop (use_insn, def, dest, src);
376   if (!prop.apply_to_rvalue (&XEXP (note, 0)))
377     {
378       if (dump_file && (dump_flags & TDF_DETAILS))
379 	fprintf (dump_file, "cannot propagate from insn %d into"
380 		 " notes of insn %d: %s\n", def_insn->uid (),
381 		 use_insn->uid (), prop.failure_reason);
382       return -1;
383     }
384 
385   if (prop.num_replacements == 0)
386     return 0;
387 
388   if (require_constant)
389     {
390       if (!prop.folded_to_constants_p ())
391 	{
392 	  if (dump_file && (dump_flags & TDF_DETAILS))
393 	    fprintf (dump_file, "cannot propagate from insn %d into"
394 		     " notes of insn %d: %s\n", def_insn->uid (),
395 		     use_insn->uid (), "wouldn't fold to constants");
396 	  return -1;
397 	}
398     }
399   else
400     {
401       if (!prop.folded_to_constants_p () && !prop.profitable_p ())
402 	{
403 	  if (dump_file && (dump_flags & TDF_DETAILS))
404 	    fprintf (dump_file, "cannot propagate from insn %d into"
405 		     " notes of insn %d: %s\n", def_insn->uid (),
406 		     use_insn->uid (), "would increase complexity of node");
407 	  return -1;
408 	}
409     }
410 
411   if (dump_file && (dump_flags & TDF_DETAILS))
412     {
413       fprintf (dump_file, "\nin notes of insn %d, replacing:\n  ",
414 	       INSN_UID (use_rtl));
415       temporarily_undo_changes (0);
416       print_inline_rtx (dump_file, note, 2);
417       redo_changes (0);
418       fprintf (dump_file, "\n with:\n  ");
419       print_inline_rtx (dump_file, note, 2);
420       fprintf (dump_file, "\n");
421     }
422   watermark.keep ();
423   return prop.num_replacements;
424 }
425 
426 /* Try to substitute (set DEST SRC), which defines DEF, into location LOC of
427    USE_INSN's pattern.  Return true on success, otherwise leave USE_INSN
428    unchanged.  */
429 
430 static bool
try_fwprop_subst_pattern(obstack_watermark & attempt,insn_change & use_change,set_info * def,rtx * loc,rtx dest,rtx src)431 try_fwprop_subst_pattern (obstack_watermark &attempt, insn_change &use_change,
432 			  set_info *def, rtx *loc, rtx dest, rtx src)
433 {
434   insn_info *use_insn = use_change.insn ();
435   rtx_insn *use_rtl = use_insn->rtl ();
436   insn_info *def_insn = def->insn ();
437 
438   insn_change_watermark watermark;
439   fwprop_propagation prop (use_insn, def, dest, src);
440   if (!prop.apply_to_pattern (loc))
441     {
442       if (dump_file && (dump_flags & TDF_DETAILS))
443 	fprintf (dump_file, "cannot propagate from insn %d into"
444 		 " insn %d: %s\n", def_insn->uid (), use_insn->uid (),
445 		 prop.failure_reason);
446       return false;
447     }
448 
449   if (prop.num_replacements == 0)
450     return false;
451 
452   if (!prop.profitable_p ())
453     {
454       if (dump_file && (dump_flags & TDF_DETAILS))
455 	fprintf (dump_file, "cannot propagate from insn %d into"
456 		 " insn %d: %s\n", def_insn->uid (), use_insn->uid (),
457 		 "would increase complexity of pattern");
458       return false;
459     }
460 
461   if (dump_file && (dump_flags & TDF_DETAILS))
462     {
463       fprintf (dump_file, "\npropagating insn %d into insn %d, replacing:\n",
464 	       def_insn->uid (), use_insn->uid ());
465       temporarily_undo_changes (0);
466       print_rtl_single (dump_file, PATTERN (use_rtl));
467       redo_changes (0);
468     }
469 
470   /* ??? In theory, it should be better to use insn costs rather than
471      set_src_costs here.  That would involve replacing this code with
472      change_is_worthwhile.  */
473   bool ok = recog (attempt, use_change);
474   if (ok && !prop.changed_mem_p () && !use_insn->is_asm ())
475     if (rtx use_set = single_set (use_rtl))
476       {
477 	bool speed = optimize_bb_for_speed_p (BLOCK_FOR_INSN (use_rtl));
478 	temporarily_undo_changes (0);
479 	auto old_cost = set_src_cost (SET_SRC (use_set),
480 				      GET_MODE (SET_DEST (use_set)), speed);
481 	redo_changes (0);
482 	auto new_cost = set_src_cost (SET_SRC (use_set),
483 				      GET_MODE (SET_DEST (use_set)), speed);
484 	if (new_cost > old_cost)
485 	  {
486 	    if (dump_file)
487 	      fprintf (dump_file, "change not profitable"
488 		       " (cost %d -> cost %d)\n", old_cost, new_cost);
489 	    ok = false;
490 	  }
491       }
492 
493   if (!ok)
494     {
495       /* The pattern didn't match, but if all uses of SRC folded to
496 	 constants, we can add a REG_EQUAL note for the result, if there
497 	 isn't one already.  */
498       if (!prop.folded_to_constants_p ())
499 	return false;
500 
501       /* Test this first to avoid creating an unnecessary copy of SRC.  */
502       if (find_reg_note (use_rtl, REG_EQUAL, NULL_RTX))
503 	return false;
504 
505       rtx set = set_for_reg_notes (use_rtl);
506       if (!set || !REG_P (SET_DEST (set)))
507 	return false;
508 
509       rtx value = copy_rtx (SET_SRC (set));
510       cancel_changes (0);
511 
512       /* If there are any paradoxical SUBREGs, drop the REG_EQUAL note,
513 	 because the bits in there can be anything and so might not
514 	 match the REG_EQUAL note content.  See PR70574.  */
515       if (contains_paradoxical_subreg_p (SET_SRC (set)))
516 	return false;
517 
518       if (dump_file && (dump_flags & TDF_DETAILS))
519 	fprintf (dump_file, " Setting REG_EQUAL note\n");
520 
521       return set_unique_reg_note (use_rtl, REG_EQUAL, value);
522     }
523 
524   rtx *note_ptr = &REG_NOTES (use_rtl);
525   while (rtx note = *note_ptr)
526     {
527       if ((REG_NOTE_KIND (note) == REG_EQUAL
528 	   || REG_NOTE_KIND (note) == REG_EQUIV)
529 	  && try_fwprop_subst_note (use_insn, def, note, dest, src, false) < 0)
530 	{
531 	  *note_ptr = XEXP (note, 1);
532 	  free_EXPR_LIST_node (note);
533 	}
534       else
535 	note_ptr = &XEXP (note, 1);
536     }
537 
538   confirm_change_group ();
539   crtl->ssa->change_insn (use_change);
540   num_changes++;
541   return true;
542 }
543 
544 /* Try to substitute (set DEST SRC), which defines DEF, into USE_INSN's notes,
545    given that it was not possible to do this for USE_INSN's main pattern.
546    Return true on success, otherwise leave USE_INSN unchanged.  */
547 
548 static bool
try_fwprop_subst_notes(insn_info * use_insn,set_info * def,rtx dest,rtx src)549 try_fwprop_subst_notes (insn_info *use_insn, set_info *def,
550 			rtx dest, rtx src)
551 {
552   rtx_insn *use_rtl = use_insn->rtl ();
553   for (rtx note = REG_NOTES (use_rtl); note; note = XEXP (note, 1))
554     if ((REG_NOTE_KIND (note) == REG_EQUAL
555 	 || REG_NOTE_KIND (note) == REG_EQUIV)
556 	&& try_fwprop_subst_note (use_insn, def, note, dest, src, true) > 0)
557       {
558 	confirm_change_group ();
559 	return true;
560       }
561 
562   return false;
563 }
564 
565 /* Check whether we could validly substitute (set DEST SRC), which defines DEF,
566    into USE.  If so, first try performing the substitution in location LOC
567    of USE->insn ()'s pattern.  If that fails, try instead to substitute
568    into the notes.
569 
570    Return true on success, otherwise leave USE_INSN unchanged.  */
571 
572 static bool
try_fwprop_subst(use_info * use,set_info * def,rtx * loc,rtx dest,rtx src)573 try_fwprop_subst (use_info *use, set_info *def,
574 		  rtx *loc, rtx dest, rtx src)
575 {
576   insn_info *use_insn = use->insn ();
577   insn_info *def_insn = def->insn ();
578 
579   auto attempt = crtl->ssa->new_change_attempt ();
580   use_array src_uses = remove_note_accesses (attempt, def_insn->uses ());
581 
582   /* ??? Not really a meaningful test: it means we can propagate arithmetic
583      involving hard registers but not bare references to them.  A better
584      test would be to iterate over src_uses looking for hard registers
585      that are not fixed.  */
586   if (REG_P (src) && HARD_REGISTER_P (src))
587     return false;
588 
589   /* ??? It would be better to make this EBB-based instead.  That would
590      involve checking for equal EBBs rather than equal BBs and trying
591      to make the uses available at use_insn->ebb ()->first_bb ().  */
592   if (def_insn->bb () != use_insn->bb ())
593     {
594       src_uses = crtl->ssa->make_uses_available (attempt, src_uses,
595 						 use_insn->bb (),
596 						 use_insn->is_debug_insn ());
597       if (!src_uses.is_valid ())
598 	return false;
599     }
600 
601   insn_change use_change (use_insn);
602   use_change.new_uses = merge_access_arrays (attempt, use_change.new_uses,
603 					     src_uses);
604   if (!use_change.new_uses.is_valid ())
605     return false;
606 
607   /* ??? We could allow movement within the EBB by adding:
608 
609      use_change.move_range = use_insn->ebb ()->insn_range ();  */
610   if (!restrict_movement (use_change))
611     return false;
612 
613   return (try_fwprop_subst_pattern (attempt, use_change, def, loc, dest, src)
614 	  || try_fwprop_subst_notes (use_insn, def, dest, src));
615 }
616 
617 /* For the given single_set INSN, containing SRC known to be a
618    ZERO_EXTEND or SIGN_EXTEND of a register, return true if INSN
619    is redundant due to the register being set by a LOAD_EXTEND_OP
620    load from memory.  */
621 
622 static bool
free_load_extend(rtx src,insn_info * insn)623 free_load_extend (rtx src, insn_info *insn)
624 {
625   rtx reg = XEXP (src, 0);
626   if (load_extend_op (GET_MODE (reg)) != GET_CODE (src))
627     return false;
628 
629   def_info *def = nullptr;
630   for (use_info *use : insn->uses ())
631     if (use->regno () == REGNO (reg))
632       {
633 	def = use->def ();
634 	break;
635       }
636 
637   if (!def)
638     return false;
639 
640   insn_info *def_insn = def->insn ();
641   if (def_insn->is_artificial ())
642     return false;
643 
644   rtx_insn *def_rtl = def_insn->rtl ();
645   if (NONJUMP_INSN_P (def_rtl))
646     {
647       rtx patt = PATTERN (def_rtl);
648 
649       if (GET_CODE (patt) == SET
650 	  && GET_CODE (SET_SRC (patt)) == MEM
651 	  && rtx_equal_p (SET_DEST (patt), reg))
652 	return true;
653     }
654   return false;
655 }
656 
657 /* Subroutine of forward_propagate_subreg that handles a use of DEST
658    in REF.  The other parameters are the same.  */
659 
660 static bool
forward_propagate_subreg(use_info * use,set_info * def,rtx dest,rtx src,df_ref ref)661 forward_propagate_subreg (use_info *use, set_info *def,
662 			  rtx dest, rtx src, df_ref ref)
663 {
664   scalar_int_mode int_use_mode, src_mode;
665 
666   /* Only consider subregs... */
667   rtx use_reg = DF_REF_REG (ref);
668   machine_mode use_mode = GET_MODE (use_reg);
669   if (GET_CODE (use_reg) != SUBREG
670       || GET_MODE (SUBREG_REG (use_reg)) != GET_MODE (dest))
671     return false;
672 
673   /* ??? Replacing throughout the pattern would help for match_dups.  */
674   rtx *loc = DF_REF_LOC (ref);
675   if (paradoxical_subreg_p (use_reg))
676     {
677       /* If this is a paradoxical SUBREG, we have no idea what value the
678 	 extra bits would have.  However, if the operand is equivalent to
679 	 a SUBREG whose operand is the same as our mode, and all the modes
680 	 are within a word, we can just use the inner operand because
681 	 these SUBREGs just say how to treat the register.  */
682       if (GET_CODE (src) == SUBREG
683 	  && REG_P (SUBREG_REG (src))
684 	  && REGNO (SUBREG_REG (src)) >= FIRST_PSEUDO_REGISTER
685 	  && GET_MODE (SUBREG_REG (src)) == use_mode
686 	  && subreg_lowpart_p (src))
687 	return try_fwprop_subst (use, def, loc, use_reg, SUBREG_REG (src));
688     }
689 
690   /* If this is a SUBREG of a ZERO_EXTEND or SIGN_EXTEND, and the SUBREG
691      is the low part of the reg being extended then just use the inner
692      operand.  Don't do this if the ZERO_EXTEND or SIGN_EXTEND insn will
693      be removed due to it matching a LOAD_EXTEND_OP load from memory,
694      or due to the operation being a no-op when applied to registers.
695      For example, if we have:
696 
697 	 A: (set (reg:DI X) (sign_extend:DI (reg:SI Y)))
698 	 B: (... (subreg:SI (reg:DI X)) ...)
699 
700      and mode_rep_extended says that Y is already sign-extended,
701      the backend will typically allow A to be combined with the
702      definition of Y or, failing that, allow A to be deleted after
703      reload through register tying.  Introducing more uses of Y
704      prevents both optimisations.  */
705   else if (is_a <scalar_int_mode> (use_mode, &int_use_mode)
706 	   && subreg_lowpart_p (use_reg))
707     {
708       if ((GET_CODE (src) == ZERO_EXTEND
709 	   || GET_CODE (src) == SIGN_EXTEND)
710 	  && is_a <scalar_int_mode> (GET_MODE (src), &src_mode)
711 	  && REG_P (XEXP (src, 0))
712 	  && REGNO (XEXP (src, 0)) >= FIRST_PSEUDO_REGISTER
713 	  && GET_MODE (XEXP (src, 0)) == use_mode
714 	  && !free_load_extend (src, def->insn ())
715 	  && (targetm.mode_rep_extended (int_use_mode, src_mode)
716 	      != (int) GET_CODE (src)))
717 	return try_fwprop_subst (use, def, loc, use_reg, XEXP (src, 0));
718     }
719 
720   return false;
721 }
722 
723 /* Try to substitute (set DEST SRC), which defines DEF, into USE and simplify
724    the result, handling cases where DEST is used in a subreg and where
725    applying that subreg to SRC results in a useful simplification.  */
726 
727 static bool
forward_propagate_subreg(use_info * use,set_info * def,rtx dest,rtx src)728 forward_propagate_subreg (use_info *use, set_info *def, rtx dest, rtx src)
729 {
730   if (!use->includes_subregs () || !REG_P (dest))
731     return false;
732 
733   if (GET_CODE (src) != SUBREG
734       && GET_CODE (src) != ZERO_EXTEND
735       && GET_CODE (src) != SIGN_EXTEND)
736     return false;
737 
738   rtx_insn *use_rtl = use->insn ()->rtl ();
739   df_ref ref;
740 
741   FOR_EACH_INSN_USE (ref, use_rtl)
742     if (DF_REF_REGNO (ref) == use->regno ()
743 	&& forward_propagate_subreg (use, def, dest, src, ref))
744       return true;
745 
746   FOR_EACH_INSN_EQ_USE (ref, use_rtl)
747     if (DF_REF_REGNO (ref) == use->regno ()
748 	&& forward_propagate_subreg (use, def, dest, src, ref))
749       return true;
750 
751   return false;
752 }
753 
754 /* Try to substitute (set DEST SRC), which defines DEF, into USE and
755    simplify the result.  */
756 
757 static bool
forward_propagate_and_simplify(use_info * use,set_info * def,rtx dest,rtx src)758 forward_propagate_and_simplify (use_info *use, set_info *def,
759 				rtx dest, rtx src)
760 {
761   insn_info *use_insn = use->insn ();
762   rtx_insn *use_rtl = use_insn->rtl ();
763   insn_info *def_insn = def->insn ();
764 
765   /* ??? This check seems unnecessary.  We should be able to propagate
766      into any kind of instruction, regardless of whether it's a single set.
767      It seems odd to be more permissive with asms than normal instructions.  */
768   bool need_single_set = (!use_insn->is_asm () && !use_insn->is_debug_insn ());
769   rtx use_set = single_set (use_rtl);
770   if (need_single_set && !use_set)
771     return false;
772 
773   /* Do not propagate into PC etc.
774 
775      ??? This too seems unnecessary.  The current code should work correctly
776      without it, including cases where jumps become unconditional.  */
777   if (use_set && GET_MODE (SET_DEST (use_set)) == VOIDmode)
778     return false;
779 
780   /* In __asm don't replace if src might need more registers than
781      reg, as that could increase register pressure on the __asm.  */
782   if (use_insn->is_asm () && def_insn->uses ().size () > 1)
783     return false;
784 
785   /* Check if the def is loading something from the constant pool; in this
786      case we would undo optimization such as compress_float_constant.
787      Still, we can set a REG_EQUAL note.  */
788   if (MEM_P (src) && MEM_READONLY_P (src))
789     {
790       rtx x = avoid_constant_pool_reference (src);
791       rtx note_set;
792       if (x != src
793 	  && (note_set = set_for_reg_notes (use_rtl))
794 	  && REG_P (SET_DEST (note_set))
795 	  && !contains_paradoxical_subreg_p (SET_SRC (note_set)))
796 	{
797 	  rtx note = find_reg_note (use_rtl, REG_EQUAL, NULL_RTX);
798 	  rtx old_rtx = note ? XEXP (note, 0) : SET_SRC (note_set);
799 	  rtx new_rtx = simplify_replace_rtx (old_rtx, src, x);
800 	  if (old_rtx != new_rtx)
801 	    set_unique_reg_note (use_rtl, REG_EQUAL, copy_rtx (new_rtx));
802 	}
803       return false;
804     }
805 
806   /* ??? Unconditionally propagating into PATTERN would work better
807      for instructions that have match_dups.  */
808   rtx *loc = need_single_set ? &use_set : &PATTERN (use_rtl);
809   return try_fwprop_subst (use, def, loc, dest, src);
810 }
811 
812 /* Given a use USE of an insn, if it has a single reaching
813    definition, try to forward propagate it into that insn.
814    Return true if something changed.
815 
816    REG_PROP_ONLY is true if we should only propagate register copies.  */
817 
818 static bool
forward_propagate_into(use_info * use,bool reg_prop_only=false)819 forward_propagate_into (use_info *use, bool reg_prop_only = false)
820 {
821   if (use->includes_read_writes ())
822     return false;
823 
824   /* Disregard uninitialized uses.  */
825   set_info *def = use->def ();
826   if (!def)
827     return false;
828 
829   /* Only consider single-register definitions.  This could be relaxed,
830      but it should rarely be needed before RA.  */
831   def = look_through_degenerate_phi (def);
832   if (def->includes_multiregs ())
833     return false;
834 
835   /* Only consider uses whose definition comes from a real instruction.  */
836   insn_info *def_insn = def->insn ();
837   if (def_insn->is_artificial ())
838     return false;
839 
840   rtx_insn *def_rtl = def_insn->rtl ();
841   if (!NONJUMP_INSN_P (def_rtl))
842     return false;
843   /* ??? This seems an unnecessary restriction.  We can easily tell
844      which set the definition comes from.  */
845   if (multiple_sets (def_rtl))
846     return false;
847   rtx def_set = simple_regno_set (PATTERN (def_rtl), def->regno ());
848   if (!def_set)
849     return false;
850 
851   rtx dest = SET_DEST (def_set);
852   rtx src = SET_SRC (def_set);
853 
854   /* Allow propagations into a loop only for reg-to-reg copies, since
855      replacing one register by another shouldn't increase the cost.
856      Propagations from inner loop to outer loop should also be ok.  */
857   struct loop *def_loop = def_insn->bb ()->cfg_bb ()->loop_father;
858   struct loop *use_loop = use->bb ()->cfg_bb ()->loop_father;
859   if ((reg_prop_only
860        || (def_loop != use_loop
861 	   && !flow_loop_nested_p (use_loop, def_loop)))
862       && (!reg_single_def_p (dest) || !reg_single_def_p (src)))
863     return false;
864 
865   /* Don't substitute into a non-local goto, this confuses CFG.  */
866   insn_info *use_insn = use->insn ();
867   rtx_insn *use_rtl = use_insn->rtl ();
868   if (JUMP_P (use_rtl)
869       && find_reg_note (use_rtl, REG_NON_LOCAL_GOTO, NULL_RTX))
870     return false;
871 
872   if (forward_propagate_and_simplify (use, def, dest, src)
873       || forward_propagate_subreg (use, def, dest, src))
874     return true;
875 
876   return false;
877 }
878 
879 static void
fwprop_init(void)880 fwprop_init (void)
881 {
882   num_changes = 0;
883   calculate_dominance_info (CDI_DOMINATORS);
884 
885   /* We do not always want to propagate into loops, so we have to find
886      loops and be careful about them.  Avoid CFG modifications so that
887      we don't have to update dominance information afterwards for
888      build_single_def_use_links.  */
889   loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
890 
891   df_analyze ();
892   crtl->ssa = new rtl_ssa::function_info (cfun);
893 }
894 
895 static void
fwprop_done(void)896 fwprop_done (void)
897 {
898   loop_optimizer_finalize ();
899 
900   crtl->ssa->perform_pending_updates ();
901   free_dominance_info (CDI_DOMINATORS);
902   cleanup_cfg (0);
903 
904   delete crtl->ssa;
905   crtl->ssa = nullptr;
906 
907   delete_trivially_dead_insns (get_insns (), max_reg_num ());
908 
909   if (dump_file)
910     fprintf (dump_file,
911 	     "\nNumber of successful forward propagations: %d\n\n",
912 	     num_changes);
913 }
914 
915 /* Try to optimize INSN, returning true if something changes.
916    FWPROP_ADDR_P is true if we are running fwprop_addr rather than
917    the full fwprop.  */
918 
919 static bool
fwprop_insn(insn_info * insn,bool fwprop_addr_p)920 fwprop_insn (insn_info *insn, bool fwprop_addr_p)
921 {
922   for (use_info *use : insn->uses ())
923     {
924       if (use->is_mem ())
925 	continue;
926       /* ??? The choices here follow those in the pre-SSA code.  */
927       if (!use->includes_address_uses ())
928 	{
929 	  if (forward_propagate_into (use, fwprop_addr_p))
930 	    return true;
931 	}
932       else
933 	{
934 	  struct loop *loop = insn->bb ()->cfg_bb ()->loop_father;
935 	  /* The outermost loop is not really a loop.  */
936 	  if (loop == NULL || loop_outer (loop) == NULL)
937 	    {
938 	      if (forward_propagate_into (use, fwprop_addr_p))
939 		return true;
940 	    }
941 	  else if (fwprop_addr_p)
942 	    {
943 	      if (forward_propagate_into (use, false))
944 		return true;
945 	    }
946 	}
947     }
948   return false;
949 }
950 
951 /* Main entry point.  */
952 
953 static bool
gate_fwprop(void)954 gate_fwprop (void)
955 {
956   return optimize > 0 && flag_forward_propagate;
957 }
958 
959 static unsigned int
fwprop(bool fwprop_addr_p)960 fwprop (bool fwprop_addr_p)
961 {
962   fwprop_init ();
963 
964   /* Go through all the instructions (including debug instructions) looking
965      for uses that we could propagate into.
966 
967      Do not forward propagate addresses into loops until after unrolling.
968      CSE did so because it was able to fix its own mess, but we are not.  */
969 
970   insn_info *next;
971 
972   /* ??? This code uses a worklist in order to preserve the behavior
973      of the pre-SSA implementation.  It would be better to instead
974      iterate on each instruction until no more propagations are
975      possible, then move on to the next.  */
976   auto_vec<insn_info *> worklist;
977   for (insn_info *insn = crtl->ssa->first_insn (); insn; insn = next)
978     {
979       next = insn->next_any_insn ();
980       if (insn->can_be_optimized () || insn->is_debug_insn ())
981 	if (fwprop_insn (insn, fwprop_addr_p))
982 	  worklist.safe_push (insn);
983     }
984   for (unsigned int i = 0; i < worklist.length (); ++i)
985     {
986       insn_info *insn = worklist[i];
987       if (fwprop_insn (insn, fwprop_addr_p))
988 	worklist.safe_push (insn);
989     }
990 
991   fwprop_done ();
992   return 0;
993 }
994 
995 namespace {
996 
997 const pass_data pass_data_rtl_fwprop =
998 {
999   RTL_PASS, /* type */
1000   "fwprop1", /* name */
1001   OPTGROUP_NONE, /* optinfo_flags */
1002   TV_FWPROP, /* tv_id */
1003   0, /* properties_required */
1004   0, /* properties_provided */
1005   0, /* properties_destroyed */
1006   0, /* todo_flags_start */
1007   TODO_df_finish, /* todo_flags_finish */
1008 };
1009 
1010 class pass_rtl_fwprop : public rtl_opt_pass
1011 {
1012 public:
pass_rtl_fwprop(gcc::context * ctxt)1013   pass_rtl_fwprop (gcc::context *ctxt)
1014     : rtl_opt_pass (pass_data_rtl_fwprop, ctxt)
1015   {}
1016 
1017   /* opt_pass methods: */
gate(function *)1018   virtual bool gate (function *) { return gate_fwprop (); }
execute(function *)1019   virtual unsigned int execute (function *) { return fwprop (false); }
1020 
1021 }; // class pass_rtl_fwprop
1022 
1023 } // anon namespace
1024 
1025 rtl_opt_pass *
make_pass_rtl_fwprop(gcc::context * ctxt)1026 make_pass_rtl_fwprop (gcc::context *ctxt)
1027 {
1028   return new pass_rtl_fwprop (ctxt);
1029 }
1030 
1031 namespace {
1032 
1033 const pass_data pass_data_rtl_fwprop_addr =
1034 {
1035   RTL_PASS, /* type */
1036   "fwprop2", /* name */
1037   OPTGROUP_NONE, /* optinfo_flags */
1038   TV_FWPROP, /* tv_id */
1039   0, /* properties_required */
1040   0, /* properties_provided */
1041   0, /* properties_destroyed */
1042   0, /* todo_flags_start */
1043   TODO_df_finish, /* todo_flags_finish */
1044 };
1045 
1046 class pass_rtl_fwprop_addr : public rtl_opt_pass
1047 {
1048 public:
pass_rtl_fwprop_addr(gcc::context * ctxt)1049   pass_rtl_fwprop_addr (gcc::context *ctxt)
1050     : rtl_opt_pass (pass_data_rtl_fwprop_addr, ctxt)
1051   {}
1052 
1053   /* opt_pass methods: */
gate(function *)1054   virtual bool gate (function *) { return gate_fwprop (); }
execute(function *)1055   virtual unsigned int execute (function *) { return fwprop (true); }
1056 
1057 }; // class pass_rtl_fwprop_addr
1058 
1059 } // anon namespace
1060 
1061 rtl_opt_pass *
make_pass_rtl_fwprop_addr(gcc::context * ctxt)1062 make_pass_rtl_fwprop_addr (gcc::context *ctxt)
1063 {
1064   return new pass_rtl_fwprop_addr (ctxt);
1065 }
1066