xref: /netbsd-src/external/gpl3/gcc.old/dist/gcc/combine-stack-adj.c (revision b7b7574d3bf8eeb51a1fa3977b59142ec6434a55)
1 /* Combine stack adjustments.
2    Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997,
3    1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
4    Free Software Foundation, Inc.
5 
6 This file is part of GCC.
7 
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12 
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
16 for more details.
17 
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3.  If not see
20 <http://www.gnu.org/licenses/>.  */
21 
22 /* Track stack adjustments and stack memory references.  Attempt to
23    reduce the number of stack adjustments by back-propagating across
24    the memory references.
25 
26    This is intended primarily for use with targets that do not define
27    ACCUMULATE_OUTGOING_ARGS.  It is of significantly more value to
28    targets that define PREFERRED_STACK_BOUNDARY more aligned than
29    STACK_BOUNDARY (e.g. x86), or if not all registers can be pushed
30    (e.g. x86 fp regs) which would ordinarily have to be implemented
31    as a sub/mov pair due to restrictions in calls.c.
32 
33    Propagation stops when any of the insns that need adjusting are
34    (a) no longer valid because we've exceeded their range, (b) a
35    non-trivial push instruction, or (c) a call instruction.
36 
37    Restriction B is based on the assumption that push instructions
38    are smaller or faster.  If a port really wants to remove all
39    pushes, it should have defined ACCUMULATE_OUTGOING_ARGS.  The
40    one exception that is made is for an add immediately followed
41    by a push.  */
42 
43 #include "config.h"
44 #include "system.h"
45 #include "coretypes.h"
46 #include "tm.h"
47 #include "rtl.h"
48 #include "tm_p.h"
49 #include "insn-config.h"
50 #include "recog.h"
51 #include "output.h"
52 #include "regs.h"
53 #include "hard-reg-set.h"
54 #include "flags.h"
55 #include "function.h"
56 #include "expr.h"
57 #include "basic-block.h"
58 #include "df.h"
59 #include "except.h"
60 #include "toplev.h"
61 #include "reload.h"
62 #include "timevar.h"
63 #include "tree-pass.h"
64 
65 
/* Turn STACK_GROWS_DOWNWARD into a boolean usable in plain C
   conditionals: target headers define it empty when it applies, so
   re-define it here as 1 or 0.  */
#ifdef STACK_GROWS_DOWNWARD
#undef STACK_GROWS_DOWNWARD
#define STACK_GROWS_DOWNWARD 1
#else
#define STACK_GROWS_DOWNWARD 0
#endif
73 
/* This structure records two kinds of stack references between stack
   adjusting instructions: stack references in memory addresses for
   regular insns and all stack references for debug insns.  */

struct csa_reflist
{
  /* Constant offset of the reference from the stack pointer at the
     time it was recorded (0 for a bare (mem (sp)) or a REG reference).  */
  HOST_WIDE_INT sp_offset;
  /* The insn containing the reference, and the location inside its
     pattern to rewrite when an adjustment is moved past it.  */
  rtx insn, *ref;
  /* Next entry in this singly linked list.  */
  struct csa_reflist *next;
};
84 
/* Forward declarations for the file-local helpers defined below.  */
static int stack_memref_p (rtx);
static rtx single_set_for_csa (rtx);
static void free_csa_reflist (struct csa_reflist *);
static struct csa_reflist *record_one_stack_ref (rtx, rtx *,
						 struct csa_reflist *);
static int try_apply_stack_adjustment (rtx, struct csa_reflist *,
				       HOST_WIDE_INT, HOST_WIDE_INT);
static void combine_stack_adjustments_for_block (basic_block);
static int record_stack_refs (rtx *, void *);
94 
95 
96 /* Main entry point for stack adjustment combination.  */
97 
98 static void
99 combine_stack_adjustments (void)
100 {
101   basic_block bb;
102 
103   FOR_EACH_BB (bb)
104     combine_stack_adjustments_for_block (bb);
105 }
106 
107 /* Recognize a MEM of the form (sp) or (plus sp const).  */
108 
109 static int
110 stack_memref_p (rtx x)
111 {
112   if (!MEM_P (x))
113     return 0;
114   x = XEXP (x, 0);
115 
116   if (x == stack_pointer_rtx)
117     return 1;
118   if (GET_CODE (x) == PLUS
119       && XEXP (x, 0) == stack_pointer_rtx
120       && CONST_INT_P (XEXP (x, 1)))
121     return 1;
122 
123   return 0;
124 }
125 
126 /* Recognize either normal single_set or the hack in i386.md for
127    tying fp and sp adjustments.  */
128 
129 static rtx
130 single_set_for_csa (rtx insn)
131 {
132   int i;
133   rtx tmp = single_set (insn);
134   if (tmp)
135     return tmp;
136 
137   if (!NONJUMP_INSN_P (insn)
138       || GET_CODE (PATTERN (insn)) != PARALLEL)
139     return NULL_RTX;
140 
141   tmp = PATTERN (insn);
142   if (GET_CODE (XVECEXP (tmp, 0, 0)) != SET)
143     return NULL_RTX;
144 
145   for (i = 1; i < XVECLEN (tmp, 0); ++i)
146     {
147       rtx this_rtx = XVECEXP (tmp, 0, i);
148 
149       /* The special case is allowing a no-op set.  */
150       if (GET_CODE (this_rtx) == SET
151 	  && SET_SRC (this_rtx) == SET_DEST (this_rtx))
152 	;
153       else if (GET_CODE (this_rtx) != CLOBBER
154 	       && GET_CODE (this_rtx) != USE)
155 	return NULL_RTX;
156     }
157 
158   return XVECEXP (tmp, 0, 0);
159 }
160 
161 /* Free the list of csa_reflist nodes.  */
162 
163 static void
164 free_csa_reflist (struct csa_reflist *reflist)
165 {
166   struct csa_reflist *next;
167   for (; reflist ; reflist = next)
168     {
169       next = reflist->next;
170       free (reflist);
171     }
172 }
173 
174 /* Create a new csa_reflist node from the given stack reference.
175    It is already known that the reference is either a MEM satisfying the
176    predicate stack_memref_p or a REG representing the stack pointer.  */
177 
178 static struct csa_reflist *
179 record_one_stack_ref (rtx insn, rtx *ref, struct csa_reflist *next_reflist)
180 {
181   struct csa_reflist *ml;
182 
183   ml = XNEW (struct csa_reflist);
184 
185   if (REG_P (*ref) || XEXP (*ref, 0) == stack_pointer_rtx)
186     ml->sp_offset = 0;
187   else
188     ml->sp_offset = INTVAL (XEXP (XEXP (*ref, 0), 1));
189 
190   ml->insn = insn;
191   ml->ref = ref;
192   ml->next = next_reflist;
193 
194   return ml;
195 }
196 
/* Attempt to apply ADJUST to the stack adjusting insn INSN, as well
   as each of the memories and stack references in REFLIST.  Return true
   on success.  */

static int
try_apply_stack_adjustment (rtx insn, struct csa_reflist *reflist,
			    HOST_WIDE_INT new_adjust, HOST_WIDE_INT delta)
{
  struct csa_reflist *ml;
  rtx set;

  /* NOTE(review): SET is assumed non-NULL here; callers appear to pass
     only insns previously matched by single_set_for_csa — confirm.  */
  set = single_set_for_csa (insn);
  /* If INSN is a push (its SET_DEST is a MEM), rewrite the destination
     to address the stack pointer directly; otherwise it is an
     sp = sp + C adjustment and we replace C with NEW_ADJUST.  The
     final argument 1 only queues the change; nothing is committed
     until apply_change_group below.  */
  if (MEM_P (SET_DEST (set)))
    validate_change (insn, &SET_DEST (set),
		     replace_equiv_address (SET_DEST (set), stack_pointer_rtx),
		     1);
  else
    validate_change (insn, &XEXP (SET_SRC (set), 1), GEN_INT (new_adjust), 1);

  /* Queue the matching rewrite of every recorded stack reference:
     relative to the new stack pointer each one moves by -DELTA.  */
  for (ml = reflist; ml ; ml = ml->next)
    {
      rtx new_addr = plus_constant (stack_pointer_rtx, ml->sp_offset - delta);
      rtx new_val;

      if (MEM_P (*ml->ref))
	new_val = replace_equiv_address_nv (*ml->ref, new_addr);
      else if (GET_MODE (*ml->ref) == GET_MODE (stack_pointer_rtx))
	new_val = new_addr;
      else
	/* A reference to the stack pointer in some other mode (these
	   come from debug insns); take the matching lowpart.  */
	new_val = lowpart_subreg (GET_MODE (*ml->ref), new_addr,
				  GET_MODE (new_addr));
      validate_change (ml->insn, ml->ref, new_val, 1);
    }

  /* Commit all queued changes as a group: either every rewritten insn
     is still recognizable and all changes stick, or none do.  */
  if (apply_change_group ())
    {
      /* Succeeded.  Update our knowledge of the stack references.  */
      for (ml = reflist; ml ; ml = ml->next)
	ml->sp_offset -= delta;

      return 1;
    }
  else
    return 0;
}
242 
/* Called via for_each_rtx and used to record all stack memory and other
   references in the insn and discard all other stack pointer references.  */
struct record_stack_refs_data
{
  /* The insn whose pattern is currently being walked.  */
  rtx insn;
  /* Accumulated list of stack references found so far.  */
  struct csa_reflist *reflist;
};
250 
/* for_each_rtx callback: record stack references found in *XP into
   DATA->reflist.  Returns follow the for_each_rtx convention: 0 keeps
   walking, -1 skips the sub-rtxes of *XP, and a positive value aborts
   the walk (making the caller give up on tracking this insn).  */
static int
record_stack_refs (rtx *xp, void *data)
{
  rtx x = *xp;
  struct record_stack_refs_data *d =
    (struct record_stack_refs_data *) data;
  if (!x)
    return 0;
  switch (GET_CODE (x))
    {
    case MEM:
      /* A MEM that never mentions sp needs no rewriting: prune its
	 whole subtree.  */
      if (!reg_mentioned_p (stack_pointer_rtx, x))
	return -1;
      /* We are not able to handle correctly all possible memrefs containing
         stack pointer, so this check is necessary.  */
      if (stack_memref_p (x))
	{
	  d->reflist = record_one_stack_ref (d->insn, xp, d->reflist);
	  return -1;
	}
      /* Try harder for DEBUG_INSNs, handle e.g. (mem (mem (sp + 16) + 4).  */
      return !DEBUG_INSN_P (d->insn);
    case REG:
      /* ??? We want be able to handle non-memory stack pointer
	 references later.  For now just discard all insns referring to
	 stack pointer outside mem expressions.  We would probably
	 want to teach validate_replace to simplify expressions first.

	 We can't just compare with STACK_POINTER_RTX because the
	 reference to the stack pointer might be in some other mode.
	 In particular, an explicit clobber in an asm statement will
	 result in a QImode clobber.

	 In DEBUG_INSNs, we want to replace all occurrences, otherwise
	 they will cause -fcompare-debug failures.  */
      if (REGNO (x) == STACK_POINTER_REGNUM)
	{
	  if (!DEBUG_INSN_P (d->insn))
	    return 1;
	  d->reflist = record_one_stack_ref (d->insn, xp, d->reflist);
	  return -1;
	}
      break;
    default:
      break;
    }
  return 0;
}
299 
/* Adjust or create REG_FRAME_RELATED_EXPR note when merging a stack
   adjustment into a frame related insn.  LAST_SP_SET is the surviving
   insn that absorbed the adjustment; INSN is the merged (about to be
   deleted) insn whose effect was THIS_ADJUST.  */

static void
adjust_frame_related_expr (rtx last_sp_set, rtx insn,
			   HOST_WIDE_INT this_adjust)
{
  rtx note = find_reg_note (last_sp_set, REG_FRAME_RELATED_EXPR, NULL_RTX);
  rtx new_expr = NULL_RTX;

  /* NOTE(review): with no existing note and INSN itself frame related,
     LAST_SP_SET's (already-combined) pattern presumably describes the
     CFA change on its own, so there is nothing to record — confirm.  */
  if (note == NULL_RTX && RTX_FRAME_RELATED_P (insn))
    return;

  if (note
      && GET_CODE (XEXP (note, 0)) == SEQUENCE
      && XVECLEN (XEXP (note, 0), 0) >= 2)
    {
      rtx expr = XEXP (note, 0);
      rtx last = XVECEXP (expr, 0, XVECLEN (expr, 0) - 1);
      int i;

      /* If the SEQUENCE already ends with a compatible sp = sp + C
	 set, simply fold THIS_ADJUST into that constant.  */
      if (GET_CODE (last) == SET
	  && RTX_FRAME_RELATED_P (last) == RTX_FRAME_RELATED_P (insn)
	  && SET_DEST (last) == stack_pointer_rtx
	  && GET_CODE (SET_SRC (last)) == PLUS
	  && XEXP (SET_SRC (last), 0) == stack_pointer_rtx
	  && CONST_INT_P (XEXP (SET_SRC (last), 1)))
	{
	  XEXP (SET_SRC (last), 1)
	    = GEN_INT (INTVAL (XEXP (SET_SRC (last), 1)) + this_adjust);
	  return;
	}

      /* Otherwise grow the SEQUENCE by one slot, copying the existing
	 elements; the new final slot is filled in below.  */
      new_expr = gen_rtx_SEQUENCE (VOIDmode,
				   rtvec_alloc (XVECLEN (expr, 0) + 1));
      for (i = 0; i < XVECLEN (expr, 0); i++)
	XVECEXP (new_expr, 0, i) = XVECEXP (expr, 0, i);
    }
  else
    {
      /* Build a fresh two-element SEQUENCE.  Its first element is the
	 old note expression, or — when there was no note — a copy of
	 LAST_SP_SET's set with THIS_ADJUST backed out, since the caller
	 already rewrote LAST_SP_SET's pattern to the combined total.  */
      new_expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
      if (note)
	XVECEXP (new_expr, 0, 0) = XEXP (note, 0);
      else
	{
	  rtx expr = copy_rtx (single_set_for_csa (last_sp_set));

	  XEXP (SET_SRC (expr), 1)
	    = GEN_INT (INTVAL (XEXP (SET_SRC (expr), 1)) - this_adjust);
	  RTX_FRAME_RELATED_P (expr) = 1;
	  XVECEXP (new_expr, 0, 0) = expr;
	}
    }

  /* The final element of the SEQUENCE is always INSN's own set,
     carrying INSN's frame-related flag.  */
  XVECEXP (new_expr, 0, XVECLEN (new_expr, 0) - 1)
    = copy_rtx (single_set_for_csa (insn));
  RTX_FRAME_RELATED_P (XVECEXP (new_expr, 0, XVECLEN (new_expr, 0) - 1))
    = RTX_FRAME_RELATED_P (insn);
  if (note)
    XEXP (note, 0) = new_expr;
  else
    add_reg_note (last_sp_set, REG_FRAME_RELATED_EXPR, new_expr);
}
363 
/* Subroutine of combine_stack_adjustments, called for each basic block.
   Scan BB forward, remembering the last sp = sp + C insn seen
   (LAST_SP_SET/LAST_SP_ADJUST) and every stack reference recorded
   since it (REFLIST), and try to merge subsequent adjustments and
   pushes into that insn.  */

static void
combine_stack_adjustments_for_block (basic_block bb)
{
  HOST_WIDE_INT last_sp_adjust = 0;
  rtx last_sp_set = NULL_RTX;
  struct csa_reflist *reflist = NULL;
  rtx insn, next, set;
  struct record_stack_refs_data data;
  bool end_of_block = false;

  for (insn = BB_HEAD (bb); !end_of_block ; insn = next)
    {
      end_of_block = insn == BB_END (bb);
      next = NEXT_INSN (insn);

      if (! INSN_P (insn))
	continue;

      set = single_set_for_csa (insn);
      if (set)
	{
	  rtx dest = SET_DEST (set);
	  rtx src = SET_SRC (set);

	  /* Find constant additions to the stack pointer.  */
	  if (dest == stack_pointer_rtx
	      && GET_CODE (src) == PLUS
	      && XEXP (src, 0) == stack_pointer_rtx
	      && CONST_INT_P (XEXP (src, 1)))
	    {
	      HOST_WIDE_INT this_adjust = INTVAL (XEXP (src, 1));

	      /* If we've not seen an adjustment previously, record
		 it now and continue.  */
	      if (! last_sp_set)
		{
		  last_sp_set = insn;
		  last_sp_adjust = this_adjust;
		  continue;
		}

	      /* If not all recorded refs can be adjusted, or the
		 adjustment is now too large for a constant addition,
		 we cannot merge the two stack adjustments.

		 Also we need to be careful to not move stack pointer
		 such that we create stack accesses outside the allocated
		 area.  We can combine an allocation into the first insn,
		 or a deallocation into the second insn.  We can not
		 combine an allocation followed by a deallocation.

		 The only somewhat frequent occurrence of the latter is when
		 a function allocates a stack frame but does not use it.
		 For this case, we would need to analyze rtl stream to be
		 sure that allocated area is really unused.  This means not
		 only checking the memory references, but also all registers
		 or global memory references possibly containing a stack
		 frame address.

		 Perhaps the best way to address this problem is to teach
		 gcc not to allocate stack for objects never used.  */

	      /* Combine an allocation into the first instruction.  */
	      if (STACK_GROWS_DOWNWARD ? this_adjust <= 0 : this_adjust >= 0)
		{
		  if (try_apply_stack_adjustment (last_sp_set, reflist,
						  last_sp_adjust + this_adjust,
						  this_adjust))
		    {
		      if (RTX_FRAME_RELATED_P (last_sp_set))
			adjust_frame_related_expr (last_sp_set, insn,
						   this_adjust);
		      /* It worked!  */
		      delete_insn (insn);
		      last_sp_adjust += this_adjust;
		      continue;
		    }
		}

	      /* Otherwise we have a deallocation.  Do not combine with
		 a previous allocation.  Combine into the second insn.  */
	      else if (STACK_GROWS_DOWNWARD
		       ? last_sp_adjust >= 0 : last_sp_adjust <= 0)
		{
		  if (try_apply_stack_adjustment (insn, reflist,
						  last_sp_adjust + this_adjust,
						  -last_sp_adjust))
		    {
		      /* It worked!  */
		      delete_insn (last_sp_set);
		      last_sp_set = insn;
		      last_sp_adjust += this_adjust;
		      /* The recorded refs were rewritten relative to the
			 new base, so start collecting afresh.  */
		      free_csa_reflist (reflist);
		      reflist = NULL;
		      continue;
		    }
		}

	      /* Combination failed.  Restart processing from here.  If
		 deallocation+allocation conspired to cancel, we can
		 delete the old deallocation insn.  */
	      if (last_sp_set && last_sp_adjust == 0)
		delete_insn (last_sp_set);
	      free_csa_reflist (reflist);
	      reflist = NULL;
	      last_sp_set = insn;
	      last_sp_adjust = this_adjust;
	      continue;
	    }

	  /* Find a store with pre-(dec|inc)rement or pre-modify of exactly
	     the previous adjustment and turn it into a simple store.  This
	     is equivalent to anticipating the stack adjustment so this must
	     be an allocation.  */
	  if (MEM_P (dest)
	      && ((STACK_GROWS_DOWNWARD
		   ? (GET_CODE (XEXP (dest, 0)) == PRE_DEC
		      && last_sp_adjust
			 == (HOST_WIDE_INT) GET_MODE_SIZE (GET_MODE (dest)))
		   : (GET_CODE (XEXP (dest, 0)) == PRE_INC
		      && last_sp_adjust
		         == -(HOST_WIDE_INT) GET_MODE_SIZE (GET_MODE (dest))))
		  || ((STACK_GROWS_DOWNWARD
		       ? last_sp_adjust >= 0 : last_sp_adjust <= 0)
		      && GET_CODE (XEXP (dest, 0)) == PRE_MODIFY
		      && GET_CODE (XEXP (XEXP (dest, 0), 1)) == PLUS
		      && XEXP (XEXP (XEXP (dest, 0), 1), 0)
			 == stack_pointer_rtx
		      && GET_CODE (XEXP (XEXP (XEXP (dest, 0), 1), 1))
		         == CONST_INT
		      && INTVAL (XEXP (XEXP (XEXP (dest, 0), 1), 1))
		         == -last_sp_adjust))
	      && XEXP (XEXP (dest, 0), 0) == stack_pointer_rtx
	      && !reg_mentioned_p (stack_pointer_rtx, src)
	      && memory_address_p (GET_MODE (dest), stack_pointer_rtx)
	      && try_apply_stack_adjustment (insn, reflist, 0,
					     -last_sp_adjust))
	    {
	      /* The adjustment is now implicit in the push's
		 auto-increment; drop the separate sp-set and restart.  */
	      delete_insn (last_sp_set);
	      free_csa_reflist (reflist);
	      reflist = NULL;
	      last_sp_set = NULL_RTX;
	      last_sp_adjust = 0;
	      continue;
	    }
	}

      /* Record this insn's stack references so any later combination
	 can rewrite them.  A zero return from for_each_rtx means the
	 whole pattern was handled; otherwise fall through and give up
	 on the pending adjustment below.  */
      data.insn = insn;
      data.reflist = reflist;
      if (!CALL_P (insn) && last_sp_set
	  && !for_each_rtx (&PATTERN (insn), record_stack_refs, &data))
	{
	   reflist = data.reflist;
	   continue;
	}
      reflist = data.reflist;

      /* Otherwise, we were not able to process the instruction.
	 Do not continue collecting data across such a one.  */
      if (last_sp_set
	  && (CALL_P (insn)
	      || reg_mentioned_p (stack_pointer_rtx, PATTERN (insn))))
	{
	  if (last_sp_set && last_sp_adjust == 0)
	    delete_insn (last_sp_set);
	  free_csa_reflist (reflist);
	  reflist = NULL;
	  last_sp_set = NULL_RTX;
	  last_sp_adjust = 0;
	}
    }

  /* A pending adjustment with zero net effect at block end is dead.  */
  if (last_sp_set && last_sp_adjust == 0)
    delete_insn (last_sp_set);

  if (reflist)
    free_csa_reflist (reflist);
}
544 
545 
546 static bool
547 gate_handle_stack_adjustments (void)
548 {
549   return (optimize > 0);
550 }
551 
/* Execute function for the csa pass: clean up the CFG, then (subject
   to the PUSH_ROUNDING/ACCUMULATE_OUTGOING_ARGS guard below) refresh
   dataflow information and run the stack adjustment combiner.
   Always returns 0 (no additional TODO flags).  */
static unsigned int
rest_of_handle_stack_adjustments (void)
{
  cleanup_cfg (flag_crossjumping ? CLEANUP_CROSSJUMP : 0);

  /* This is kind of a heuristic.  We need to run combine_stack_adjustments
     even for machines with possibly nonzero RETURN_POPS_ARGS
     and ACCUMULATE_OUTGOING_ARGS.  We expect that only ports having
     push instructions will have popping returns.  */
#ifndef PUSH_ROUNDING
  if (!ACCUMULATE_OUTGOING_ARGS)
#endif
    {
      df_note_add_problem ();
      df_analyze ();
      combine_stack_adjustments ();
    }
  return 0;
}
571 
/* Pass descriptor for the combine-stack-adjustments ("csa") RTL pass.  */
struct rtl_opt_pass pass_stack_adjustments =
{
 {
  RTL_PASS,
  "csa",                                /* name */
  gate_handle_stack_adjustments,        /* gate */
  rest_of_handle_stack_adjustments,     /* execute */
  NULL,                                 /* sub */
  NULL,                                 /* next */
  0,                                    /* static_pass_number */
  TV_COMBINE_STACK_ADJUST,              /* tv_id */
  0,                                    /* properties_required */
  0,                                    /* properties_provided */
  0,                                    /* properties_destroyed */
  0,                                    /* todo_flags_start */
  TODO_df_finish | TODO_verify_rtl_sharing |
  TODO_dump_func |
  TODO_ggc_collect,                     /* todo_flags_finish */
 }
};
592