xref: /netbsd-src/external/gpl3/gcc/dist/gcc/reorg.cc (revision b1e838363e3c6fc78a55519254d99869742dd33c)
1 /* Perform instruction reorganizations for delay slot filling.
2    Copyright (C) 1992-2022 Free Software Foundation, Inc.
3    Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu).
4    Hacked by Michael Tiemann (tiemann@cygnus.com).
5 
6 This file is part of GCC.
7 
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12 
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
16 for more details.
17 
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3.  If not see
20 <http://www.gnu.org/licenses/>.  */
21 
22 /* Instruction reorganization pass.
23 
24    This pass runs after register allocation and final jump
25    optimization.  It should be the last pass to run before peephole.
26    It serves primarily to fill delay slots of insns, typically branch
27    and call insns.  Other insns typically involve more complicated
28    interactions of data dependencies and resource constraints, and
29    are better handled by scheduling before register allocation (by the
30    function `schedule_insns').
31 
32    The Branch Penalty is the number of extra cycles that are needed to
33    execute a branch insn.  On an ideal machine, branches take a single
34    cycle, and the Branch Penalty is 0.  Several RISC machines approach
35    branch delays differently:
36 
37    The MIPS has a single branch delay slot.  Most insns
38    (except other branches) can be used to fill this slot.  When the
39    slot is filled, two insns execute in two cycles, reducing the
40    branch penalty to zero.
41 
42    The SPARC always has a branch delay slot, but its effects can be
43    annulled when the branch is not taken.  This means that failing to
44    find other sources of insns, we can hoist an insn from the branch
45    target that would only be safe to execute knowing that the branch
46    is taken.
47 
48    The HP-PA always has a branch delay slot.  For unconditional branches
49    its effects can be annulled when the branch is taken.  The effects
50    of the delay slot in a conditional branch can be nullified for forward
51    taken branches, or for untaken backward branches.  This means
52    we can hoist insns from the fall-through path for forward branches or
53    steal insns from the target of backward branches.
54 
55    The TMS320C3x and C4x have three branch delay slots.  When the three
56    slots are filled, the branch penalty is zero.  Most insns can fill the
57    delay slots except jump insns.
58 
59    Three techniques for filling delay slots have been implemented so far:
60 
61    (1) `fill_simple_delay_slots' is the simplest, most efficient way
62    to fill delay slots.  This pass first looks for insns which come
63    from before the branch and which are safe to execute after the
64    branch.  Then it searches after the insn requiring delay slots or,
65    in the case of a branch, for insns that are after the point at
66    which the branch merges into the fallthrough code, if such a point
67    exists.  When such insns are found, the branch penalty decreases
68    and no code expansion takes place.
69 
70    (2) `fill_eager_delay_slots' is more complicated: it is used for
71    scheduling conditional jumps, or for scheduling jumps which cannot
72    be filled using (1).  A machine need not have annulled jumps to use
73    this strategy, but it helps (by keeping more options open).
74    `fill_eager_delay_slots' tries to guess the direction the branch
75    will go; if it guesses right 100% of the time, it can reduce the
76    branch penalty as much as `fill_simple_delay_slots' does.  If it
77    guesses wrong 100% of the time, it might as well schedule nops.  When
78    `fill_eager_delay_slots' takes insns from the fall-through path of
79    the jump, usually there is no code expansion; when it takes insns
80    from the branch target, there is code expansion if it is not the
81    only way to reach that target.
82 
83    (3) `relax_delay_slots' uses a set of rules to simplify code that
84    has been reorganized by (1) and (2).  It finds cases where
85    a conditional test can be eliminated, jumps can be threaded, extra
86    insns can be eliminated, etc.  It is the job of (1) and (2) to do a
87    good job of scheduling locally; `relax_delay_slots' takes care of
88    making the various individual schedules work well together.  It is
89    especially tuned to handle the control flow interactions of branch
90    insns.  It does nothing for insns with delay slots that do not
91    branch.  */
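
/* For illustration, on a hypothetical single-delay-slot machine (mnemonics
   made up for the example), strategy (1) above hoists an independent insn
   from before a branch into its delay slot:

	(before)		(after)
	  lw   r2,0(r3)		  beq  r1,r0,L1
	  beq  r1,r0,L1		  lw   r2,0(r3)
	  nop			L1:
	L1:

   The load still executes exactly once on both paths, so it is safe in a
   non-annulling delay slot, and the slot that would otherwise hold a nop
   now does useful work.  */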
92 
93 #include "config.h"
94 #include "system.h"
95 #include "coretypes.h"
96 #include "backend.h"
97 #include "target.h"
98 #include "rtl.h"
99 #include "tree.h"
100 #include "predict.h"
101 #include "memmodel.h"
102 #include "tm_p.h"
103 #include "expmed.h"
104 #include "insn-config.h"
105 #include "emit-rtl.h"
106 #include "recog.h"
107 #include "insn-attr.h"
108 #include "resource.h"
109 #include "tree-pass.h"
110 
111 
112 /* First, some functions that were used before GCC got a control flow graph.
113    These functions are now only used here in reorg.cc, and have therefore
114    been moved here to avoid inadvertent misuse elsewhere in the compiler.  */
115 
116 /* Return the last label to mark the same position as LABEL.  Return LABEL
117    itself if it is null or any return rtx.  */
118 
119 static rtx
120 skip_consecutive_labels (rtx label_or_return)
121 {
122   rtx_insn *insn;
123 
124   if (label_or_return && ANY_RETURN_P (label_or_return))
125     return label_or_return;
126 
127   rtx_insn *label = as_a <rtx_insn *> (label_or_return);
128 
129   /* __builtin_unreachable can create a CODE_LABEL followed by a BARRIER.
130 
131      Since reaching the CODE_LABEL is undefined behavior, we can return
132      any code label and we're OK at run time.
133 
134      However, if we return a CODE_LABEL which leads to a shrink-wrapped
135      epilogue, but the path does not have a prologue, then we will trip
136      a sanity check in the dwarf2 cfi code which wants to verify that
137      the CFIs are all the same on the traces leading to the epilogue.
138 
139      So we explicitly disallow looking through BARRIERS here.  */
140   for (insn = label;
141        insn != 0 && !INSN_P (insn) && !BARRIER_P (insn);
142        insn = NEXT_INSN (insn))
143     if (LABEL_P (insn))
144       label = insn;
145 
146   return label;
147 }
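
/* For example (hypothetical labels), given the insn stream

	L1:
	L2:
	L3:
	  insn

   skip_consecutive_labels (L1) returns L3, the last label marking the same
   position as L1.  */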
148 
149 /* Insns which have delay slots that have not yet been filled.  */
150 
151 static struct obstack unfilled_slots_obstack;
152 static rtx *unfilled_firstobj;
153 
154 /* Define macros to refer to the first and last slot containing unfilled
155    insns.  These are used because the list may move and its address
156    should be recomputed at each use.  */
157 
158 #define unfilled_slots_base	\
159   ((rtx_insn **) obstack_base (&unfilled_slots_obstack))
160 
161 #define unfilled_slots_next	\
162   ((rtx_insn **) obstack_next_free (&unfilled_slots_obstack))
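
/* A typical usage pattern in this file (sketch): an insn that still needs
   its slots filled is recorded with

	obstack_ptr_grow (&unfilled_slots_obstack, insn);

   and the filling passes later walk the pointers from unfilled_slots_base
   up to unfilled_slots_next, recomputing both at every use because growing
   the obstack can move the underlying storage.  */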
163 
164 /* Points to the label before the end of the function, or before a
165    return insn.  */
166 static rtx_code_label *function_return_label;
167 /* Likewise for a simple_return.  */
168 static rtx_code_label *function_simple_return_label;
169 
170 /* Mapping between INSN_UID's and position in the code since INSN_UID's do
171    not always monotonically increase.  */
172 static int *uid_to_ruid;
173 
174 /* Highest valid index in `uid_to_ruid'.  */
175 static int max_uid;
176 
177 static int stop_search_p (rtx_insn *, int);
178 static int resource_conflicts_p (struct resources *, struct resources *);
179 static int insn_references_resource_p (rtx, struct resources *, bool);
180 static int insn_sets_resource_p (rtx, struct resources *, bool);
181 static rtx_code_label *find_end_label (rtx);
182 static rtx_insn *emit_delay_sequence (rtx_insn *, const vec<rtx_insn *> &,
183 				      int);
184 static void add_to_delay_list (rtx_insn *, vec<rtx_insn *> *);
185 static rtx_insn *delete_from_delay_slot (rtx_insn *);
186 static void delete_scheduled_jump (rtx_insn *);
187 static void note_delay_statistics (int, int);
188 static int get_jump_flags (const rtx_insn *, rtx);
189 static int mostly_true_jump (rtx);
190 static rtx get_branch_condition (const rtx_insn *, rtx);
191 static int condition_dominates_p (rtx, const rtx_insn *);
192 static int redirect_with_delay_slots_safe_p (rtx_insn *, rtx, rtx);
193 static int redirect_with_delay_list_safe_p (rtx_insn *, rtx,
194 					    const vec<rtx_insn *> &);
195 static int check_annul_list_true_false (int, const vec<rtx_insn *> &);
196 static void steal_delay_list_from_target (rtx_insn *, rtx, rtx_sequence *,
197 					  vec<rtx_insn *> *,
198 					  struct resources *,
199 					  struct resources *,
200 					  struct resources *,
201 					  int, int *, int *,
202 					  rtx *);
203 static void steal_delay_list_from_fallthrough (rtx_insn *, rtx, rtx_sequence *,
204 					       vec<rtx_insn *> *,
205 					       struct resources *,
206 					       struct resources *,
207 					       struct resources *,
208 					       int, int *, int *);
209 static void try_merge_delay_insns (rtx_insn *, rtx_insn *);
210 static rtx_insn *redundant_insn (rtx, rtx_insn *, const vec<rtx_insn *> &);
211 static int own_thread_p (rtx, rtx, int);
212 static void update_block (rtx_insn *, rtx_insn *);
213 static int reorg_redirect_jump (rtx_jump_insn *, rtx);
214 static void update_reg_dead_notes (rtx_insn *, rtx_insn *);
215 static void fix_reg_dead_note (rtx_insn *, rtx);
216 static void update_reg_unused_notes (rtx_insn *, rtx);
217 static void fill_simple_delay_slots (int);
218 static void fill_slots_from_thread (rtx_jump_insn *, rtx, rtx, rtx,
219 				    int, int, int, int,
220 				    int *, vec<rtx_insn *> *);
221 static void fill_eager_delay_slots (void);
222 static void relax_delay_slots (rtx_insn *);
223 static void make_return_insns (rtx_insn *);
224 
225 /* A wrapper around next_active_insn which takes care to return ret_rtx
226    unchanged.  */
227 
228 static rtx
229 first_active_target_insn (rtx insn)
230 {
231   if (ANY_RETURN_P (insn))
232     return insn;
233   return next_active_insn (as_a <rtx_insn *> (insn));
234 }
235 
236 /* Return true iff INSN is a simplejump, or any kind of return insn.  */
237 
238 static bool
239 simplejump_or_return_p (rtx insn)
240 {
241   return (JUMP_P (insn)
242 	  && (simplejump_p (as_a <rtx_insn *> (insn))
243 	      || ANY_RETURN_P (PATTERN (insn))));
244 }
245 
246 /* Return TRUE if this insn should stop the search for insns to fill delay
247    slots.  LABELS_P indicates that labels should terminate the search.
248    In all cases, jumps terminate the search.  */
249 
250 static int
251 stop_search_p (rtx_insn *insn, int labels_p)
252 {
253   if (insn == 0)
254     return 1;
255 
256   /* If the insn can throw an exception that is caught within the function,
257      it may effectively perform a jump from the viewpoint of the function.
258      Therefore act like for a jump.  */
259   if (can_throw_internal (insn))
260     return 1;
261 
262   switch (GET_CODE (insn))
263     {
264     case NOTE:
265     case CALL_INSN:
266     case DEBUG_INSN:
267       return 0;
268 
269     case CODE_LABEL:
270       return labels_p;
271 
272     case JUMP_INSN:
273     case BARRIER:
274       return 1;
275 
276     case INSN:
277       /* OK unless it contains a delay slot or is an `asm' insn of some type.
278 	 We don't know anything about these.  */
279       return (GET_CODE (PATTERN (insn)) == SEQUENCE
280 	      || GET_CODE (PATTERN (insn)) == ASM_INPUT
281 	      || asm_noperands (PATTERN (insn)) >= 0);
282 
283     default:
284       gcc_unreachable ();
285     }
286 }
287 
288 /* Return TRUE if any resources are marked in both RES1 and RES2 or if either
289    resource set contains a volatile memory reference.  Otherwise, return FALSE.  */
290 
291 static int
292 resource_conflicts_p (struct resources *res1, struct resources *res2)
293 {
294   if ((res1->cc && res2->cc) || (res1->memory && res2->memory)
295       || res1->volatil || res2->volatil)
296     return 1;
297 
298   return hard_reg_set_intersect_p (res1->regs, res2->regs);
299 }
300 
301 /* Return TRUE if any resource marked in RES, a `struct resources', is
302    referenced by INSN.  If INCLUDE_DELAYED_EFFECTS is set, resources used by
303    the routine called by INSN are considered as well.
304 
305    We compute this by computing all the resources referenced by INSN and
306    seeing if this conflicts with RES.  It might be faster to directly check
307    ourselves, and this is the way it used to work, but it means duplicating
308    a large block of complex code.  */
309 
310 static int
311 insn_references_resource_p (rtx insn, struct resources *res,
312 			    bool include_delayed_effects)
313 {
314   struct resources insn_res;
315 
316   CLEAR_RESOURCE (&insn_res);
317   mark_referenced_resources (insn, &insn_res, include_delayed_effects);
318   return resource_conflicts_p (&insn_res, res);
319 }
320 
321 /* Return TRUE if INSN modifies resources that are marked in RES.
322    INCLUDE_DELAYED_EFFECTS is set if the effects of the routine called by INSN
323    should be included.  */
324 
325 static int
326 insn_sets_resource_p (rtx insn, struct resources *res,
327 		      bool include_delayed_effects)
328 {
329   struct resources insn_sets;
330 
331   CLEAR_RESOURCE (&insn_sets);
332   mark_set_resources (insn, &insn_sets, 0,
333 		      (include_delayed_effects
334 		       ? MARK_SRC_DEST_CALL
335 		       : MARK_SRC_DEST));
336   return resource_conflicts_p (&insn_sets, res);
337 }
338 
339 /* Find a label at the end of the function or before a RETURN.  If there
340    is none, try to make one.  If that fails, returns 0.
341 
342    The property of such a label is that it is placed just before the
343    epilogue or a bare RETURN insn, so that another bare RETURN can be
344    turned into a jump to the label unconditionally.  In particular, the
345    label cannot be placed before a RETURN insn with a filled delay slot.
346 
347    ??? There may be a problem with the current implementation.  Suppose
348    we start with a bare RETURN insn and call find_end_label.  It may set
349    function_return_label just before the RETURN.  Suppose the machinery
350    is able to fill the delay slot of the RETURN insn afterwards.  Then
351    function_return_label is no longer valid according to the property
352    described above and find_end_label will still return it unmodified.
353    Note that this is probably mitigated by the following observation:
354    once function_return_label is made, it is very likely the target of
355    a jump, so filling the delay slot of the RETURN will be much more
356    difficult.
357    KIND is either simple_return_rtx or ret_rtx, indicating which type of
358    return we're looking for.  */
359 
360 static rtx_code_label *
361 find_end_label (rtx kind)
362 {
363   rtx_insn *insn;
364   rtx_code_label **plabel;
365 
366   if (kind == ret_rtx)
367     plabel = &function_return_label;
368   else
369     {
370       gcc_assert (kind == simple_return_rtx);
371       plabel = &function_simple_return_label;
372     }
373 
374   /* If we found one previously, return it.  */
375   if (*plabel)
376     return *plabel;
377 
378   /* Otherwise, see if there is a label at the end of the function.  If there
379      is, it must be that RETURN insns aren't needed, so that is our return
380      label and we don't have to do anything else.  */
381 
382   insn = get_last_insn ();
383   while (NOTE_P (insn)
384 	 || (NONJUMP_INSN_P (insn)
385 	     && (GET_CODE (PATTERN (insn)) == USE
386 		 || GET_CODE (PATTERN (insn)) == CLOBBER)))
387     insn = PREV_INSN (insn);
388 
389   /* When a target threads its epilogue we might already have a
390      suitable return insn.  If so put a label before it for the
391      function_return_label.  */
392   if (BARRIER_P (insn)
393       && JUMP_P (PREV_INSN (insn))
394       && PATTERN (PREV_INSN (insn)) == kind)
395     {
396       rtx_insn *temp = PREV_INSN (PREV_INSN (insn));
397       rtx_code_label *label = gen_label_rtx ();
398       LABEL_NUSES (label) = 0;
399 
400       /* Put the label before any USE insns that may precede the RETURN
401 	 insn.  */
402       while (GET_CODE (temp) == USE)
403 	temp = PREV_INSN (temp);
404 
405       emit_label_after (label, temp);
406       *plabel = label;
407     }
408 
409   else if (LABEL_P (insn))
410     *plabel = as_a <rtx_code_label *> (insn);
411   else
412     {
413       rtx_code_label *label = gen_label_rtx ();
414       LABEL_NUSES (label) = 0;
415       /* If the basic block reorder pass moves the return insn to
416 	 some other place try to locate it again and put our
417 	 function_return_label there.  */
418       while (insn && ! (JUMP_P (insn) && (PATTERN (insn) == kind)))
419 	insn = PREV_INSN (insn);
420       if (insn)
421 	{
422 	  insn = PREV_INSN (insn);
423 
424 	  /* Put the label before any USE insns that may precede the
425 	     RETURN insn.  */
426 	  while (GET_CODE (insn) == USE)
427 	    insn = PREV_INSN (insn);
428 
429 	  emit_label_after (label, insn);
430 	}
431       else
432 	{
433 	  if (targetm.have_epilogue () && ! targetm.have_return ())
434 	    /* The RETURN insn has its delay slot filled so we cannot
435 	       emit the label just before it.  Since we already have
436 	       an epilogue and cannot emit a new RETURN, we cannot
437 	       emit the label at all.  */
438 	    return NULL;
439 
440 	  /* Otherwise, make a new label and emit a RETURN and BARRIER,
441 	     if needed.  */
442 	  emit_label (label);
443 	  if (targetm.have_return ())
444 	    {
445 	      /* The return we make may have delay slots too.  */
446 	      rtx_insn *pat = targetm.gen_return ();
447 	      rtx_insn *insn = emit_jump_insn (pat);
448 	      set_return_jump_label (insn);
449 	      emit_barrier ();
450 	      if (num_delay_slots (insn) > 0)
451 		obstack_ptr_grow (&unfilled_slots_obstack, insn);
452 	    }
453 	}
454       *plabel = label;
455     }
456 
457   /* Show one additional use for this label so it won't go away until
458      we are done.  */
459   ++LABEL_NUSES (*plabel);
460 
461   return *plabel;
462 }
463 
464 /* Put INSN and LIST together in a SEQUENCE rtx of LENGTH, and replace
465    the pattern of INSN with the SEQUENCE.
466 
467    Returns the insn containing the SEQUENCE that replaces INSN.  */
468 
469 static rtx_insn *
470 emit_delay_sequence (rtx_insn *insn, const vec<rtx_insn *> &list, int length)
471 {
472   /* Allocate the rtvec to hold the insns and the SEQUENCE.  */
473   rtvec seqv = rtvec_alloc (length + 1);
474   rtx seq = gen_rtx_SEQUENCE (VOIDmode, seqv);
475   rtx_insn *seq_insn = make_insn_raw (seq);
476 
477   /* If DELAY_INSN has a location, use it for SEQ_INSN.  If DELAY_INSN does
478      not have a location, but one of the delayed insns does, we pick up a
479      location from there later.  */
480   INSN_LOCATION (seq_insn) = INSN_LOCATION (insn);
481 
482   /* Unlink INSN from the insn chain, so that we can put it into
483      the SEQUENCE.   Remember where we want to emit SEQUENCE in AFTER.  */
484   rtx_insn *after = PREV_INSN (insn);
485   remove_insn (insn);
486   SET_NEXT_INSN (insn) = SET_PREV_INSN (insn) = NULL;
487 
488   /* Build our SEQUENCE and rebuild the insn chain.  */
489   start_sequence ();
490   XVECEXP (seq, 0, 0) = emit_insn (insn);
491 
492   unsigned int delay_insns = list.length ();
493   gcc_assert (delay_insns == (unsigned int) length);
494   for (unsigned int i = 0; i < delay_insns; i++)
495     {
496       rtx_insn *tem = list[i];
497       rtx note, next;
498 
499       /* Show that this copy of the insn isn't deleted.  */
500       tem->set_undeleted ();
501 
502       /* Unlink insn from its original place, and re-emit it into
503 	 the sequence.  */
504       SET_NEXT_INSN (tem) = SET_PREV_INSN (tem) = NULL;
505       XVECEXP (seq, 0, i + 1) = emit_insn (tem);
506 
507       /* The SPARC assembler, for instance, emits a warning when debug info
508          is output into the delay slot.  */
509       if (INSN_LOCATION (tem) && !INSN_LOCATION (seq_insn))
510 	INSN_LOCATION (seq_insn) = INSN_LOCATION (tem);
511       INSN_LOCATION (tem) = 0;
512 
513       for (note = REG_NOTES (tem); note; note = next)
514 	{
515 	  next = XEXP (note, 1);
516 	  switch (REG_NOTE_KIND (note))
517 	    {
518 	    case REG_DEAD:
519 	      /* Remove any REG_DEAD notes because we can't rely on them now
520 		 that the insn has been moved.  */
521 	      remove_note (tem, note);
522 	      break;
523 
524 	    case REG_LABEL_OPERAND:
525 	    case REG_LABEL_TARGET:
526 	      /* Keep the label reference count up to date.  */
527 	      if (LABEL_P (XEXP (note, 0)))
528 		LABEL_NUSES (XEXP (note, 0)) ++;
529 	      break;
530 
531 	    default:
532 	      break;
533 	    }
534 	}
535     }
536   end_sequence ();
537 
538   /* Splice our SEQUENCE into the insn stream where INSN used to be.  */
539   add_insn_after (seq_insn, after, NULL);
540 
541   return seq_insn;
542 }
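
/* For illustration, after a branch acquires two delay insns the stream
   contains one insn whose pattern is a SEQUENCE (RTL sketch, operands
   elided):

	(insn (sequence [(jump_insn ... the branch ...)
			 (insn ... first delay slot ...)
			 (insn ... second delay slot ...)]))

   Element 0 is the insn that required the slots; the remaining elements are
   the insns filling them, in execution order.  */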
543 
544 /* Add INSN to the end of DELAY_LIST.  The list must be in the order in
545    which the insns are to be executed.  */
546 
547 static void
548 add_to_delay_list (rtx_insn *insn, vec<rtx_insn *> *delay_list)
549 {
550   /* If INSN has its block number recorded, clear it since we may
551      be moving the insn to a new block.  */
552   clear_hashed_info_for_insn (insn);
553 
554   delay_list->safe_push (insn);
555 }
556 
557 /* Delete INSN from the delay slot of the insn that it is in, which may
558    produce an insn with no delay slots.  Return the new insn.  */
559 
560 static rtx_insn *
561 delete_from_delay_slot (rtx_insn *insn)
562 {
563   rtx_insn *trial, *seq_insn, *prev;
564   rtx_sequence *seq;
565   int i;
566   int had_barrier = 0;
567 
568   /* We first must find the insn containing the SEQUENCE with INSN in its
569      delay slot.  Do this by finding an insn, TRIAL, where
570      PREV_INSN (NEXT_INSN (TRIAL)) != TRIAL.  */
571 
572   for (trial = insn;
573        PREV_INSN (NEXT_INSN (trial)) == trial;
574        trial = NEXT_INSN (trial))
575     ;
576 
577   seq_insn = PREV_INSN (NEXT_INSN (trial));
578   seq = as_a <rtx_sequence *> (PATTERN (seq_insn));
579 
580   if (NEXT_INSN (seq_insn) && BARRIER_P (NEXT_INSN (seq_insn)))
581     had_barrier = 1;
582 
583   /* Create a delay list consisting of all the insns other than the one
584      we are deleting (unless we were the only one).  */
585   auto_vec<rtx_insn *, 5> delay_list;
586   if (seq->len () > 2)
587     for (i = 1; i < seq->len (); i++)
588       if (seq->insn (i) != insn)
589 	add_to_delay_list (seq->insn (i), &delay_list);
590 
591   /* Delete the old SEQUENCE, re-emit the insn that used to have the delay
592      list, and rebuild the delay list if non-empty.  */
593   prev = PREV_INSN (seq_insn);
594   trial = seq->insn (0);
595   delete_related_insns (seq_insn);
596   add_insn_after (trial, prev, NULL);
597 
598   /* If there was a barrier after the old SEQUENCE, re-emit it.  */
599   if (had_barrier)
600     emit_barrier_after (trial);
601 
602   /* If there are any delay insns, re-emit them.  Otherwise clear the
603      annul flag.  */
604   if (!delay_list.is_empty ())
605     trial = emit_delay_sequence (trial, delay_list, XVECLEN (seq, 0) - 2);
606   else if (JUMP_P (trial))
607     INSN_ANNULLED_BRANCH_P (trial) = 0;
608 
609   INSN_FROM_TARGET_P (insn) = 0;
610 
611   /* Show we need to fill this insn again.  */
612   obstack_ptr_grow (&unfilled_slots_obstack, trial);
613 
614   return trial;
615 }
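
/* For illustration, removing D2 from

	(insn (sequence [(jump_insn B) (insn D1) (insn D2)]))

   re-emits B with a SEQUENCE containing only D1; if the removed insn had
   been the only one in the SEQUENCE, B would be re-emitted on its own and,
   if it is a jump, its annul flag cleared.  In either case B is pushed back
   onto unfilled_slots_obstack so a later pass may try to refill it.  */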
616 
617 /* Delete INSN, a JUMP_INSN.  */
618 
619 static void
620 delete_scheduled_jump (rtx_insn *insn)
621 {
622   delete_related_insns (insn);
623 }
624 
625 /* Counters for delay-slot filling.  */
626 
627 #define NUM_REORG_FUNCTIONS 2
628 #define MAX_DELAY_HISTOGRAM 3
629 #define MAX_REORG_PASSES 2
630 
631 static int num_insns_needing_delays[NUM_REORG_FUNCTIONS][MAX_REORG_PASSES];
632 
633 static int num_filled_delays[NUM_REORG_FUNCTIONS][MAX_DELAY_HISTOGRAM+1][MAX_REORG_PASSES];
634 
635 static int reorg_pass_number;
636 
637 static void
638 note_delay_statistics (int slots_filled, int index)
639 {
640   num_insns_needing_delays[index][reorg_pass_number]++;
641   if (slots_filled > MAX_DELAY_HISTOGRAM)
642     slots_filled = MAX_DELAY_HISTOGRAM;
643   num_filled_delays[index][slots_filled][reorg_pass_number]++;
644 }
645 
646 /* Optimize the following cases:
647 
648    1.  When a conditional branch skips over only one instruction,
649        use an annulling branch and put that insn in the delay slot.
650        Use either a branch that annuls when the condition is true or
651        invert the test with a branch that annuls when the condition is
652        false.  This saves insns, since otherwise we must copy an insn
653        from the L1 target.
654 
655         (orig)		 (skip)		(otherwise)
656 	Bcc.n L1	Bcc',a L1	Bcc,a L1'
657 	insn		insn		insn2
658       L1:	      L1:	      L1:
659 	insn2		insn2		insn2
660 	insn3		insn3	      L1':
661 					insn3
662 
663    2.  When a conditional branch skips over only one instruction,
664        and after that, it unconditionally branches somewhere else,
665        perform a similar optimization.  This saves executing the
666        second branch in the case where the inverted condition is true.
667 
668 	Bcc.n L1	Bcc',a L2
669 	insn		insn
670       L1:	      L1:
671 	Bra L2		Bra L2
672 
673    INSN is a JUMP_INSN.
674 
675    This should be expanded to skip over N insns, where N is the number
676    of delay slots required.  */
677 
678 static void
679 optimize_skip (rtx_jump_insn *insn, vec<rtx_insn *> *delay_list)
680 {
681   rtx_insn *trial = next_nonnote_insn (insn);
682   rtx_insn *next_trial = next_active_insn (trial);
683   int flags;
684 
685   flags = get_jump_flags (insn, JUMP_LABEL (insn));
686 
687   if (trial == 0
688       || !NONJUMP_INSN_P (trial)
689       || GET_CODE (PATTERN (trial)) == SEQUENCE
690       || recog_memoized (trial) < 0
691       || (! eligible_for_annul_false (insn, 0, trial, flags)
692 	  && ! eligible_for_annul_true (insn, 0, trial, flags))
693       || RTX_FRAME_RELATED_P (trial)
694       || can_throw_internal (trial))
695     return;
696 
697   /* There are two cases where we are just executing one insn (we assume
698      here that a branch requires only one insn; this should be generalized
699      at some point):  Where the branch goes around a single insn or where
700      we have one insn followed by a branch to the same label we branch to.
701      In both of these cases, inverting the jump and annulling the delay
702      slot give the same effect in fewer insns.  */
703   if (next_trial == next_active_insn (JUMP_LABEL_AS_INSN (insn))
704       || (next_trial != 0
705 	  && simplejump_or_return_p (next_trial)
706 	  && JUMP_LABEL (insn) == JUMP_LABEL (next_trial)))
707     {
708       if (eligible_for_annul_false (insn, 0, trial, flags))
709 	{
710 	  if (invert_jump (insn, JUMP_LABEL (insn), 1))
711 	    INSN_FROM_TARGET_P (trial) = 1;
712 	  else if (! eligible_for_annul_true (insn, 0, trial, flags))
713 	    return;
714 	}
715 
716       add_to_delay_list (trial, delay_list);
717       next_trial = next_active_insn (trial);
718       update_block (trial, trial);
719       delete_related_insns (trial);
720 
721       /* Also, if we are targeting an unconditional
722 	 branch, thread our jump to the target of that branch.  Don't
723 	 change this into a RETURN here, because it may not accept what
724 	 we have in the delay slot.  We'll fix this up later.  */
725       if (next_trial && simplejump_or_return_p (next_trial))
726 	{
727 	  rtx target_label = JUMP_LABEL (next_trial);
728 	  if (ANY_RETURN_P (target_label))
729 	    target_label = find_end_label (target_label);
730 
731 	  if (target_label)
732 	    {
733 	      /* Recompute the flags based on TARGET_LABEL since threading
734 		 the jump to TARGET_LABEL may change the direction of the
735 		 jump (which may change the circumstances in which the
736 		 delay slot is nullified).  */
737 	      flags = get_jump_flags (insn, target_label);
738 	      if (eligible_for_annul_true (insn, 0, trial, flags))
739 		reorg_redirect_jump (insn, target_label);
740 	    }
741 	}
742 
743       INSN_ANNULLED_BRANCH_P (insn) = 1;
744     }
745 }
746 
747 /*  Encode and return branch direction and prediction information for
748     INSN assuming it will jump to LABEL.
749 
750     Unconditional branches return no direction information and
751     are predicted as very likely taken.  */
752 
753 static int
754 get_jump_flags (const rtx_insn *insn, rtx label)
755 {
756   int flags;
757 
758   /* get_jump_flags can be passed any insn with delay slots, these may
759      be INSNs, CALL_INSNs, or JUMP_INSNs.  Only JUMP_INSNs have branch
760      direction information, and only if they are conditional jumps.
761 
762      If LABEL is a return, then there is no way to determine the branch
763      direction.  */
764   if (JUMP_P (insn)
765       && (condjump_p (insn) || condjump_in_parallel_p (insn))
766       && !ANY_RETURN_P (label)
767       && INSN_UID (insn) <= max_uid
768       && INSN_UID (label) <= max_uid)
769     flags
770       = (uid_to_ruid[INSN_UID (label)] > uid_to_ruid[INSN_UID (insn)])
771 	 ? ATTR_FLAG_forward : ATTR_FLAG_backward;
772   /* No valid direction information.  */
773   else
774     flags = 0;
775 
776   return flags;
777 }
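
/* For example, a conditional jump to a label that appears later in the insn
   stream (a larger uid_to_ruid value) gets ATTR_FLAG_forward, a loop-closing
   branch back to an earlier label gets ATTR_FLAG_backward, and unconditional
   jumps, calls and returns get no direction flag at all.  */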
778 
779 /* Return truth value of the statement that this branch
780    is mostly taken.  If we think that the branch is extremely likely
781    to be taken, we return 2.  If the branch is slightly more likely to be
782    taken, return 1.  If the branch is slightly less likely to be taken,
783    return 0, and if the branch is highly unlikely to be taken, return -1.  */
784 
785 static int
786 mostly_true_jump (rtx jump_insn)
787 {
788   /* If branch probabilities are available, then use that number since it
789      always gives a correct answer.  */
790   rtx note = find_reg_note (jump_insn, REG_BR_PROB, 0);
791   if (note)
792     {
793       int prob = profile_probability::from_reg_br_prob_note (XINT (note, 0))
794 			.to_reg_br_prob_base ();
795 
796       if (prob >= REG_BR_PROB_BASE * 9 / 10)
797 	return 2;
798       else if (prob >= REG_BR_PROB_BASE / 2)
799 	return 1;
800       else if (prob >= REG_BR_PROB_BASE / 10)
801 	return 0;
802       else
803 	return -1;
804     }
805 
806   /* If there is no note, assume branches are not taken.
807      This should be rare.  */
808     return 0;
809 }
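
/* Worked example, assuming the usual REG_BR_PROB_BASE of 10000: a note
   probability of 9000 or more yields 2 (very likely taken), 5000-8999
   yields 1, 1000-4999 yields 0, and anything below 1000 yields -1
   (highly unlikely to be taken).  */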
810 
811 /* Return the condition under which INSN will branch to TARGET.  If TARGET
812    is zero, return the condition under which INSN will return.  If INSN is
813    an unconditional branch, return const_true_rtx.  If INSN isn't a simple
814    type of jump, or it doesn't go to TARGET, return 0.  */
815 
816 static rtx
817 get_branch_condition (const rtx_insn *insn, rtx target)
818 {
819   rtx pat = PATTERN (insn);
820   rtx src;
821 
822   if (condjump_in_parallel_p (insn))
823     pat = XVECEXP (pat, 0, 0);
824 
825   if (ANY_RETURN_P (pat) && pat == target)
826     return const_true_rtx;
827 
828   if (GET_CODE (pat) != SET || SET_DEST (pat) != pc_rtx)
829     return 0;
830 
831   src = SET_SRC (pat);
832   if (GET_CODE (src) == LABEL_REF && label_ref_label (src) == target)
833     return const_true_rtx;
834 
835   else if (GET_CODE (src) == IF_THEN_ELSE
836 	   && XEXP (src, 2) == pc_rtx
837 	   && ((GET_CODE (XEXP (src, 1)) == LABEL_REF
838 		&& label_ref_label (XEXP (src, 1)) == target)
839 	       || (ANY_RETURN_P (XEXP (src, 1)) && XEXP (src, 1) == target)))
840     return XEXP (src, 0);
841 
842   else if (GET_CODE (src) == IF_THEN_ELSE
843 	   && XEXP (src, 1) == pc_rtx
844 	   && ((GET_CODE (XEXP (src, 2)) == LABEL_REF
845 		&& label_ref_label (XEXP (src, 2)) == target)
846 	       || (ANY_RETURN_P (XEXP (src, 2)) && XEXP (src, 2) == target)))
847     {
848       enum rtx_code rev;
849       rev = reversed_comparison_code (XEXP (src, 0), insn);
850       if (rev != UNKNOWN)
851 	return gen_rtx_fmt_ee (rev, GET_MODE (XEXP (src, 0)),
852 			       XEXP (XEXP (src, 0), 0),
853 			       XEXP (XEXP (src, 0), 1));
854     }
855 
856   return 0;
857 }
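
/* For illustration, given the usual conditional branch pattern (operands
   simplified)

	(set (pc) (if_then_else (eq (reg 1) (const_int 0))
				(label_ref L1)
				(pc)))

   get_branch_condition (insn, L1) returns the (eq ...) expression.  If the
   label_ref and (pc) arms are swapped, the reversed comparison (ne ...) is
   returned instead, provided the reversal is known to be valid.  */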
858 
859 /* Return nonzero if CONDITION is more strict than the condition of
860    INSN, i.e., if INSN will always branch if CONDITION is true.  */
861 
862 static int
863 condition_dominates_p (rtx condition, const rtx_insn *insn)
864 {
865   rtx other_condition = get_branch_condition (insn, JUMP_LABEL (insn));
866   enum rtx_code code = GET_CODE (condition);
867   enum rtx_code other_code;
868 
869   if (rtx_equal_p (condition, other_condition)
870       || other_condition == const_true_rtx)
871     return 1;
872 
873   else if (condition == const_true_rtx || other_condition == 0)
874     return 0;
875 
876   other_code = GET_CODE (other_condition);
877   if (GET_RTX_LENGTH (code) != 2 || GET_RTX_LENGTH (other_code) != 2
878       || ! rtx_equal_p (XEXP (condition, 0), XEXP (other_condition, 0))
879       || ! rtx_equal_p (XEXP (condition, 1), XEXP (other_condition, 1)))
880     return 0;
881 
882   return comparison_dominates_p (code, other_code);
883 }
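
/* Worked example: if CONDITION is (eq (reg 1) (const_int 0)) and INSN
   branches when (le (reg 1) (const_int 0)), then CONDITION dominates INSN's
   condition, because reg 1 == 0 implies reg 1 <= 0, so INSN certainly
   branches whenever CONDITION is true and this function returns nonzero.  */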
884 
885 /* Return nonzero if redirecting JUMP to NEWLABEL does not invalidate
886    any insns already in the delay slot of JUMP.  */
887 
888 static int
889 redirect_with_delay_slots_safe_p (rtx_insn *jump, rtx newlabel, rtx seq)
890 {
891   int flags, i;
892   rtx_sequence *pat = as_a <rtx_sequence *> (PATTERN (seq));
893 
894   /* Make sure all the delay slots of this jump would still
895      be valid after threading the jump.  If they are still
896      valid, then return nonzero.  */
897 
898   flags = get_jump_flags (jump, newlabel);
899   for (i = 1; i < pat->len (); i++)
900     if (! (
901 #if ANNUL_IFFALSE_SLOTS
902 	   (INSN_ANNULLED_BRANCH_P (jump)
903 	    && INSN_FROM_TARGET_P (pat->insn (i)))
904 	   ? eligible_for_annul_false (jump, i - 1, pat->insn (i), flags) :
905 #endif
906 #if ANNUL_IFTRUE_SLOTS
907 	   (INSN_ANNULLED_BRANCH_P (jump)
908 	    && ! INSN_FROM_TARGET_P (XVECEXP (pat, 0, i)))
909 	   ? eligible_for_annul_true (jump, i - 1, pat->insn (i), flags) :
910 #endif
911 	   eligible_for_delay (jump, i - 1, pat->insn (i), flags)))
912       break;
913 
914   return (i == pat->len ());
915 }
916 
917 /* Return nonzero if redirecting JUMP to NEWLABEL does not invalidate
918    any insns we wish to place in the delay slot of JUMP.  */
919 
920 static int
921 redirect_with_delay_list_safe_p (rtx_insn *jump, rtx newlabel,
922 				 const vec<rtx_insn *> &delay_list)
923 {
924   /* Make sure all the insns in DELAY_LIST would still be
925      valid after threading the jump.  If they are still
926      valid, then return nonzero.  */
927 
928   int flags = get_jump_flags (jump, newlabel);
929   unsigned int delay_insns = delay_list.length ();
930   unsigned int i = 0;
931   for (; i < delay_insns; i++)
932     if (! (
933 #if ANNUL_IFFALSE_SLOTS
934 	   (INSN_ANNULLED_BRANCH_P (jump)
935 	    && INSN_FROM_TARGET_P (delay_list[i]))
936 	   ? eligible_for_annul_false (jump, i, delay_list[i], flags) :
937 #endif
938 #if ANNUL_IFTRUE_SLOTS
939 	   (INSN_ANNULLED_BRANCH_P (jump)
940 	    && ! INSN_FROM_TARGET_P (delay_list[i]))
941 	   ? eligible_for_annul_true (jump, i, delay_list[i], flags) :
942 #endif
943 	   eligible_for_delay (jump, i, delay_list[i], flags)))
944       break;
945 
946   return i == delay_insns;
947 }
948 
949 /* DELAY_LIST is a list of insns that have already been placed into delay
950    slots.  See if all of them have the same annulling status as ANNUL_TRUE_P.
951    If not, return 0; otherwise return 1.  */
952 
953 static int
954 check_annul_list_true_false (int annul_true_p,
955 			     const vec<rtx_insn *> &delay_list)
956 {
957   rtx_insn *trial;
958   unsigned int i;
959   FOR_EACH_VEC_ELT (delay_list, i, trial)
960     if ((annul_true_p && INSN_FROM_TARGET_P (trial))
961 	|| (!annul_true_p && !INSN_FROM_TARGET_P (trial)))
962       return 0;
963 
964   return 1;
965 }
966 
967 /* INSN branches to an insn whose pattern SEQ is a SEQUENCE.  Given that
968    the condition tested by INSN is CONDITION and the resources shown in
969    OTHER_NEEDED are needed after INSN, see whether INSN can take all the insns
970    from SEQ's delay list, in addition to whatever insns it may execute
971    (in DELAY_LIST).  SETS and NEEDED denote resources already set and
972    needed while searching for delay slot insns.  Return the concatenated
973    delay list if possible, otherwise, return 0.
974 
975    SLOTS_TO_FILL is the total number of slots required by INSN, and
976    PSLOTS_FILLED points to the number filled so far (also the number of
977    insns in DELAY_LIST).  It is updated with the number that have been
978    filled from the SEQUENCE, if any.
979 
980    PANNUL_P points to a nonzero value if we already know that we need
981    to annul INSN.  If this routine determines that annulling is needed,
982    it may set that value nonzero.
983 
984    PNEW_THREAD points to a location that is to receive the place at which
985    execution should continue.  */
986 
987 static void
988 steal_delay_list_from_target (rtx_insn *insn, rtx condition, rtx_sequence *seq,
989 			      vec<rtx_insn *> *delay_list,
990 			      struct resources *sets,
991 			      struct resources *needed,
992 			      struct resources *other_needed,
993 			      int slots_to_fill, int *pslots_filled,
994 			      int *pannul_p, rtx *pnew_thread)
995 {
996   int slots_remaining = slots_to_fill - *pslots_filled;
997   int total_slots_filled = *pslots_filled;
998   auto_vec<rtx_insn *, 5> new_delay_list;
999   int must_annul = *pannul_p;
1000   int used_annul = 0;
1001   int i;
1002   struct resources cc_set;
1003   rtx_insn **redundant;
1004 
1005   /* We can't do anything if there are more delay slots in SEQ than we
1006      can handle, or if we don't know that it will be a taken branch.
1007      We know that it will be a taken branch if it is either an unconditional
1008      branch or a conditional branch with a stricter branch condition.
1009 
1010      Also, exit if the branch has more than one set, since then it is computing
1011      other results that can't be ignored, e.g. the HPPA mov&branch instruction.
1012      ??? It may be possible to move other sets into INSN in addition to
1013      moving the instructions in the delay slots.
1014 
1015      We cannot steal the delay list if one of the instructions in the
1016      current delay_list modifies the condition codes and the jump in the
1017      sequence is a conditional jump. We cannot do this because we cannot
1018      change the direction of the jump because the condition codes
1019      will affect the direction of the jump in the sequence.  */
1020 
1021   CLEAR_RESOURCE (&cc_set);
1022 
1023   rtx_insn *trial;
1024   FOR_EACH_VEC_ELT (*delay_list, i, trial)
1025     {
1026       mark_set_resources (trial, &cc_set, 0, MARK_SRC_DEST_CALL);
1027       if (insn_references_resource_p (seq->insn (0), &cc_set, false))
1028 	return;
1029     }
1030 
1031   if (XVECLEN (seq, 0) - 1 > slots_remaining
1032       || ! condition_dominates_p (condition, seq->insn (0))
1033       || ! single_set (seq->insn (0)))
1034     return;
1035 
1036   /* On some targets, branches with delay slots can have a limited
1037      displacement.  Give the back end a chance to tell us we can't do
1038      this.  */
1039   if (! targetm.can_follow_jump (insn, seq->insn (0)))
1040     return;
1041 
1042   redundant = XALLOCAVEC (rtx_insn *, XVECLEN (seq, 0));
1043   for (i = 1; i < seq->len (); i++)
1044     {
1045       rtx_insn *trial = seq->insn (i);
1046       int flags;
1047 
1048       if (insn_references_resource_p (trial, sets, false)
1049 	  || insn_sets_resource_p (trial, needed, false)
1050 	  || insn_sets_resource_p (trial, sets, false)
1051 	  /* If TRIAL is from the fallthrough code of an annulled branch insn
1052 	     in SEQ, we cannot use it.  */
1053 	  || (INSN_ANNULLED_BRANCH_P (seq->insn (0))
1054 	      && ! INSN_FROM_TARGET_P (trial)))
1055 	return;
1056 
1057       /* If this insn was already done (usually in a previous delay slot),
1058 	 pretend we put it in our delay slot.  */
1059       redundant[i] = redundant_insn (trial, insn, new_delay_list);
1060       if (redundant[i])
1061 	continue;
1062 
1063       /* We will end up re-vectoring this branch, so compute flags
1064 	 based on jumping to the new label.  */
1065       flags = get_jump_flags (insn, JUMP_LABEL (seq->insn (0)));
1066 
1067       if (! must_annul
1068 	  && ((condition == const_true_rtx
1069 	       || (! insn_sets_resource_p (trial, other_needed, false)
1070 		   && ! may_trap_or_fault_p (PATTERN (trial)))))
1071 	  ? eligible_for_delay (insn, total_slots_filled, trial, flags)
1072 	  : (must_annul || (delay_list->is_empty () && new_delay_list.is_empty ()))
1073 	     && (must_annul = 1,
1074 		 check_annul_list_true_false (0, *delay_list)
1075 	         && check_annul_list_true_false (0, new_delay_list)
1076 	         && eligible_for_annul_false (insn, total_slots_filled,
1077 					      trial, flags)))
1078 	{
1079 	  if (must_annul)
1080 	    {
1081 	      /* Frame related instructions cannot go into annulled delay
1082 		 slots, it messes up the dwarf info.  */
1083 	      if (RTX_FRAME_RELATED_P (trial))
1084 		return;
1085 	      used_annul = 1;
1086 	    }
1087 	  rtx_insn *temp = copy_delay_slot_insn (trial);
1088 	  INSN_FROM_TARGET_P (temp) = 1;
1089 	  add_to_delay_list (temp, &new_delay_list);
1090 	  total_slots_filled++;
1091 
1092 	  if (--slots_remaining == 0)
1093 	    break;
1094 	}
1095       else
1096 	return;
1097     }
1098 
1099   /* Record the effect of the instructions that were redundant and which
1100      we therefore decided not to copy.  */
1101   for (i = 1; i < seq->len (); i++)
1102     if (redundant[i])
1103       {
1104 	fix_reg_dead_note (redundant[i], insn);
1105 	update_block (seq->insn (i), insn);
1106       }
1107 
1108   /* Show the place to which we will be branching.  */
1109   *pnew_thread = first_active_target_insn (JUMP_LABEL (seq->insn (0)));
1110 
1111   /* Add any new insns to the delay list and update the count of the
1112      number of slots filled.  */
1113   *pslots_filled = total_slots_filled;
1114   if (used_annul)
1115     *pannul_p = 1;
1116 
1117   rtx_insn *temp;
1118   FOR_EACH_VEC_ELT (new_delay_list, i, temp)
1119     add_to_delay_list (temp, delay_list);
1120 }
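
/* For illustration (hypothetical labels and mnemonics): if INSN is
   "Bcc L1" and the insn at L1 is already a filled branch "Bra L2" carrying
   "insn1" in its delay slot, this routine copies insn1 into INSN's own
   delay list (marking the copy INSN_FROM_TARGET_P) and sets *PNEW_THREAD to
   the first active insn at L2, so the caller can then redirect INSN past
   the stolen branch.  */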
1121 
1122 /* Similar to steal_delay_list_from_target except that SEQ is on the
1123    fallthrough path of INSN.  Here we only do something if the delay insn
1124    of SEQ is an unconditional branch.  In that case we steal its delay slot
1125    for INSN since unconditional branches are much easier to fill.  */
1126 
1127 static void
1128 steal_delay_list_from_fallthrough (rtx_insn *insn, rtx condition,
1129 				   rtx_sequence *seq,
1130 				   vec<rtx_insn *> *delay_list,
1131 				   struct resources *sets,
1132 				   struct resources *needed,
1133 				   struct resources *other_needed,
1134 				   int slots_to_fill, int *pslots_filled,
1135 				   int *pannul_p)
1136 {
1137   int i;
1138   int flags;
1139   int must_annul = *pannul_p;
1140   int used_annul = 0;
1141 
1142   flags = get_jump_flags (insn, JUMP_LABEL (insn));
1143 
1144   /* We can't do anything if SEQ's delay insn isn't an
1145      unconditional branch.  */
1146 
1147   if (! simplejump_or_return_p (seq->insn (0)))
1148     return;
1149 
1150   for (i = 1; i < seq->len (); i++)
1151     {
1152       rtx_insn *trial = seq->insn (i);
1153       rtx_insn *prior_insn;
1154 
1155       if (insn_references_resource_p (trial, sets, false)
1156 	  || insn_sets_resource_p (trial, needed, false)
1157 	  || insn_sets_resource_p (trial, sets, false))
1158 	break;
1159 
1160       /* If this insn was already done, we don't need it.  */
1161       if ((prior_insn = redundant_insn (trial, insn, *delay_list)))
1162 	{
1163 	  fix_reg_dead_note (prior_insn, insn);
1164 	  update_block (trial, insn);
1165 	  delete_from_delay_slot (trial);
1166 	  continue;
1167 	}
1168 
1169       if (! must_annul
1170 	  && ((condition == const_true_rtx
1171 	       || (! insn_sets_resource_p (trial, other_needed, false)
1172 		   && ! may_trap_or_fault_p (PATTERN (trial)))))
1173 	  ? eligible_for_delay (insn, *pslots_filled, trial, flags)
1174 	  : (must_annul || delay_list->is_empty ()) && (must_annul = 1,
1175 	     check_annul_list_true_false (1, *delay_list)
1176 	     && eligible_for_annul_true (insn, *pslots_filled, trial, flags)))
1177 	{
1178 	  if (must_annul)
1179 	    used_annul = 1;
1180 	  delete_from_delay_slot (trial);
1181 	  add_to_delay_list (trial, delay_list);
1182 
1183 	  if (++(*pslots_filled) == slots_to_fill)
1184 	    break;
1185 	}
1186       else
1187 	break;
1188     }
1189 
1190   if (used_annul)
1191     *pannul_p = 1;
1192 }
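
/* For illustration (hypothetical mnemonics): if the code falling through
   after INSN is already a filled unconditional branch "Bra L2" with "insn1"
   in its delay slot, insn1 is removed from that slot and added to INSN's
   delay list instead; the unconditional branch, being much easier to fill,
   goes back on the list of insns needing delay slots.  */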
1193 
1194 /* Try merging insns starting at THREAD which match exactly the insns in
1195    INSN's delay list.
1196 
1197    If all insns were matched and the insn was previously annulling, the
1198    annul bit will be cleared.
1199 
1200    For each insn that is merged, if the branch is or will be non-annulling,
1201    we delete the merged insn.  */
1202 
1203 static void
1204 try_merge_delay_insns (rtx_insn *insn, rtx_insn *thread)
1205 {
1206   rtx_insn *trial, *next_trial;
1207   rtx_insn *delay_insn = as_a <rtx_insn *> (XVECEXP (PATTERN (insn), 0, 0));
1208   int annul_p = JUMP_P (delay_insn) && INSN_ANNULLED_BRANCH_P (delay_insn);
1209   int slot_number = 1;
1210   int num_slots = XVECLEN (PATTERN (insn), 0);
1211   rtx next_to_match = XVECEXP (PATTERN (insn), 0, slot_number);
1212   struct resources set, needed, modified;
1213   auto_vec<std::pair<rtx_insn *, bool>, 10> merged_insns;
1214   int flags;
1215 
1216   flags = get_jump_flags (delay_insn, JUMP_LABEL (delay_insn));
1217 
1218   CLEAR_RESOURCE (&needed);
1219   CLEAR_RESOURCE (&set);
1220 
1221   /* If this is not an annulling branch, take into account anything needed in
1222      INSN's delay slot.  This prevents two increments from being incorrectly
1223      folded into one.  If we are annulling, this would be the correct
1224      thing to do.  (The alternative, looking at things set in NEXT_TO_MATCH
1225      will essentially disable this optimization.  This method is somewhat of
1226      a kludge, but I don't see a better way.)  */
1227   if (! annul_p)
1228     for (int i = 1; i < num_slots; i++)
1229       if (XVECEXP (PATTERN (insn), 0, i))
1230 	mark_referenced_resources (XVECEXP (PATTERN (insn), 0, i), &needed,
1231 				   true);
1232 
1233   for (trial = thread; !stop_search_p (trial, 1); trial = next_trial)
1234     {
1235       rtx pat = PATTERN (trial);
1236       rtx oldtrial = trial;
1237 
1238       next_trial = next_nonnote_insn (trial);
1239 
1240       /* TRIAL must be a CALL_INSN or INSN.  Skip USE and CLOBBER.  */
1241       if (NONJUMP_INSN_P (trial)
1242 	  && (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER))
1243 	continue;
1244 
1245       if (GET_CODE (next_to_match) == GET_CODE (trial)
1246 	  && ! insn_references_resource_p (trial, &set, true)
1247 	  && ! insn_sets_resource_p (trial, &set, true)
1248 	  && ! insn_sets_resource_p (trial, &needed, true)
1249 	  && (trial = try_split (pat, trial, 0)) != 0
1250 	  /* Update next_trial, in case try_split succeeded.  */
1251 	  && (next_trial = next_nonnote_insn (trial))
1252 	  /* Likewise THREAD.  */
1253 	  && (thread = oldtrial == thread ? trial : thread)
1254 	  && rtx_equal_p (PATTERN (next_to_match), PATTERN (trial))
1255 	  /* Have to test this condition if annul condition is different
1256 	     from (and less restrictive than) non-annulling one.  */
1257 	  && eligible_for_delay (delay_insn, slot_number - 1, trial, flags))
1258 	{
1259 
1260 	  if (! annul_p)
1261 	    {
1262 	      update_block (trial, thread);
1263 	      if (trial == thread)
1264 		thread = next_active_insn (thread);
1265 
1266 	      delete_related_insns (trial);
1267 	      INSN_FROM_TARGET_P (next_to_match) = 0;
1268 	    }
1269 	  else
1270 	    merged_insns.safe_push (std::pair<rtx_insn *, bool> (trial, false));
1271 
1272 	  if (++slot_number == num_slots)
1273 	    break;
1274 
1275 	  next_to_match = XVECEXP (PATTERN (insn), 0, slot_number);
1276 	}
1277 
1278       mark_set_resources (trial, &set, 0, MARK_SRC_DEST_CALL);
1279       mark_referenced_resources (trial, &needed, true);
1280     }
1281 
1282   /* See if we stopped on a filled insn.  If we did, try to see if its
1283      delay slots match.  */
1284   if (slot_number != num_slots
1285       && trial && NONJUMP_INSN_P (trial)
1286       && GET_CODE (PATTERN (trial)) == SEQUENCE
1287       && !(JUMP_P (XVECEXP (PATTERN (trial), 0, 0))
1288            && INSN_ANNULLED_BRANCH_P (XVECEXP (PATTERN (trial), 0, 0))))
1289     {
1290       rtx_sequence *pat = as_a <rtx_sequence *> (PATTERN (trial));
1291       rtx filled_insn = XVECEXP (pat, 0, 0);
1292 
1293       /* Account for resources set/needed by the filled insn.  */
1294       mark_set_resources (filled_insn, &set, 0, MARK_SRC_DEST_CALL);
1295       mark_referenced_resources (filled_insn, &needed, true);
1296 
1297       for (int i = 1; i < pat->len (); i++)
1298 	{
1299 	  rtx_insn *dtrial = pat->insn (i);
1300 
1301 	  CLEAR_RESOURCE (&modified);
1302 	  /* Account for resources set by the insns following NEXT_TO_MATCH
1303 	     inside INSN's delay list. */
1304 	  for (int j = 1; slot_number + j < num_slots; j++)
1305 	    mark_set_resources (XVECEXP (PATTERN (insn), 0, slot_number + j),
1306 				&modified, 0, MARK_SRC_DEST_CALL);
1307 	  /* Account for resources set by the insn before DTRIAL and inside
1308 	     TRIAL's delay list. */
1309 	  for (int j = 1; j < i; j++)
1310 	    mark_set_resources (XVECEXP (pat, 0, j),
1311 				&modified, 0, MARK_SRC_DEST_CALL);
1312 	  if (! insn_references_resource_p (dtrial, &set, true)
1313 	      && ! insn_sets_resource_p (dtrial, &set, true)
1314 	      && ! insn_sets_resource_p (dtrial, &needed, true)
1315 	      && rtx_equal_p (PATTERN (next_to_match), PATTERN (dtrial))
1316 	      /* Check that DTRIAL and NEXT_TO_MATCH do not reference a
1317 	         resource modified between them (only DTRIAL is checked because
1318 	         NEXT_TO_MATCH and DTRIAL must be equal in order to reach
1319 	         this point).  */
1320 	      && ! insn_references_resource_p (dtrial, &modified, true)
1321 	      && eligible_for_delay (delay_insn, slot_number - 1, dtrial, flags))
1322 	    {
1323 	      if (! annul_p)
1324 		{
1325 		  rtx_insn *new_rtx;
1326 
1327 		  update_block (dtrial, thread);
1328 		  new_rtx = delete_from_delay_slot (dtrial);
1329 	          if (thread->deleted ())
1330 		    thread = new_rtx;
1331 		  INSN_FROM_TARGET_P (next_to_match) = 0;
1332 		}
1333 	      else
1334 		merged_insns.safe_push (std::pair<rtx_insn *, bool> (dtrial,
1335 								     true));
1336 
1337 	      if (++slot_number == num_slots)
1338 		break;
1339 
1340 	      next_to_match = XVECEXP (PATTERN (insn), 0, slot_number);
1341 	    }
1342 	  else
1343 	    {
1344 	      /* Keep track of the set/referenced resources for the delay
1345 		 slots of any trial insns we encounter.  */
1346 	      mark_set_resources (dtrial, &set, 0, MARK_SRC_DEST_CALL);
1347 	      mark_referenced_resources (dtrial, &needed, true);
1348 	    }
1349 	}
1350     }
1351 
1352   /* If all insns in the delay slot have been matched and we were previously
1353      annulling the branch, we no longer need to.  In that case delete all the
1354      merged insns.  Also clear the INSN_FROM_TARGET_P bit of each insn in
1355      the delay list so that we know that it isn't only being used at the
1356      target.  */
1357   if (slot_number == num_slots && annul_p)
1358     {
1359       unsigned int len = merged_insns.length ();
1360       for (unsigned int i = len - 1; i < len; i--)
1361 	if (merged_insns[i].second)
1362 	  {
1363 	    update_block (merged_insns[i].first, thread);
1364 	    rtx_insn *new_rtx = delete_from_delay_slot (merged_insns[i].first);
1365 	    if (thread->deleted ())
1366 	      thread = new_rtx;
1367 	  }
1368 	else
1369 	  {
1370 	    update_block (merged_insns[i].first, thread);
1371 	    delete_related_insns (merged_insns[i].first);
1372 	  }
1373 
1374       INSN_ANNULLED_BRANCH_P (delay_insn) = 0;
1375 
1376       for (int i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
1377 	INSN_FROM_TARGET_P (XVECEXP (PATTERN (insn), 0, i)) = 0;
1378     }
1379 }
1380 
1381 /* See if INSN is redundant with an insn in front of TARGET.  Often this
1382    is called when INSN is a candidate for a delay slot of TARGET.
1383    DELAY_LIST are insns that will be placed in delay slots of TARGET in front
1384    of INSN.  Often INSN will be redundant with an insn in a delay slot of
1385    some previous insn.  This happens when we have a series of branches to the
1386    same label; in that case the first insn at the target might want to go
1387    into each of the delay slots.
1388 
1389    If we are not careful, this routine can take up a significant fraction
1390    of the total compilation time (4%), but only wins rarely.  Hence we
1391    speed this routine up by making two passes.  The first pass goes back
1392    until it hits a label and sees if it finds an insn with an identical
1393    pattern.  Only in this (relatively rare) event does it check for
1394    data conflicts.
1395 
1396    We do not split insns we encounter.  This could cause us not to find a
1397    redundant insn, but the cost of splitting seems greater than the possible
1398    gain in rare cases.  */
1399 
1400 static rtx_insn *
1401 redundant_insn (rtx insn, rtx_insn *target, const vec<rtx_insn *> &delay_list)
1402 {
1403   rtx target_main = target;
1404   rtx ipat = PATTERN (insn);
1405   rtx_insn *trial;
1406   rtx pat;
1407   struct resources needed, set;
1408   int i;
1409   unsigned insns_to_search;
1410 
1411   /* If INSN has any REG_UNUSED notes, it can't match anything since we
1412      are allowed to not actually assign to such a register.  */
1413   if (find_reg_note (insn, REG_UNUSED, NULL_RTX) != 0)
1414     return 0;
1415 
1416   /* Scan backwards looking for a match.  */
1417   for (trial = PREV_INSN (target),
1418 	 insns_to_search = param_max_delay_slot_insn_search;
1419        trial && insns_to_search > 0;
1420        trial = PREV_INSN (trial))
1421     {
1422       /* (use (insn))s can come immediately after a barrier if the
1423 	 label that used to precede them has been deleted as dead.
1424 	 See delete_related_insns.  */
1425       if (LABEL_P (trial) || BARRIER_P (trial))
1426 	return 0;
1427 
1428       if (!INSN_P (trial))
1429 	continue;
1430       --insns_to_search;
1431 
1432       pat = PATTERN (trial);
1433       if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER)
1434 	continue;
1435 
1436       if (GET_CODE (trial) == DEBUG_INSN)
1437 	continue;
1438 
1439       if (rtx_sequence *seq = dyn_cast <rtx_sequence *> (pat))
1440 	{
1441 	  /* Stop for a CALL and its delay slots because it is difficult to
1442 	     track its resource needs correctly.  */
1443 	  if (CALL_P (seq->element (0)))
1444 	    return 0;
1445 
1446 	  /* Stop for an INSN or JUMP_INSN with delayed effects and its delay
1447 	     slots because it is difficult to track its resource needs
1448 	     correctly.  */
1449 
1450 	  if (INSN_SETS_ARE_DELAYED (seq->insn (0)))
1451 	    return 0;
1452 
1453 	  if (INSN_REFERENCES_ARE_DELAYED (seq->insn (0)))
1454 	    return 0;
1455 
1456 	  /* See if any of the insns in the delay slot match, updating
1457 	     resource requirements as we go.  */
1458 	  for (i = seq->len () - 1; i > 0; i--)
1459 	    if (GET_CODE (seq->element (i)) == GET_CODE (insn)
1460 		&& rtx_equal_p (PATTERN (seq->element (i)), ipat)
1461 		&& ! find_reg_note (seq->element (i), REG_UNUSED, NULL_RTX))
1462 	      break;
1463 
1464 	  /* If found a match, exit this loop early.  */
1465 	  if (i > 0)
1466 	    break;
1467 	}
1468 
1469       else if (GET_CODE (trial) == GET_CODE (insn) && rtx_equal_p (pat, ipat)
1470 	       && ! find_reg_note (trial, REG_UNUSED, NULL_RTX))
1471 	break;
1472     }
1473 
1474   /* If we didn't find an insn that matches, return 0.  */
1475   if (trial == 0)
1476     return 0;
1477 
1478   /* See what resources this insn sets and needs.  If they overlap, it
1479      can't be redundant.  */
1480 
1481   CLEAR_RESOURCE (&needed);
1482   CLEAR_RESOURCE (&set);
1483   mark_set_resources (insn, &set, 0, MARK_SRC_DEST_CALL);
1484   mark_referenced_resources (insn, &needed, true);
1485 
1486   /* If TARGET is a SEQUENCE, get the main insn.  */
1487   if (NONJUMP_INSN_P (target) && GET_CODE (PATTERN (target)) == SEQUENCE)
1488     target_main = XVECEXP (PATTERN (target), 0, 0);
1489 
1490   if (resource_conflicts_p (&needed, &set)
1491       /* The insn requiring the delay may not set anything needed or set by
1492 	 INSN.  */
1493       || insn_sets_resource_p (target_main, &needed, true)
1494       || insn_sets_resource_p (target_main, &set, true))
1495     return 0;
1496 
1497   /* Insns we pass may not set either NEEDED or SET, so merge them for
1498      simpler tests.  */
1499   needed.memory |= set.memory;
1500   needed.regs |= set.regs;
1501 
1502   /* This insn isn't redundant if it conflicts with an insn that either is
1503      or will be in a delay slot of TARGET.  */
1504 
1505   unsigned int j;
1506   rtx_insn *temp;
1507   FOR_EACH_VEC_ELT (delay_list, j, temp)
1508     if (insn_sets_resource_p (temp, &needed, true))
1509       return 0;
1510 
1511   if (NONJUMP_INSN_P (target) && GET_CODE (PATTERN (target)) == SEQUENCE)
1512     for (i = 1; i < XVECLEN (PATTERN (target), 0); i++)
1513       if (insn_sets_resource_p (XVECEXP (PATTERN (target), 0, i), &needed,
1514 				true))
1515 	return 0;
1516 
1517   /* Scan backwards until we reach a label or an insn that uses something
1518      INSN sets, or sets something INSN uses or sets.  */
1519 
1520   for (trial = PREV_INSN (target),
1521 	 insns_to_search = param_max_delay_slot_insn_search;
1522        trial && !LABEL_P (trial) && insns_to_search > 0;
1523        trial = PREV_INSN (trial))
1524     {
1525       if (!INSN_P (trial))
1526 	continue;
1527       --insns_to_search;
1528 
1529       pat = PATTERN (trial);
1530       if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER)
1531 	continue;
1532 
1533       if (GET_CODE (trial) == DEBUG_INSN)
1534 	continue;
1535 
1536       if (rtx_sequence *seq = dyn_cast <rtx_sequence *> (pat))
1537 	{
1538 	  bool annul_p = false;
1539 	  rtx_insn *control = seq->insn (0);
1540 
1541 	  /* If this is a CALL_INSN and its delay slots, it is hard to track
1542 	     the resource needs properly, so give up.  */
1543 	  if (CALL_P (control))
1544 	    return 0;
1545 
1546 	  /* If this is an INSN or JUMP_INSN with delayed effects, it
1547 	     is hard to track the resource needs properly, so give up.  */
1548 
1549 	  if (INSN_SETS_ARE_DELAYED (control))
1550 	    return 0;
1551 
1552 	  if (INSN_REFERENCES_ARE_DELAYED (control))
1553 	    return 0;
1554 
1555 	  if (JUMP_P (control))
1556 	    annul_p = INSN_ANNULLED_BRANCH_P (control);
1557 
1558 	  /* See if any of the insns in the delay slot match, updating
1559 	     resource requirements as we go.  */
1560 	  for (i = seq->len () - 1; i > 0; i--)
1561 	    {
1562 	      rtx_insn *candidate = seq->insn (i);
1563 
1564 	      /* If an insn will be annulled if the branch is false, it isn't
1565 		 considered as a possible duplicate insn.  */
1566 	      if (rtx_equal_p (PATTERN (candidate), ipat)
1567 		  && ! (annul_p && INSN_FROM_TARGET_P (candidate)))
1568 		{
1569 		  /* Show that this insn will be used in the sequel.  */
1570 		  INSN_FROM_TARGET_P (candidate) = 0;
1571 		  return candidate;
1572 		}
1573 
1574 	      /* Unless this is an annulled insn from the target of a branch,
1575 		 we must stop if it sets anything needed or set by INSN.  */
1576 	      if ((!annul_p || !INSN_FROM_TARGET_P (candidate))
1577 		  && insn_sets_resource_p (candidate, &needed, true))
1578 		return 0;
1579 	    }
1580 
1581 	  /* If the insn requiring the delay slot conflicts with INSN, we
1582 	     must stop.  */
1583 	  if (insn_sets_resource_p (control, &needed, true))
1584 	    return 0;
1585 	}
1586       else
1587 	{
1588 	  /* See if TRIAL is the same as INSN.  */
1589 	  pat = PATTERN (trial);
1590 	  if (rtx_equal_p (pat, ipat))
1591 	    return trial;
1592 
1593 	  /* Can't go any further if TRIAL conflicts with INSN.  */
1594 	  if (insn_sets_resource_p (trial, &needed, true))
1595 	    return 0;
1596 	}
1597     }
1598 
1599   return 0;
1600 }
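/* As a minimal illustration of the case described above (a sketch only;
   the mnemonics and the single delay slot are assumptions, not tied to
   any particular port):

	b	L1
	 ld	r2,[r3]		<- slot filled from the target
	...
	b	L1
	 ld	r2,[r3]		<- redundant copy of the same insn
	...
     L1: ld	r2,[r3]

   A series of branches to the same label can each carry a copy of the
   first insn at the target; redundant_insn is what lets the later
   branches recognize that such a copy already exists, subject to the
   resource-conflict checks performed above.  */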
1601 
1602 /* Return 1 if THREAD can only be executed in one way.  If LABEL is nonzero,
1603    it is the target of the branch insn being scanned.  If ALLOW_FALLTHROUGH
1604    is nonzero, we are allowed to fall into this thread; otherwise, we are
1605    not.
1606 
1607    If LABEL is used more than once, or we pass a label other than LABEL before
1608    finding an active insn, we do not own this thread.  */
1609 
1610 static int
1611 own_thread_p (rtx thread, rtx label, int allow_fallthrough)
1612 {
1613   rtx_insn *active_insn;
1614   rtx_insn *insn;
1615 
1616   /* We don't own the function end.  */
1617   if (thread == 0 || ANY_RETURN_P (thread))
1618     return 0;
1619 
1620   /* We have a non-NULL insn.  */
1621   rtx_insn *thread_insn = as_a <rtx_insn *> (thread);
1622 
1623   /* Get the first active insn, or THREAD_INSN, if it is an active insn.  */
1624   active_insn = next_active_insn (PREV_INSN (thread_insn));
1625 
1626   for (insn = thread_insn; insn != active_insn; insn = NEXT_INSN (insn))
1627     if (LABEL_P (insn)
1628 	&& (insn != label || LABEL_NUSES (insn) != 1))
1629       return 0;
1630 
1631   if (allow_fallthrough)
1632     return 1;
1633 
1634   /* Ensure that we reach a BARRIER before any insn or label.  */
1635   for (insn = prev_nonnote_insn (thread_insn);
1636        insn == 0 || !BARRIER_P (insn);
1637        insn = prev_nonnote_insn (insn))
1638     if (insn == 0
1639 	|| LABEL_P (insn)
1640 	|| (NONJUMP_INSN_P (insn)
1641 	    && GET_CODE (PATTERN (insn)) != USE
1642 	    && GET_CODE (PATTERN (insn)) != CLOBBER))
1643       return 0;
1644 
1645   return 1;
1646 }
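/* For example (a sketch; the label and insns are arbitrary): when
   scanning a branch to L2, the insns starting at L2 form a thread we
   own only if L2 is used by that branch alone and, unless
   ALLOW_FALLTHROUGH is set, a BARRIER precedes it so that control
   cannot fall into L2 from above:

	...
	(barrier)
     L2:			<- owned: single use, no fallthrough
	(insn ...)

   If another jump also targets L2, or active code can fall through
   into it, stealing insns from L2 would change those other paths, so
   own_thread_p returns 0.  */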
1647 
1648 /* Called when INSN is being moved from a location near the target of a jump.
1649    We leave a marker of the form (use (INSN)) immediately in front of WHERE
1650    for mark_target_live_regs.  These markers will be deleted at the end.
1651 
1652    We used to try to update the live status of registers if WHERE is at
1653    the start of a basic block, but that can't work since we may remove a
1654    BARRIER in relax_delay_slots.  */
1655 
1656 static void
1657 update_block (rtx_insn *insn, rtx_insn *where)
1658 {
1659   emit_insn_before (gen_rtx_USE (VOIDmode, insn), where);
1660 
1661   /* INSN might be making a value live in a block where it didn't use to
1662      be.  So recompute liveness information for this block.  */
1663   incr_ticks_for_insn (insn);
1664 }
1665 
1666 /* Similar to REDIRECT_JUMP except that we update the BB_TICKS entry for
1667    the basic block containing the jump.  */
1668 
1669 static int
1670 reorg_redirect_jump (rtx_jump_insn *jump, rtx nlabel)
1671 {
1672   incr_ticks_for_insn (jump);
1673   return redirect_jump (jump, nlabel, 1);
1674 }
1675 
1676 /* Called when INSN is being moved forward into a delay slot of DELAYED_INSN.
1677    We check every instruction between INSN and DELAYED_INSN for REG_DEAD notes
1678    that reference values used in INSN.  If we find one, then we move the
1679    REG_DEAD note to INSN.
1680 
1681    This is needed to handle the case where a later insn (after INSN) has a
1682    REG_DEAD note for a register used by INSN, and this later insn subsequently
1683    gets moved before a CODE_LABEL because it is a redundant insn.  In this
1684    case, mark_target_live_regs may be confused into thinking the register
1685    is dead because it sees a REG_DEAD note immediately before a CODE_LABEL.  */
1686 
1687 static void
1688 update_reg_dead_notes (rtx_insn *insn, rtx_insn *delayed_insn)
1689 {
1690   rtx link, next;
1691   rtx_insn *p;
1692 
1693   for (p = next_nonnote_insn (insn); p != delayed_insn;
1694        p = next_nonnote_insn (p))
1695     for (link = REG_NOTES (p); link; link = next)
1696       {
1697 	next = XEXP (link, 1);
1698 
1699 	if (REG_NOTE_KIND (link) != REG_DEAD
1700 	    || !REG_P (XEXP (link, 0)))
1701 	  continue;
1702 
1703 	if (reg_referenced_p (XEXP (link, 0), PATTERN (insn)))
1704 	  {
1705 	    /* Move the REG_DEAD note from P to INSN.  */
1706 	    remove_note (p, link);
1707 	    XEXP (link, 1) = REG_NOTES (insn);
1708 	    REG_NOTES (insn) = link;
1709 	  }
1710       }
1711 }
1712 
1713 /* Called when an insn redundant with start_insn is deleted.  If there
1714    is a REG_DEAD note for the target of start_insn between start_insn
1715    and stop_insn, then the REG_DEAD note needs to be deleted since the
1716    value no longer dies there.
1717 
1718    If the REG_DEAD note isn't deleted, then mark_target_live_regs may be
1719    confused into thinking the register is dead.  */
1720 
1721 static void
1722 fix_reg_dead_note (rtx_insn *start_insn, rtx stop_insn)
1723 {
1724   rtx link, next;
1725   rtx_insn *p;
1726 
1727   for (p = next_nonnote_insn (start_insn); p != stop_insn;
1728        p = next_nonnote_insn (p))
1729     for (link = REG_NOTES (p); link; link = next)
1730       {
1731 	next = XEXP (link, 1);
1732 
1733 	if (REG_NOTE_KIND (link) != REG_DEAD
1734 	    || !REG_P (XEXP (link, 0)))
1735 	  continue;
1736 
1737 	if (reg_set_p (XEXP (link, 0), PATTERN (start_insn)))
1738 	  {
1739 	    remove_note (p, link);
1740 	    return;
1741 	  }
1742       }
1743 }
1744 
1745 /* Delete any REG_UNUSED notes that exist on INSN but not on OTHER_INSN.
1746 
1747    This handles the case of udivmodXi4 instructions which optimize their
1748    output depending on whether any REG_UNUSED notes are present.  We must
1749    make sure that INSN calculates as many results as OTHER_INSN does.  */
1750 
1751 static void
1752 update_reg_unused_notes (rtx_insn *insn, rtx other_insn)
1753 {
1754   rtx link, next;
1755 
1756   for (link = REG_NOTES (insn); link; link = next)
1757     {
1758       next = XEXP (link, 1);
1759 
1760       if (REG_NOTE_KIND (link) != REG_UNUSED
1761 	  || !REG_P (XEXP (link, 0)))
1762 	continue;
1763 
1764       if (!find_regno_note (other_insn, REG_UNUSED, REGNO (XEXP (link, 0))))
1765 	remove_note (insn, link);
1766     }
1767 }
1768 
1769 static vec <rtx> sibling_labels;
1770 
1771 /* Return the label before INSN, or put a new label there.  If SIBLING is
1772    non-zero, it is another label associated with the new label (if any),
1773    typically the former target of the jump that will be redirected to
1774    the new label.  */
1775 
1776 static rtx_insn *
1777 get_label_before (rtx_insn *insn, rtx sibling)
1778 {
1779   rtx_insn *label;
1780 
1781   /* Find an existing label at this point
1782      or make a new one if there is none.  */
1783   label = prev_nonnote_insn (insn);
1784 
1785   if (label == 0 || !LABEL_P (label))
1786     {
1787       rtx_insn *prev = PREV_INSN (insn);
1788 
1789       label = gen_label_rtx ();
1790       emit_label_after (label, prev);
1791       LABEL_NUSES (label) = 0;
1792       if (sibling)
1793 	{
1794 	  sibling_labels.safe_push (label);
1795 	  sibling_labels.safe_push (sibling);
1796 	}
1797     }
1798   return label;
1799 }
1800 
1801 /* Scan a function looking for insns that need a delay slot and find insns to
1802    put into the delay slot.
1803 
1804    NON_JUMPS_P is nonzero if we are to only try to fill non-jump insns (such
1805    as calls).  We do these first since we don't want jump insns (that are
1806    easier to fill) to get the only insns that could be used for non-jump insns.
1807    When it is zero, only try to fill JUMP_INSNs.
1808 
1809    When slots are filled in this manner, the insns (including the
1810    delay_insn) are put together in a SEQUENCE rtx.  In this fashion,
1811    it is possible to tell whether a delay slot has really been filled
1812    or not.  `final' knows how to deal with this, by communicating
1813    through FINAL_SEQUENCE.  */
1814 
1815 static void
1816 fill_simple_delay_slots (int non_jumps_p)
1817 {
1818   rtx_insn *insn, *trial, *next_trial;
1819   rtx pat;
1820   int i;
1821   int num_unfilled_slots = unfilled_slots_next - unfilled_slots_base;
1822   struct resources needed, set;
1823   int slots_to_fill, slots_filled;
1824   auto_vec<rtx_insn *, 5> delay_list;
1825 
1826   for (i = 0; i < num_unfilled_slots; i++)
1827     {
1828       int flags;
1829       /* Get the next insn to fill.  If it has already had any slots assigned,
1830 	 we can't do anything with it.  Maybe we'll improve this later.  */
1831 
1832       insn = unfilled_slots_base[i];
1833       if (insn == 0
1834 	  || insn->deleted ()
1835 	  || (NONJUMP_INSN_P (insn)
1836 	      && GET_CODE (PATTERN (insn)) == SEQUENCE)
1837 	  || (JUMP_P (insn) && non_jumps_p)
1838 	  || (!JUMP_P (insn) && ! non_jumps_p))
1839 	continue;
1840 
1841       /* It may have been that this insn used to need delay slots, but
1842 	 now doesn't; ignore in that case.  This can happen, for example,
1843 	 on the HP PA RISC, where the number of delay slots depends on
1844 	 what insns are nearby.  */
1845       slots_to_fill = num_delay_slots (insn);
1846 
1847       /* Some machine descriptions have defined instructions to have
1848 	 delay slots only in certain circumstances which may depend on
1849 	 nearby insns (which change due to reorg's actions).
1850 
1851 	 For example, the PA port normally has delay slots for unconditional
1852 	 jumps.
1853 
1854 	 However, the PA port claims such jumps do not have a delay slot
1855 	 if they are immediate successors of certain CALL_INSNs.  This
1856 	 allows the port to favor filling the delay slot of the call with
1857 	 the unconditional jump.  */
1858       if (slots_to_fill == 0)
1859 	continue;
1860 
1861       /* This insn needs, or can use, some delay slots.  SLOTS_TO_FILL
1862 	 says how many.  After initialization, first try optimizing
1863 
1864 	 call _foo		call _foo
1865 	 nop			add %o7,.-L1,%o7
1866 	 b,a L1
1867 	 nop
1868 
1869 	 If this case applies, the delay slot of the call is filled with
1870 	 the unconditional jump.  This is done first to avoid having the
1871 	 delay slot of the call filled in the backward scan.  Also, since
1872 	 the unconditional jump is likely to also have a delay slot, that
1873 	 insn must exist when it is subsequently scanned.
1874 
1875 	 This is tried on each insn with delay slots as some machines
1876 	 have insns which perform calls, but are not represented as
1877 	 CALL_INSNs.  */
1878 
1879       slots_filled = 0;
1880       delay_list.truncate (0);
1881 
1882       if (JUMP_P (insn))
1883 	flags = get_jump_flags (insn, JUMP_LABEL (insn));
1884       else
1885 	flags = get_jump_flags (insn, NULL_RTX);
1886 
1887       if ((trial = next_active_insn (insn))
1888 	  && JUMP_P (trial)
1889 	  && simplejump_p (trial)
1890 	  && eligible_for_delay (insn, slots_filled, trial, flags)
1891 	  && no_labels_between_p (insn, trial)
1892 	  && ! can_throw_internal (trial))
1893 	{
1894 	  rtx_insn **tmp;
1895 	  slots_filled++;
1896 	  add_to_delay_list (trial, &delay_list);
1897 
1898 	  /* TRIAL may have had its delay slot filled, then unfilled.  When
1899 	     the delay slot is unfilled, TRIAL is placed back on the unfilled
1900 	     slots obstack.  Unfortunately, it is placed on the end of the
1901 	     obstack, not in its original location.  Therefore, we must search
1902 	     from entry i + 1 to the end of the unfilled slots obstack to
1903 	     try and find TRIAL.  */
1904 	  tmp = &unfilled_slots_base[i + 1];
1905 	  while (*tmp != trial && tmp != unfilled_slots_next)
1906 	    tmp++;
1907 
1908 	  /* Remove the unconditional jump from consideration for delay slot
1909 	     filling and unthread it.  */
1910 	  if (*tmp == trial)
1911 	    *tmp = 0;
1912 	  {
1913 	    rtx_insn *next = NEXT_INSN (trial);
1914 	    rtx_insn *prev = PREV_INSN (trial);
1915 	    if (prev)
1916 	      SET_NEXT_INSN (prev) = next;
1917 	    if (next)
1918 	      SET_PREV_INSN (next) = prev;
1919 	  }
1920 	}
1921 
1922       /* Now, scan backwards from the insn to search for a potential
1923 	 delay-slot candidate.  Stop searching when a label or jump is hit.
1924 
1925 	 For each candidate, if it is to go into the delay slot (moved
1926 	 forward in execution sequence), it must not need or set any resources
1927 	 that were set by later insns and must not set any resources that
1928 	 are needed for those insns.
1929 
1930 	 The delay slot insn itself sets resources unless it is a call
1931 	 (in which case the called routine, not the insn itself, is doing
1932 	 the setting).  */
1933 
1934       if (slots_filled < slots_to_fill)
1935 	{
1936 	  /* If the flags register is dead after the insn, then we want to be
1937 	     able to accept a candidate that clobbers it.  For this purpose,
1938 	     we need to filter the flags register during life analysis, so
1939 	     that it doesn't create RAW and WAW dependencies, while still
1940 	     creating the necessary WAR dependencies.  */
1941 	  bool filter_flags
1942 	    = (slots_to_fill == 1
1943 	       && targetm.flags_regnum != INVALID_REGNUM
1944 	       && find_regno_note (insn, REG_DEAD, targetm.flags_regnum));
1945 	  struct resources fset;
1946 	  CLEAR_RESOURCE (&needed);
1947 	  CLEAR_RESOURCE (&set);
1948 	  mark_set_resources (insn, &set, 0, MARK_SRC_DEST);
1949 	  if (filter_flags)
1950 	    {
1951 	      CLEAR_RESOURCE (&fset);
1952 	      mark_set_resources (insn, &fset, 0, MARK_SRC_DEST);
1953 	    }
1954 	  mark_referenced_resources (insn, &needed, false);
1955 
1956 	  for (trial = prev_nonnote_insn (insn); ! stop_search_p (trial, 1);
1957 	       trial = next_trial)
1958 	    {
1959 	      next_trial = prev_nonnote_insn (trial);
1960 
1961 	      /* This must be an INSN or CALL_INSN.  */
1962 	      pat = PATTERN (trial);
1963 
1964 	      /* Stand-alone USE and CLOBBER are just for flow.  */
1965 	      if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER)
1966 		continue;
1967 
1968 	      /* And DEBUG_INSNs never go into delay slots.  */
1969 	      if (GET_CODE (trial) == DEBUG_INSN)
1970 		continue;
1971 
1972 	      /* Check for resource conflict first, to avoid unnecessary
1973 		 splitting.  */
1974 	      if (! insn_references_resource_p (trial, &set, true)
1975 		  && ! insn_sets_resource_p (trial,
1976 					     filter_flags ? &fset : &set,
1977 					     true)
1978 		  && ! insn_sets_resource_p (trial, &needed, true)
1979 		  && ! can_throw_internal (trial))
1980 		{
1981 		  trial = try_split (pat, trial, 1);
1982 		  next_trial = prev_nonnote_insn (trial);
1983 		  if (eligible_for_delay (insn, slots_filled, trial, flags))
1984 		    {
1985 		      /* In this case, we are searching backward, so if we
1986 			 find insns to put on the delay list, we want
1987 			 to put them at the head, rather than the
1988 			 tail, of the list.  */
1989 
1990 		      update_reg_dead_notes (trial, insn);
1991 		      delay_list.safe_insert (0, trial);
1992 		      update_block (trial, trial);
1993 		      delete_related_insns (trial);
1994 		      if (slots_to_fill == ++slots_filled)
1995 			break;
1996 		      continue;
1997 		    }
1998 		}
1999 
2000 	      mark_set_resources (trial, &set, 0, MARK_SRC_DEST_CALL);
2001 	      if (filter_flags)
2002 		{
2003 		  mark_set_resources (trial, &fset, 0, MARK_SRC_DEST_CALL);
2004 		  /* If the flags register is set, then it doesn't create RAW
2005 		     dependencies any longer and it also doesn't create WAW
2006 		     dependencies since it's dead after the original insn.  */
2007 		  if (TEST_HARD_REG_BIT (fset.regs, targetm.flags_regnum))
2008 		    {
2009 		      CLEAR_HARD_REG_BIT (needed.regs, targetm.flags_regnum);
2010 		      CLEAR_HARD_REG_BIT (fset.regs, targetm.flags_regnum);
2011 		    }
2012 		}
2013 	      mark_referenced_resources (trial, &needed, true);
2014 	    }
2015 	}
2016 
2017       /* If all needed slots haven't been filled, we come here.  */
2018 
2019       /* Try to optimize case of jumping around a single insn.  */
2020       if ((ANNUL_IFTRUE_SLOTS || ANNUL_IFFALSE_SLOTS)
2021 	  && slots_filled != slots_to_fill
2022 	  && delay_list.is_empty ()
2023 	  && JUMP_P (insn)
2024 	  && (condjump_p (insn) || condjump_in_parallel_p (insn))
2025 	  && !ANY_RETURN_P (JUMP_LABEL (insn)))
2026 	{
2027 	  optimize_skip (as_a <rtx_jump_insn *> (insn), &delay_list);
2028 	  if (!delay_list.is_empty ())
2029 	    slots_filled += 1;
2030 	}
2031 
2032       /* Try to get insns from beyond the insn needing the delay slot.
2033 	 These insns can neither set nor reference resources set in insns being
2034 	 skipped, cannot set resources in the insn being skipped, and, if this
2035 	 is a CALL_INSN (or a CALL_INSN is passed), cannot trap (because the
2036 	 call might not return).
2037 
2038 	 There used to be code which continued past the target label if
2039 	 we saw all uses of the target label.  This code did not work,
2040 	 because it failed to account for some instructions which were
2041 	 both annulled and marked as from the target.  This can happen as a
2042 	 result of optimize_skip.  Since this code was redundant with
2043 	 fill_eager_delay_slots anyways, it was just deleted.  */
2044 
2045       if (slots_filled != slots_to_fill
2046 	  /* If this instruction could throw an exception which is
2047 	     caught in the same function, then it's not safe to fill
2048 	     the delay slot with an instruction from beyond this
2049 	     point.  For example, consider:
2050 
2051                int i = 2;
2052 
2053 	       try {
2054                  f();
2055 	         i = 3;
2056                } catch (...) {}
2057 
2058                return i;
2059 
2060 	     Even though `i' is a local variable, we must be sure not
2061 	     to put `i = 3' in the delay slot if `f' might throw an
2062 	     exception.
2063 
2064 	     Presumably, we should also check to see if we could get
2065 	     back to this function via `setjmp'.  */
2066 	  && ! can_throw_internal (insn)
2067 	  && !JUMP_P (insn))
2068 	{
2069 	  int maybe_never = 0;
2070 	  rtx pat, trial_delay;
2071 
2072 	  CLEAR_RESOURCE (&needed);
2073 	  CLEAR_RESOURCE (&set);
2074 	  mark_set_resources (insn, &set, 0, MARK_SRC_DEST_CALL);
2075 	  mark_referenced_resources (insn, &needed, true);
2076 
2077 	  if (CALL_P (insn))
2078 	    maybe_never = 1;
2079 
2080 	  for (trial = next_nonnote_insn (insn); !stop_search_p (trial, 1);
2081 	       trial = next_trial)
2082 	    {
2083 	      next_trial = next_nonnote_insn (trial);
2084 
2085 	      /* This must be an INSN or CALL_INSN.  */
2086 	      pat = PATTERN (trial);
2087 
2088 	      /* Stand-alone USE and CLOBBER are just for flow.  */
2089 	      if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER)
2090 		continue;
2091 
2092 	      /* And DEBUG_INSNs do not go in delay slots.  */
2093 	      if (GET_CODE (trial) == DEBUG_INSN)
2094 		continue;
2095 
2096 	      /* If this already has filled delay slots, get the insn needing
2097 		 the delay slots.  */
2098 	      if (GET_CODE (pat) == SEQUENCE)
2099 		trial_delay = XVECEXP (pat, 0, 0);
2100 	      else
2101 		trial_delay = trial;
2102 
2103 	      /* Stop our search when seeing a jump.  */
2104 	      if (JUMP_P (trial_delay))
2105 		break;
2106 
2107 	      /* See if we have a resource problem before we try to split.  */
2108 	      if (GET_CODE (pat) != SEQUENCE
2109 		  && ! insn_references_resource_p (trial, &set, true)
2110 		  && ! insn_sets_resource_p (trial, &set, true)
2111 		  && ! insn_sets_resource_p (trial, &needed, true)
2112 		  && ! (maybe_never && may_trap_or_fault_p (pat))
2113 		  && (trial = try_split (pat, trial, 0))
2114 		  && eligible_for_delay (insn, slots_filled, trial, flags)
2115 		  && ! can_throw_internal (trial))
2116 		{
2117 		  next_trial = next_nonnote_insn (trial);
2118 		  add_to_delay_list (trial, &delay_list);
2119 
2120 		  delete_related_insns (trial);
2121 		  if (slots_to_fill == ++slots_filled)
2122 		    break;
2123 		  continue;
2124 		}
2125 
2126 	      mark_set_resources (trial, &set, 0, MARK_SRC_DEST_CALL);
2127 	      mark_referenced_resources (trial, &needed, true);
2128 
2129 	      /* Ensure we don't put insns between the setting of cc and the
2130 		 comparison by moving a setting of cc into an earlier delay
2131 		 slot since these insns could clobber the condition code.  */
2132 	      set.cc = 1;
2133 
2134 	      /* If this is a call, we might not get here.  */
2135 	      if (CALL_P (trial_delay))
2136 		maybe_never = 1;
2137 	    }
2138 
2139 	  /* If there are slots left to fill and our search was stopped by an
2140 	     unconditional branch, try the insn at the branch target.  We can
2141 	     redirect the branch if it works.
2142 
2143 	     Don't do this if the insn at the branch target is a branch.  */
2144 	  if (slots_to_fill != slots_filled
2145 	      && trial
2146 	      && jump_to_label_p (trial)
2147 	      && simplejump_p (trial)
2148 	      && (next_trial = next_active_insn (JUMP_LABEL_AS_INSN (trial))) != 0
2149 	      && ! (NONJUMP_INSN_P (next_trial)
2150 		    && GET_CODE (PATTERN (next_trial)) == SEQUENCE)
2151 	      && !JUMP_P (next_trial)
2152 	      && ! insn_references_resource_p (next_trial, &set, true)
2153 	      && ! insn_sets_resource_p (next_trial, &set, true)
2154 	      && ! insn_sets_resource_p (next_trial, &needed, true)
2155 	      && ! (maybe_never && may_trap_or_fault_p (PATTERN (next_trial)))
2156 	      && (next_trial = try_split (PATTERN (next_trial), next_trial, 0))
2157 	      && eligible_for_delay (insn, slots_filled, next_trial, flags)
2158 	      && ! can_throw_internal (trial))
2159 	    {
2160 	      /* See comment in relax_delay_slots about necessity of using
2161 		 next_real_nondebug_insn here.  */
2162 	      rtx_insn *new_label = next_real_nondebug_insn (next_trial);
2163 
2164 	      if (new_label != 0)
2165 		new_label = get_label_before (new_label, JUMP_LABEL (trial));
2166 	      else
2167 		new_label = find_end_label (simple_return_rtx);
2168 
2169 	      if (new_label)
2170 	        {
2171 		  add_to_delay_list (copy_delay_slot_insn (next_trial),
2172 				     &delay_list);
2173 		  slots_filled++;
2174 		  reorg_redirect_jump (as_a <rtx_jump_insn *> (trial),
2175 				       new_label);
2176 		}
2177 	    }
2178 	}
2179 
2180       /* If this is an unconditional jump, then try to get insns from the
2181 	 target of the jump.  */
2182       rtx_jump_insn *jump_insn;
2183       if ((jump_insn = dyn_cast <rtx_jump_insn *> (insn))
2184 	  && simplejump_p (jump_insn)
2185 	  && slots_filled != slots_to_fill)
2186 	fill_slots_from_thread (jump_insn, const_true_rtx,
2187 				next_active_insn (JUMP_LABEL_AS_INSN (insn)),
2188 				NULL, 1, 1, own_thread_p (JUMP_LABEL (insn),
2189 						 JUMP_LABEL (insn), 0),
2190 				slots_to_fill, &slots_filled, &delay_list);
2191 
2192       if (!delay_list.is_empty ())
2193 	unfilled_slots_base[i]
2194 	  = emit_delay_sequence (insn, delay_list, slots_filled);
2195 
2196       if (slots_to_fill == slots_filled)
2197 	unfilled_slots_base[i] = 0;
2198 
2199       note_delay_statistics (slots_filled, 0);
2200     }
2201 }
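/* When slots have been filled, the result is packaged roughly like this
   (a sketch; the exact rtl varies by target):

     (insn (sequence [
	(call_insn ...)		;; element 0: the insn needing the slots
	(insn ...)		;; elements 1..n: the delay-slot insns
      ]))

   which is how `final' (via FINAL_SEQUENCE) and the rest of reorg can
   tell that a slot has really been filled.  */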
2202 
2203 /* Follow any unconditional jump at LABEL, for the purpose of redirecting JUMP;
2204    return the ultimate label reached by any such chain of jumps.
2205    Return a suitable return rtx if the chain ultimately leads to a
2206    return instruction.
2207    If LABEL is not followed by a jump, return LABEL.
2208    If the chain loops or we can't find end, return LABEL,
2209    since that tells caller to avoid changing the insn.
2210    If the returned label is obtained by following a crossing jump,
2211    set *CROSSING to true, otherwise set it to false.  */
2212 
2213 static rtx
2214 follow_jumps (rtx label, rtx_insn *jump, bool *crossing)
2215 {
2216   rtx_insn *insn;
2217   rtx_insn *next;
2218   int depth;
2219 
2220   *crossing = false;
2221   if (ANY_RETURN_P (label))
2222     return label;
2223 
2224   rtx_insn *value = as_a <rtx_insn *> (label);
2225 
2226   for (depth = 0;
2227        (depth < 10
2228 	&& (insn = next_active_insn (value)) != 0
2229 	&& JUMP_P (insn)
2230 	&& JUMP_LABEL (insn) != NULL_RTX
2231 	&& ((any_uncondjump_p (insn) && onlyjump_p (insn))
2232 	    || ANY_RETURN_P (PATTERN (insn)))
2233 	&& (next = NEXT_INSN (insn))
2234 	&& BARRIER_P (next));
2235        depth++)
2236     {
2237       rtx this_label_or_return = JUMP_LABEL (insn);
2238 
2239       /* If we have found a cycle, make the insn jump to itself.  */
2240       if (this_label_or_return == label)
2241 	return label;
2242 
2243       /* Cannot follow returns and cannot look through tablejumps.  */
2244       if (ANY_RETURN_P (this_label_or_return))
2245 	return this_label_or_return;
2246 
2247       rtx_insn *this_label = as_a <rtx_insn *> (this_label_or_return);
2248       if (NEXT_INSN (this_label)
2249 	  && JUMP_TABLE_DATA_P (NEXT_INSN (this_label)))
2250 	break;
2251 
2252       if (!targetm.can_follow_jump (jump, insn))
2253 	break;
2254       if (!*crossing)
2255 	*crossing = CROSSING_JUMP_P (jump);
2256       value = this_label;
2257     }
2258   if (depth == 10)
2259     return label;
2260   return value;
2261 }
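/* For instance (a sketch with arbitrary labels): given the chain

     L1: jmp L2   ...   L2: jmp L3   ...   L3: <real code>

   follow_jumps (L1, jump, &crossing) returns L3, so the caller can
   retarget JUMP past the intermediate jumps.  Following stops early at
   a jump table or at a jump the target does not let us follow, and the
   whole chain is abandoned (the original label is returned) if it
   loops or exceeds ten hops.  */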
2262 
2263 /* Try to find insns to place in delay slots.
2264 
2265    INSN is the jump needing SLOTS_TO_FILL delay slots.  It tests CONDITION
2266    or is an unconditional branch if CONDITION is const_true_rtx.
2267    *PSLOTS_FILLED is updated with the number of slots that we have filled.
2268 
2269    THREAD is a flow-of-control, either the insns to be executed if the
2270    branch is true or if the branch is false, THREAD_IF_TRUE says which.
2271 
2272    OPPOSITE_THREAD is the thread in the opposite direction.  It is used
2273    to see if any potential delay slot insns set things needed there.
2274 
2275    LIKELY is nonzero if it is extremely likely that the branch will be
2276    taken and THREAD_IF_TRUE is set.  This is used for the branch at the
2277    end of a loop back up to the top.
2278 
2279    OWN_THREAD is true if we are the only user of the thread, i.e. it is
2280    the target of the jump when we are the only jump going there.
2281 
2282    If OWN_THREAD is false, it must be the "true" thread of a jump.  In that
2283    case, we can only take insns from the head of the thread for our delay
2284    slot.  We then adjust the jump to point after the insns we have taken.  */
2285 
2286 static void
2287 fill_slots_from_thread (rtx_jump_insn *insn, rtx condition,
2288 			rtx thread_or_return, rtx opposite_thread, int likely,
2289 			int thread_if_true, int own_thread, int slots_to_fill,
2290 			int *pslots_filled, vec<rtx_insn *> *delay_list)
2291 {
2292   rtx new_thread;
2293   struct resources opposite_needed, set, needed;
2294   rtx_insn *trial;
2295   int lose = 0;
2296   int must_annul = 0;
2297   int flags;
2298 
2299   /* Validate our arguments.  */
2300   gcc_assert (condition != const_true_rtx || thread_if_true);
2301   gcc_assert (own_thread || thread_if_true);
2302 
2303   flags = get_jump_flags (insn, JUMP_LABEL (insn));
2304 
2305   /* If our thread is the end of subroutine, we can't get any delay
2306      insns from that.  */
2307   if (thread_or_return == NULL_RTX || ANY_RETURN_P (thread_or_return))
2308     return;
2309 
2310   rtx_insn *thread = as_a <rtx_insn *> (thread_or_return);
2311 
2312   /* If this is an unconditional branch, nothing is needed at the
2313      opposite thread.  Otherwise, compute what is needed there.  */
2314   if (condition == const_true_rtx)
2315     CLEAR_RESOURCE (&opposite_needed);
2316   else
2317     mark_target_live_regs (get_insns (), opposite_thread, &opposite_needed);
2318 
2319   /* If the insn at THREAD can be split, do it here to avoid having to
2320      update THREAD and NEW_THREAD if it is done in the loop below.  Also
2321      initialize NEW_THREAD.  */
2322 
2323   new_thread = thread = try_split (PATTERN (thread), thread, 0);
2324 
2325   /* Scan insns at THREAD.  We are looking for an insn that can be removed
2326      from THREAD (it neither sets nor references resources that were set
2327      ahead of it and it doesn't set anything needed by the insns ahead of
2328      it) and that either can be placed in an annulling insn or isn't
2329      needed at OPPOSITE_THREAD.  */
2330 
2331   CLEAR_RESOURCE (&needed);
2332   CLEAR_RESOURCE (&set);
2333 
2334   /* Handle the flags register specially, to be able to accept a
2335      candidate that clobbers it.  See also fill_simple_delay_slots.  */
2336   bool filter_flags
2337     = (slots_to_fill == 1
2338        && targetm.flags_regnum != INVALID_REGNUM
2339        && find_regno_note (insn, REG_DEAD, targetm.flags_regnum));
2340   struct resources fset;
2341   struct resources flags_res;
2342   if (filter_flags)
2343     {
2344       CLEAR_RESOURCE (&fset);
2345       CLEAR_RESOURCE (&flags_res);
2346       SET_HARD_REG_BIT (flags_res.regs, targetm.flags_regnum);
2347     }
2348 
2349   /* If we do not own this thread, we must stop as soon as we find
2350      something that we can't put in a delay slot, since all we can do
2351      is branch into THREAD at a later point.  Therefore, labels stop
2352      the search if this is not the `true' thread.  */
2353 
2354   for (trial = thread;
2355        ! stop_search_p (trial, ! thread_if_true) && (! lose || own_thread);
2356        trial = next_nonnote_insn (trial))
2357     {
2358       rtx pat, old_trial;
2359 
2360       /* If we have passed a label, we no longer own this thread.  */
2361       if (LABEL_P (trial))
2362 	{
2363 	  own_thread = 0;
2364 	  continue;
2365 	}
2366 
2367       pat = PATTERN (trial);
2368       if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER)
2369 	continue;
2370 
2371       if (GET_CODE (trial) == DEBUG_INSN)
2372 	continue;
2373 
2374       /* If TRIAL conflicts with the insns ahead of it, we lose.  */
2375       if (! insn_references_resource_p (trial, &set, true)
2376 	  && ! insn_sets_resource_p (trial, filter_flags ? &fset : &set, true)
2377 	  && ! insn_sets_resource_p (trial, &needed, true)
2378 	  /* If we're handling sets to the flags register specially, we
2379 	     only allow an insn into a delay slot if either:
2380 	     - it doesn't set the flags register,
2381 	     - its "set" of the flags register isn't used (clobbered), or
2382 	     - the insns between the delay-slot insn and the trial insn,
2383 	       as accounted in "set", have not affected the flags register.  */
2384 	  && (! filter_flags
2385 	      || ! insn_sets_resource_p (trial, &flags_res, true)
2386 	      || find_regno_note (trial, REG_UNUSED, targetm.flags_regnum)
2387 	      || ! TEST_HARD_REG_BIT (set.regs, targetm.flags_regnum))
2388 	  && ! can_throw_internal (trial))
2389 	{
2390 	  rtx_insn *prior_insn;
2391 
2392 	  /* If TRIAL is redundant with some insn before INSN, we don't
2393 	     actually need to add it to the delay list; we can merely pretend
2394 	     we did.  */
2395 	  if ((prior_insn = redundant_insn (trial, insn, *delay_list)))
2396 	    {
2397 	      fix_reg_dead_note (prior_insn, insn);
2398 	      if (own_thread)
2399 		{
2400 		  update_block (trial, thread);
2401 		  if (trial == thread)
2402 		    {
2403 		      thread = next_active_insn (thread);
2404 		      if (new_thread == trial)
2405 			new_thread = thread;
2406 		    }
2407 
2408 		  delete_related_insns (trial);
2409 		}
2410 	      else
2411 		{
2412 		  update_reg_unused_notes (prior_insn, trial);
2413 		  new_thread = next_active_insn (trial);
2414 		}
2415 
2416 	      continue;
2417 	    }
2418 
2419 	  /* There are two ways we can win:  If TRIAL doesn't set anything
2420 	     needed at the opposite thread and can't trap, or if it can
2421 	     go into an annulled delay slot.  But we want neither to copy
2422 	     nor to speculate frame-related insns.  */
2423 	  if (!must_annul
2424 	      && ((condition == const_true_rtx
2425 		   && (own_thread || !RTX_FRAME_RELATED_P (trial)))
2426 	          || (! insn_sets_resource_p (trial, &opposite_needed, true)
2427 		      && ! may_trap_or_fault_p (pat)
2428 		      && ! RTX_FRAME_RELATED_P (trial))))
2429 	    {
2430 	      old_trial = trial;
2431 	      trial = try_split (pat, trial, 0);
2432 	      if (new_thread == old_trial)
2433 		new_thread = trial;
2434 	      if (thread == old_trial)
2435 		thread = trial;
2436 	      pat = PATTERN (trial);
2437 	      if (eligible_for_delay (insn, *pslots_filled, trial, flags))
2438 		goto winner;
2439 	    }
2440 	  else if (!RTX_FRAME_RELATED_P (trial)
2441 		   && ((ANNUL_IFTRUE_SLOTS && ! thread_if_true)
2442 		        || (ANNUL_IFFALSE_SLOTS && thread_if_true)))
2443 	    {
2444 	      old_trial = trial;
2445 	      trial = try_split (pat, trial, 0);
2446 	      if (new_thread == old_trial)
2447 		new_thread = trial;
2448 	      if (thread == old_trial)
2449 		thread = trial;
2450 	      pat = PATTERN (trial);
2451 	      if ((must_annul || delay_list->is_empty ()) && (thread_if_true
2452 		   ? check_annul_list_true_false (0, *delay_list)
2453 		     && eligible_for_annul_false (insn, *pslots_filled, trial, flags)
2454 		   : check_annul_list_true_false (1, *delay_list)
2455 		     && eligible_for_annul_true (insn, *pslots_filled, trial, flags)))
2456 		{
2457 		  rtx_insn *temp;
2458 
2459 		  must_annul = 1;
2460 		winner:
2461 
2462 		  /* If we own this thread, delete the insn.  If this is the
2463 		     destination of a branch, show that a basic block status
2464 		     may have been updated.  In any case, mark the new
2465 		     starting point of this thread.  */
2466 		  if (own_thread)
2467 		    {
2468 		      rtx note;
2469 
2470 		      update_block (trial, thread);
2471 		      if (trial == thread)
2472 			{
2473 			  thread = next_active_insn (thread);
2474 			  if (new_thread == trial)
2475 			    new_thread = thread;
2476 			}
2477 
2478 		      /* We are moving this insn, not deleting it.  We must
2479 			 temporarily increment the use count on any referenced
2480 			 label lest it be deleted by delete_related_insns.  */
2481 		      for (note = REG_NOTES (trial);
2482 			   note != NULL_RTX;
2483 			   note = XEXP (note, 1))
2484 			if (REG_NOTE_KIND (note) == REG_LABEL_OPERAND
2485 			    || REG_NOTE_KIND (note) == REG_LABEL_TARGET)
2486 			  {
2487 			    /* REG_LABEL_OPERAND could be
2488 			       NOTE_INSN_DELETED_LABEL too.  */
2489 			    if (LABEL_P (XEXP (note, 0)))
2490 			      LABEL_NUSES (XEXP (note, 0))++;
2491 			    else
2492 			      gcc_assert (REG_NOTE_KIND (note)
2493 					  == REG_LABEL_OPERAND);
2494 			  }
2495 		      if (jump_to_label_p (trial))
2496 			LABEL_NUSES (JUMP_LABEL (trial))++;
2497 
2498 		      delete_related_insns (trial);
2499 
2500 		      for (note = REG_NOTES (trial);
2501 			   note != NULL_RTX;
2502 			   note = XEXP (note, 1))
2503 			if (REG_NOTE_KIND (note) == REG_LABEL_OPERAND
2504 			    || REG_NOTE_KIND (note) == REG_LABEL_TARGET)
2505 			  {
2506 			    /* REG_LABEL_OPERAND could be
2507 			       NOTE_INSN_DELETED_LABEL too.  */
2508 			    if (LABEL_P (XEXP (note, 0)))
2509 			      LABEL_NUSES (XEXP (note, 0))--;
2510 			    else
2511 			      gcc_assert (REG_NOTE_KIND (note)
2512 					  == REG_LABEL_OPERAND);
2513 			  }
2514 		      if (jump_to_label_p (trial))
2515 			LABEL_NUSES (JUMP_LABEL (trial))--;
2516 		    }
2517 		  else
2518 		    new_thread = next_active_insn (trial);
2519 
2520 		  temp = own_thread ? trial : copy_delay_slot_insn (trial);
2521 		  if (thread_if_true)
2522 		    INSN_FROM_TARGET_P (temp) = 1;
2523 
2524 		  add_to_delay_list (temp, delay_list);
2525 
2526 		  if (slots_to_fill == ++(*pslots_filled))
2527 		    {
2528 		      /* Even though we have filled all the slots, we
2529 			 may be branching to a location that has a
2530 			 redundant insn.  Skip any if so.  */
2531 		      while (new_thread && ! own_thread
2532 			     && ! insn_sets_resource_p (new_thread, &set, true)
2533 			     && ! insn_sets_resource_p (new_thread, &needed,
2534 							true)
2535 			     && ! insn_references_resource_p (new_thread,
2536 							      &set, true)
2537 			     && (prior_insn
2538 				 = redundant_insn (new_thread, insn,
2539 						   *delay_list)))
2540 			{
2541 			  /* We know we do not own the thread, so no need
2542 			     to call update_block and delete_insn.  */
2543 			  fix_reg_dead_note (prior_insn, insn);
2544 			  update_reg_unused_notes (prior_insn, new_thread);
2545 			  new_thread
2546 			    = next_active_insn (as_a<rtx_insn *> (new_thread));
2547 			}
2548 		      break;
2549 		    }
2550 
2551 		  continue;
2552 		}
2553 	    }
2554 	}
2555 
2556       /* This insn can't go into a delay slot.  */
2557       lose = 1;
2558       mark_set_resources (trial, &set, 0, MARK_SRC_DEST_CALL);
2559       mark_referenced_resources (trial, &needed, true);
2560       if (filter_flags)
2561 	{
2562 	  mark_set_resources (trial, &fset, 0, MARK_SRC_DEST_CALL);
2563 
2564 	  /* Groups of flags-register setters with users should not
2565 	     affect opportunities to move flags-register-setting insns
2566 	     (clobbers) into the delay-slot.  */
2567 	  CLEAR_HARD_REG_BIT (needed.regs, targetm.flags_regnum);
2568 	  CLEAR_HARD_REG_BIT (fset.regs, targetm.flags_regnum);
2569 	}
2570 
2571       /* Ensure we don't put insns between the setting of cc and the comparison
2572 	 by moving a setting of cc into an earlier delay slot since these insns
2573 	 could clobber the condition code.  */
2574       set.cc = 1;
2575 
2576       /* If this insn is a register-register copy and the next insn has
2577 	 a use of our destination, change it to use our source.  That way,
2578 	 it will become a candidate for our delay slot the next time
2579 	 through this loop.  This case occurs commonly in loops that
2580 	 scan a list.
2581 
2582 	 We could check for more complex cases than those tested below,
2583 	 but it doesn't seem worth it.  It might also be a good idea to try
2584 	 to swap the two insns.  That might do better.
2585 
2586 	 We can't do this if the next insn modifies our destination, because
2587 	 that would make the replacement into the insn invalid.  We also can't
2588 	 do this if it modifies our source, because it might be an earlyclobber
2589 	 operand.  This latter test also prevents updating the contents of
2590 	 a PRE_INC.  We also can't do this if there's overlap of source and
2591 	 destination.  Overlap may happen for larger-than-register-size modes.  */
2592 
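      /* A small sketch of that case (register numbers arbitrary):

	   (set (reg 2) (reg 1))			<- TRIAL, rejected above
	   (set (reg 3) (plus (reg 2) (...)))	<- next insn

	 After rewriting the second insn to use (reg 1) instead of
	 (reg 2), it no longer depends on TRIAL and may be accepted as a
	 delay-slot candidate on a later iteration.  */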
2593       if (NONJUMP_INSN_P (trial) && GET_CODE (pat) == SET
2594 	  && REG_P (SET_SRC (pat))
2595 	  && REG_P (SET_DEST (pat))
2596 	  && !reg_overlap_mentioned_p (SET_DEST (pat), SET_SRC (pat)))
2597 	{
2598 	  rtx_insn *next = next_nonnote_insn (trial);
2599 
2600 	  if (next && NONJUMP_INSN_P (next)
2601 	      && GET_CODE (PATTERN (next)) != USE
2602 	      && ! reg_set_p (SET_DEST (pat), next)
2603 	      && ! reg_set_p (SET_SRC (pat), next)
2604 	      && reg_referenced_p (SET_DEST (pat), PATTERN (next))
2605 	      && ! modified_in_p (SET_DEST (pat), next))
2606 	    validate_replace_rtx (SET_DEST (pat), SET_SRC (pat), next);
2607 	}
2608     }
2609 
2610   /* If we stopped on a branch insn that has delay slots, see if we can
2611      steal some of the insns in those slots.  */
2612   if (trial && NONJUMP_INSN_P (trial)
2613       && GET_CODE (PATTERN (trial)) == SEQUENCE
2614       && JUMP_P (XVECEXP (PATTERN (trial), 0, 0)))
2615     {
2616       rtx_sequence *sequence = as_a <rtx_sequence *> (PATTERN (trial));
2617       /* If this is the `true' thread, we will want to follow the jump,
2618 	 so we can only do this if we have taken everything up to here.  */
2619       if (thread_if_true && trial == new_thread)
2620 	{
2621 	  steal_delay_list_from_target (insn, condition, sequence,
2622 					delay_list, &set, &needed,
2623 					&opposite_needed, slots_to_fill,
2624 					pslots_filled, &must_annul,
2625 					&new_thread);
2626 	  /* If we owned the thread and are told that it branched
2627 	     elsewhere, make sure we own the thread at the new location.  */
2628 	  if (own_thread && trial != new_thread)
2629 	    own_thread = own_thread_p (new_thread, new_thread, 0);
2630 	}
2631       else if (! thread_if_true)
2632 	steal_delay_list_from_fallthrough (insn, condition, sequence,
2633 					   delay_list, &set, &needed,
2634 					   &opposite_needed, slots_to_fill,
2635 					   pslots_filled, &must_annul);
2636     }
2637 
2638   /* If we haven't found anything for this delay slot and it is very
2639      likely that the branch will be taken, see if the insn at our target
2640      increments or decrements a register with an increment that does not
2641      depend on the destination register.  If so, try to place the opposite
2642      arithmetic insn after the jump insn and put the arithmetic insn in the
2643      delay slot.  If we can't do this, return.  */
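  /* For example (a sketch; the register and constant are arbitrary):
     if the insn at the branch target is

	(set (reg 4) (plus (reg 4) (const_int 8)))

     we emit (set (reg 4) (plus (reg 4) (const_int -8))) right after
     the branch and put the increment itself in the delay slot.  The
     taken path still gets the increment, while on the fall-through
     path the compensating insn cancels it out.  */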
2644   if (delay_list->is_empty () && likely
2645       && new_thread && !ANY_RETURN_P (new_thread)
2646       && NONJUMP_INSN_P (new_thread)
2647       && !RTX_FRAME_RELATED_P (new_thread)
2648       && GET_CODE (PATTERN (new_thread)) != ASM_INPUT
2649       && asm_noperands (PATTERN (new_thread)) < 0)
2650     {
2651       rtx dest;
2652       rtx src;
2653 
2654       /* We know "new_thread" is an insn due to NONJUMP_INSN_P (new_thread)
2655 	 above.  */
2656       trial = as_a <rtx_insn *> (new_thread);
2657       rtx pat = PATTERN (trial);
2658 
2659       if (!NONJUMP_INSN_P (trial)
2660 	  || GET_CODE (pat) != SET
2661 	  || ! eligible_for_delay (insn, 0, trial, flags)
2662 	  || can_throw_internal (trial))
2663 	return;
2664 
2665       dest = SET_DEST (pat), src = SET_SRC (pat);
2666       if ((GET_CODE (src) == PLUS || GET_CODE (src) == MINUS)
2667 	  && rtx_equal_p (XEXP (src, 0), dest)
2668 	  && (!FLOAT_MODE_P (GET_MODE (src))
2669 	      || flag_unsafe_math_optimizations)
2670 	  && ! reg_overlap_mentioned_p (dest, XEXP (src, 1))
2671 	  && ! side_effects_p (pat))
2672 	{
2673 	  rtx other = XEXP (src, 1);
2674 	  rtx new_arith;
2675 	  rtx_insn *ninsn;
2676 
2677 	  /* If this is a constant adjustment, use the same code with
2678 	     the negated constant.  Otherwise, reverse the sense of the
2679 	     arithmetic.  */
2680 	  if (CONST_INT_P (other))
2681 	    new_arith = gen_rtx_fmt_ee (GET_CODE (src), GET_MODE (src), dest,
2682 					negate_rtx (GET_MODE (src), other));
2683 	  else
2684 	    new_arith = gen_rtx_fmt_ee (GET_CODE (src) == PLUS ? MINUS : PLUS,
2685 					GET_MODE (src), dest, other);
2686 
2687 	  ninsn = emit_insn_after (gen_rtx_SET (dest, new_arith), insn);
2688 
2689 	  if (recog_memoized (ninsn) < 0
2690 	      || (extract_insn (ninsn),
2691 		  !constrain_operands (1, get_preferred_alternatives (ninsn))))
2692 	    {
2693 	      delete_related_insns (ninsn);
2694 	      return;
2695 	    }
2696 
2697 	  if (own_thread)
2698 	    {
2699 	      update_block (trial, thread);
2700 	      if (trial == thread)
2701 		{
2702 		  thread = next_active_insn (thread);
2703 		  if (new_thread == trial)
2704 		    new_thread = thread;
2705 		}
2706 	      delete_related_insns (trial);
2707 	    }
2708 	  else
2709 	    new_thread = next_active_insn (trial);
2710 
2711 	  ninsn = own_thread ? trial : copy_delay_slot_insn (trial);
2712 	  if (thread_if_true)
2713 	    INSN_FROM_TARGET_P (ninsn) = 1;
2714 
2715 	  add_to_delay_list (ninsn, delay_list);
2716 	  (*pslots_filled)++;
2717 	}
2718     }
2719 
2720   if (!delay_list->is_empty () && must_annul)
2721     INSN_ANNULLED_BRANCH_P (insn) = 1;
2722 
2723   /* If we are to branch into the middle of this thread, find an appropriate
2724      label or make a new one if none, and redirect INSN to it.  If we hit the
2725      end of the function, use the end-of-function label.  */
2726   if (new_thread != thread)
2727     {
2728       rtx label;
2729       bool crossing = false;
2730 
2731       gcc_assert (thread_if_true);
2732 
2733       if (new_thread && simplejump_or_return_p (new_thread)
2734 	  && redirect_with_delay_list_safe_p (insn,
2735 					      JUMP_LABEL (new_thread),
2736 					      *delay_list))
2737 	new_thread = follow_jumps (JUMP_LABEL (new_thread), insn,
2738 				   &crossing);
2739 
2740       if (ANY_RETURN_P (new_thread))
2741 	label = find_end_label (new_thread);
2742       else if (LABEL_P (new_thread))
2743 	label = new_thread;
2744       else
2745 	label = get_label_before (as_a <rtx_insn *> (new_thread),
2746 				  JUMP_LABEL (insn));
2747 
2748       if (label)
2749 	{
2750 	  reorg_redirect_jump (insn, label);
2751 	  if (crossing)
2752 	    CROSSING_JUMP_P (insn) = 1;
2753 	}
2754     }
2755 }
2756 
2757 /* Make another attempt to find insns to place in delay slots.
2758 
2759    We previously looked for insns located in front of the delay insn
2760    and, for non-jump delay insns, located behind the delay insn.
2761 
2762    Here we only try to schedule jump insns, moving insns from either
2763    the target or the following insns into the delay slot.  If annulling is
2764    supported, we will be likely to do this.  Otherwise, we can do this only
2765    if safe.  */
2766 
2767 static void
2768 fill_eager_delay_slots (void)
2769 {
2770   rtx_insn *insn;
2771   int i;
2772   int num_unfilled_slots = unfilled_slots_next - unfilled_slots_base;
2773 
2774   for (i = 0; i < num_unfilled_slots; i++)
2775     {
2776       rtx condition;
2777       rtx target_label, insn_at_target;
2778       rtx_insn *fallthrough_insn;
2779       auto_vec<rtx_insn *, 5> delay_list;
2780       rtx_jump_insn *jump_insn;
2781       int own_target;
2782       int own_fallthrough;
2783       int prediction, slots_to_fill, slots_filled;
2784 
2785       insn = unfilled_slots_base[i];
2786       if (insn == 0
2787 	  || insn->deleted ()
2788 	  || ! (jump_insn = dyn_cast <rtx_jump_insn *> (insn))
2789 	  || ! (condjump_p (jump_insn) || condjump_in_parallel_p (jump_insn)))
2790 	continue;
2791 
2792       slots_to_fill = num_delay_slots (jump_insn);
2793       /* Some machine descriptions have defined instructions to have
2794 	 delay slots only in certain circumstances which may depend on
2795 	 nearby insns (which change due to reorg's actions).
2796 
2797 	 For example, the PA port normally has delay slots for unconditional
2798 	 jumps.
2799 
2800 	 However, the PA port claims such jumps do not have a delay slot
2801 	 if they are immediate successors of certain CALL_INSNs.  This
2802 	 allows the port to favor filling the delay slot of the call with
2803 	 the unconditional jump.  */
2804       if (slots_to_fill == 0)
2805 	continue;
2806 
2807       slots_filled = 0;
2808       target_label = JUMP_LABEL (jump_insn);
2809       condition = get_branch_condition (jump_insn, target_label);
2810 
2811       if (condition == 0)
2812 	continue;
2813 
2814       /* Get the next active fallthrough and target insns and see if we own
2815 	 them.  Then see whether the branch is likely true.  We don't need
2816 	 to do a lot of this for unconditional branches.  */
2817 
2818       insn_at_target = first_active_target_insn (target_label);
2819       own_target = own_thread_p (target_label, target_label, 0);
2820 
2821       if (condition == const_true_rtx)
2822 	{
2823 	  own_fallthrough = 0;
2824 	  fallthrough_insn = 0;
2825 	  prediction = 2;
2826 	}
2827       else
2828 	{
2829 	  fallthrough_insn = next_active_insn (jump_insn);
2830 	  own_fallthrough = own_thread_p (NEXT_INSN (jump_insn), NULL_RTX, 1);
2831 	  prediction = mostly_true_jump (jump_insn);
2832 	}
2833 
2834       /* If this insn is expected to branch, first try to get insns from our
2835 	 target, then our fallthrough insns.  If it is not expected to branch,
2836 	 try the other order.  */
2837 
2838       if (prediction > 0)
2839 	{
2840 	  fill_slots_from_thread (jump_insn, condition, insn_at_target,
2841 				  fallthrough_insn, prediction == 2, 1,
2842 				  own_target,
2843 				  slots_to_fill, &slots_filled, &delay_list);
2844 
2845 	  if (delay_list.is_empty () && own_fallthrough)
2846 	    {
2847 	      /* Even though we didn't find anything for delay slots,
2848 		 we might have found a redundant insn which we deleted
2849 		 from the thread that was filled.  So we have to recompute
2850 		 the next insn at the target.  */
2851 	      target_label = JUMP_LABEL (jump_insn);
2852 	      insn_at_target = first_active_target_insn (target_label);
2853 
2854 	      fill_slots_from_thread (jump_insn, condition, fallthrough_insn,
2855 				      insn_at_target, 0, 0, own_fallthrough,
2856 				      slots_to_fill, &slots_filled,
2857 				      &delay_list);
2858 	    }
2859 	}
2860       else
2861 	{
2862 	  if (own_fallthrough)
2863 	    fill_slots_from_thread (jump_insn, condition, fallthrough_insn,
2864 				    insn_at_target, 0, 0, own_fallthrough,
2865 				    slots_to_fill, &slots_filled, &delay_list);
2866 
2867 	  if (delay_list.is_empty ())
2868 	    fill_slots_from_thread (jump_insn, condition, insn_at_target,
2869 				    next_active_insn (insn), 0, 1, own_target,
2870 				    slots_to_fill, &slots_filled, &delay_list);
2871 	}
2872 
2873       if (!delay_list.is_empty ())
2874 	unfilled_slots_base[i]
2875 	  = emit_delay_sequence (jump_insn, delay_list, slots_filled);
2876 
2877       if (slots_to_fill == slots_filled)
2878 	unfilled_slots_base[i] = 0;
2879 
2880       note_delay_statistics (slots_filled, 1);
2881     }
2882 }
2883 
2884 static void delete_computation (rtx_insn *insn);
2885 
2886 /* Recursively delete prior insns that compute the value (used only by INSN
2887    which the caller is deleting) stored in the register mentioned by NOTE
2888    which is a REG_DEAD note associated with INSN.  */
2889 
2890 static void
2891 delete_prior_computation (rtx note, rtx_insn *insn)
2892 {
2893   rtx_insn *our_prev;
2894   rtx reg = XEXP (note, 0);
2895 
2896   for (our_prev = prev_nonnote_insn (insn);
2897        our_prev && (NONJUMP_INSN_P (our_prev)
2898 		    || CALL_P (our_prev));
2899        our_prev = prev_nonnote_insn (our_prev))
2900     {
2901       rtx pat = PATTERN (our_prev);
2902 
2903       /* If we reach a CALL which is not calling a const function
2904 	 or the callee pops the arguments, then give up.  */
2905       if (CALL_P (our_prev)
2906 	  && (! RTL_CONST_CALL_P (our_prev)
2907 	      || GET_CODE (pat) != SET || GET_CODE (SET_SRC (pat)) != CALL))
2908 	break;
2909 
2910       /* If we reach a SEQUENCE, it is too complex to try to
2911 	 do anything with it, so give up.  We can be run during
2912 	 and after reorg, so SEQUENCE rtl can legitimately show
2913 	 up here.  */
2914       if (GET_CODE (pat) == SEQUENCE)
2915 	break;
2916 
2917       if (GET_CODE (pat) == USE
2918 	  && NONJUMP_INSN_P (XEXP (pat, 0)))
2919 	/* reorg creates USEs that look like this.  We leave them
2920 	   alone because reorg needs them for its own purposes.  */
2921 	break;
2922 
2923       if (reg_set_p (reg, pat))
2924 	{
2925 	  if (side_effects_p (pat) && !CALL_P (our_prev))
2926 	    break;
2927 
2928 	  if (GET_CODE (pat) == PARALLEL)
2929 	    {
2930 	      /* If we find a SET of something else, we can't
2931 		 delete the insn.  */
2932 
2933 	      int i;
2934 
2935 	      for (i = 0; i < XVECLEN (pat, 0); i++)
2936 		{
2937 		  rtx part = XVECEXP (pat, 0, i);
2938 
2939 		  if (GET_CODE (part) == SET
2940 		      && SET_DEST (part) != reg)
2941 		    break;
2942 		}
2943 
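	      /* Every SET in the PARALLEL has REG as its destination, so the
		 insn computes nothing else that is still needed; delete it.  */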
2944 	      if (i == XVECLEN (pat, 0))
2945 		delete_computation (our_prev);
2946 	    }
2947 	  else if (GET_CODE (pat) == SET
2948 		   && REG_P (SET_DEST (pat)))
2949 	    {
2950 	      int dest_regno = REGNO (SET_DEST (pat));
2951 	      int dest_endregno = END_REGNO (SET_DEST (pat));
2952 	      int regno = REGNO (reg);
2953 	      int endregno = END_REGNO (reg);
2954 
2955 	      if (dest_regno >= regno
2956 		  && dest_endregno <= endregno)
2957 		delete_computation (our_prev);
2958 
2959 	      /* We may have a multi-word hard register and some, but not
2960 		 all, of the words of the register are needed in subsequent
2961 		 insns.  Write REG_UNUSED notes for those parts that were not
2962 		 needed.  */
2963 	      else if (dest_regno <= regno
2964 		       && dest_endregno >= endregno)
2965 		{
2966 		  int i;
2967 
2968 		  add_reg_note (our_prev, REG_UNUSED, reg);
2969 
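		  /* If every word of the destination now carries a REG_UNUSED
		     note, the whole SET is dead and the insn can be deleted.  */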
2970 		  for (i = dest_regno; i < dest_endregno; i++)
2971 		    if (! find_regno_note (our_prev, REG_UNUSED, i))
2972 		      break;
2973 
2974 		  if (i == dest_endregno)
2975 		    delete_computation (our_prev);
2976 		}
2977 	    }
2978 
2979 	  break;
2980 	}
2981 
2982       /* If PAT references the register that dies here, it is an
2983 	 additional use.  Hence any prior SET isn't dead.  However, this
2984 	 insn becomes the new place for the REG_DEAD note.  */
2985       if (reg_overlap_mentioned_p (reg, pat))
2986 	{
2987 	  XEXP (note, 1) = REG_NOTES (our_prev);
2988 	  REG_NOTES (our_prev) = note;
2989 	  break;
2990 	}
2991     }
2992 }
2993 
2994 /* Delete INSN and recursively delete insns that compute values used only
2995    by INSN.  This uses the REG_DEAD notes computed during flow analysis.
2996 
2997    Look at all our REG_DEAD notes.  If a previous insn does nothing other
2998    than set a register that dies in this insn, we can delete that insn
2999    as well.  */
3000 
3001 static void
3002 delete_computation (rtx_insn *insn)
3003 {
3004   rtx note, next;
3005 
3006   for (note = REG_NOTES (insn); note; note = next)
3007     {
3008       next = XEXP (note, 1);
3009 
3010       if (REG_NOTE_KIND (note) != REG_DEAD
3011 	  /* Verify that the REG_NOTE is legitimate.  */
3012 	  || !REG_P (XEXP (note, 0)))
3013 	continue;
3014 
3015       delete_prior_computation (note, insn);
3016     }
3017 
3018   delete_related_insns (insn);
3019 }
3020 
3021 /* If all INSN does is set the pc, delete it, and recursively delete
3022    any prior insns that computed values used only by it (see
3023    delete_computation).  */
3024 
3025 static void
3026 delete_jump (rtx_insn *insn)
3027 {
3028   rtx set = single_set (insn);
3029 
3030   if (set && GET_CODE (SET_DEST (set)) == PC)
3031     delete_computation (insn);
3032 }
3033 
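/* Return the label, if any, that precedes the next active insn after X,
   scanning backward from that insn but not past SCAN_LIMIT.  Return NULL
   if SCAN_LIMIT (or the start of the insn chain) is reached first.  */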
3034 static rtx_insn *
3035 label_before_next_insn (rtx_insn *x, rtx scan_limit)
3036 {
3037   rtx_insn *insn = next_active_insn (x);
3038   while (insn)
3039     {
3040       insn = PREV_INSN (insn);
3041       if (insn == scan_limit || insn == NULL_RTX)
3042 	return NULL;
3043       if (LABEL_P (insn))
3044 	break;
3045     }
3046   return insn;
3047 }
3048 
3049 /* Return TRUE if there is a NOTE_INSN_SWITCH_TEXT_SECTIONS note in between
3050    BEG and END.  */
3051 
3052 static bool
3053 switch_text_sections_between_p (const rtx_insn *beg, const rtx_insn *end)
3054 {
3055   const rtx_insn *p;
3056   for (p = beg; p != end; p = NEXT_INSN (p))
3057     if (NOTE_P (p) && NOTE_KIND (p) == NOTE_INSN_SWITCH_TEXT_SECTIONS)
3058       return true;
3059   return false;
3060 }
3061 
3062 
3063 /* Once we have tried two ways to fill a delay slot, make a pass over the
3064    code to try to improve the results and to do such things as more jump
3065    threading.  */
3066 
3067 static void
3068 relax_delay_slots (rtx_insn *first)
3069 {
3070   rtx_insn *insn, *next;
3071   rtx_sequence *pat;
3072   rtx_insn *delay_insn;
3073   rtx target_label;
3074 
3075   /* Look at every JUMP_INSN and see if we can improve it.  */
3076   for (insn = first; insn; insn = next)
3077     {
3078       rtx_insn *other, *prior_insn;
3079       bool crossing;
3080 
3081       next = next_active_insn (insn);
3082 
3083       /* If this is a jump insn, see if it now jumps to a jump, jumps to
3084 	 the next insn, or jumps to a label that is not the last of a
3085 	 group of consecutive labels.  */
3086       if (is_a <rtx_jump_insn *> (insn)
3087 	  && (condjump_p (insn) || condjump_in_parallel_p (insn))
3088 	  && !ANY_RETURN_P (target_label = JUMP_LABEL (insn)))
3089 	{
3090 	  rtx_jump_insn *jump_insn = as_a <rtx_jump_insn *> (insn);
3091 	  target_label
3092 	    = skip_consecutive_labels (follow_jumps (target_label, jump_insn,
3093 						     &crossing));
3094 	  if (ANY_RETURN_P (target_label))
3095 	    target_label = find_end_label (target_label);
3096 
3097 	  if (target_label
3098 	      && next_active_insn (as_a<rtx_insn *> (target_label)) == next
3099 	      && ! condjump_in_parallel_p (jump_insn)
3100 	      && ! (next && switch_text_sections_between_p (jump_insn, next)))
3101 	    {
3102 	      rtx_insn *direct_label = as_a<rtx_insn *> (JUMP_LABEL (insn));
3103 	      rtx_insn *prev = prev_nonnote_insn (direct_label);
3104 
3105 	      /* If the insn jumps over a BARRIER and is the only way to reach
3106 		 its target, then we need to delete the BARRIER before the jump
3107 		 because, otherwise, the target may end up being considered as
3108 		 unreachable and thus also deleted.  */
3109 	      if (BARRIER_P (prev) && LABEL_NUSES (direct_label) == 1)
3110 		{
3111 		  delete_related_insns (prev);
3112 
3113 		  /* We have just removed a BARRIER, which means that the block
3114 		     number of the next insns has effectively been changed (see
3115 		     find_basic_block in resource.cc), so clear it.  */
3116 		  clear_hashed_info_until_next_barrier (direct_label);
3117 		}
3118 
3119 	      delete_jump (jump_insn);
3120 	      continue;
3121 	    }
3122 
3123 	  if (target_label && target_label != JUMP_LABEL (jump_insn))
3124 	    {
3125 	      reorg_redirect_jump (jump_insn, target_label);
3126 	      if (crossing)
3127 		CROSSING_JUMP_P (jump_insn) = 1;
3128 	    }
3129 
3130 	  /* See if this jump conditionally branches around an unconditional
3131 	     jump.  If so, invert this jump and point it to the target of the
3132 	     second jump.  Check if it's possible on the target.  */
3133 	  if (next && simplejump_or_return_p (next)
3134 	      && any_condjump_p (jump_insn)
3135 	      && target_label
3136 	      && (next_active_insn (as_a<rtx_insn *> (target_label))
3137 		  == next_active_insn (next))
3138 	      && no_labels_between_p (jump_insn, next)
3139 	      && targetm.can_follow_jump (jump_insn, next))
3140 	    {
3141 	      rtx label = JUMP_LABEL (next);
3142 
3143 	      /* Be careful how we do this to avoid deleting code or
3144 		 labels that are momentarily dead.  See similar optimization
3145 		 in jump.cc.
3146 
3147 		 We also need to ensure we properly handle the case when
3148 		 invert_jump fails.  */
3149 
3150 	      ++LABEL_NUSES (target_label);
3151 	      if (!ANY_RETURN_P (label))
3152 		++LABEL_NUSES (label);
3153 
3154 	      if (invert_jump (jump_insn, label, 1))
3155 		{
3156 		  rtx_insn *from = delete_related_insns (next);
3157 
3158 		  /* We have just removed a BARRIER, which means that the block
3159 		     number of the next insns has effectively been changed (see
3160 		     find_basic_block in resource.cc), so clear it.  */
3161 		  if (from)
3162 		    clear_hashed_info_until_next_barrier (from);
3163 
3164 		  next = jump_insn;
3165 		}
3166 
3167 	      if (!ANY_RETURN_P (label))
3168 		--LABEL_NUSES (label);
3169 
3170 	      if (--LABEL_NUSES (target_label) == 0)
3171 		delete_related_insns (target_label);
3172 
3173 	      continue;
3174 	    }
3175 	}
3176 
3177       /* If this is an unconditional jump and the previous insn is a
3178 	 conditional jump, try reversing the condition of the previous
3179 	 insn and swapping our targets.  The next pass might be able to
3180 	 fill the slots.
3181 
3182 	 Don't do this if we expect the conditional branch to be true, because
3183 	 we would then be making the more common case longer.  */
3184 
3185       if (simplejump_or_return_p (insn)
3186 	  && (other = prev_active_insn (insn)) != 0
3187 	  && any_condjump_p (other)
3188 	  && no_labels_between_p (other, insn)
3189 	  && mostly_true_jump (other) < 0)
3190 	{
3191 	  rtx other_target = JUMP_LABEL (other);
3192 	  target_label = JUMP_LABEL (insn);
3193 
3194 	  if (invert_jump (as_a <rtx_jump_insn *> (other), target_label, 0))
3195 	    reorg_redirect_jump (as_a <rtx_jump_insn *> (insn), other_target);
3196 	}
3197 
3198       /* Now look only at cases where we have a filled delay slot.  */
3199       if (!NONJUMP_INSN_P (insn) || GET_CODE (PATTERN (insn)) != SEQUENCE)
3200 	continue;
3201 
3202       pat = as_a <rtx_sequence *> (PATTERN (insn));
3203       delay_insn = pat->insn (0);
3204 
3205       /* See if the first insn in the delay slot is redundant with some
3206 	 previous insn.  Remove it from the delay slot if so; then set up
3207 	 to reprocess this insn.  */
3208       if ((prior_insn = redundant_insn (pat->insn (1), delay_insn, vNULL)))
3209 	{
3210 	  fix_reg_dead_note (prior_insn, insn);
3211 	  update_block (pat->insn (1), insn);
3212 	  delete_from_delay_slot (pat->insn (1));
3213 	  next = prev_active_insn (next);
3214 	  continue;
3215 	}
3216 
3217       /* See if we have a RETURN insn with a filled delay slot followed
3218 	 by a RETURN insn with an unfilled delay slot.  If so, we can delete
3219 	 the first RETURN (but not its delay insn).  This gives the same
3220 	 effect in fewer instructions.
3221 
3222 	 Only do so if optimizing for size since this results in slower, but
3223 	 smaller code.  */
3224       if (optimize_function_for_size_p (cfun)
3225 	  && ANY_RETURN_P (PATTERN (delay_insn))
3226 	  && next
3227 	  && JUMP_P (next)
3228 	  && PATTERN (next) == PATTERN (delay_insn))
3229 	{
3230 	  rtx_insn *after;
3231 	  int i;
3232 
3233 	  /* Delete the RETURN and just execute the delay list insns.
3234 
3235 	     We do this by deleting the INSN containing the SEQUENCE, then
3236 	     re-emitting the insns separately, and then deleting the RETURN.
3237 	     This allows the count of the jump target to be properly
3238 	     decremented.
3239 
3240 	     Note that we need to change the INSN_UID of the re-emitted insns
3241 	     since it is used to hash the insns for mark_target_live_regs and
3242 	     the re-emitted insns will no longer be wrapped up in a SEQUENCE.
3243 
3244 	     Clear the from target bit, since these insns are no longer
3245 	     in delay slots.  */
3246 	  for (i = 0; i < XVECLEN (pat, 0); i++)
3247 	    INSN_FROM_TARGET_P (XVECEXP (pat, 0, i)) = 0;
3248 
3249 	  rtx_insn *prev = PREV_INSN (insn);
3250 	  delete_related_insns (insn);
3251 	  gcc_assert (GET_CODE (pat) == SEQUENCE);
3252 	  add_insn_after (delay_insn, prev, NULL);
3253 	  after = delay_insn;
3254 	  for (i = 1; i < pat->len (); i++)
3255 	    after = emit_copy_of_insn_after (pat->insn (i), after);
3256 	  delete_scheduled_jump (delay_insn);
3257 	  continue;
3258 	}
3259 
3260       /* Now look only at the cases where we have a filled JUMP_INSN.  */
3261       rtx_jump_insn *delay_jump_insn =
3262 		dyn_cast <rtx_jump_insn *> (delay_insn);
3263       if (! delay_jump_insn || !(condjump_p (delay_jump_insn)
3264 	  || condjump_in_parallel_p (delay_jump_insn)))
3265 	continue;
3266 
3267       target_label = JUMP_LABEL (delay_jump_insn);
3268       if (target_label && ANY_RETURN_P (target_label))
3269 	continue;
3270 
3271       /* If this jump goes to another unconditional jump, thread it, but
3272 	 don't convert a jump into a RETURN here.  */
3273       rtx trial = skip_consecutive_labels (follow_jumps (target_label,
3274 							 delay_jump_insn,
3275 							 &crossing));
3276       if (ANY_RETURN_P (trial))
3277 	trial = find_end_label (trial);
3278 
3279       if (trial && trial != target_label
3280 	  && redirect_with_delay_slots_safe_p (delay_jump_insn, trial, insn))
3281 	{
3282 	  reorg_redirect_jump (delay_jump_insn, trial);
3283 	  target_label = trial;
3284 	  if (crossing)
3285 	    CROSSING_JUMP_P (delay_jump_insn) = 1;
3286 	}
3287 
3288       /* If the first insn at TARGET_LABEL is redundant with a previous
3289 	 insn, redirect the jump to the following insn and process again.
3290 	 We use next_real_nondebug_insn instead of next_active_insn so we
3291 	 don't skip USE-markers, or we'll end up with incorrect
3292 	 liveness info.  */
3293       trial = next_real_nondebug_insn (target_label);
3294       if (trial && GET_CODE (PATTERN (trial)) != SEQUENCE
3295 	  && redundant_insn (trial, insn, vNULL)
3296 	  && ! can_throw_internal (trial))
3297 	{
3298 	  /* Figure out where to emit the special USE insn so we don't
3299 	     later incorrectly compute register live/death info.  */
3300 	  rtx_insn *tmp = next_active_insn (as_a<rtx_insn *> (trial));
3301 	  if (tmp == 0)
3302 	    tmp = find_end_label (simple_return_rtx);
3303 
3304 	  if (tmp)
3305 	    {
3306 	      /* Insert the special USE insn and update dataflow info.
3307 		 We know "trial" is an insn here as it is the output of
3308 		 next_real_nondebug_insn () above.  */
3309 	      update_block (as_a <rtx_insn *> (trial), tmp);
3310 
3311 	      /* Now emit a label before the special USE insn, and
3312 		 redirect our jump to the new label.  */
3313 	      target_label = get_label_before (PREV_INSN (tmp), target_label);
3314 	      reorg_redirect_jump (delay_jump_insn, target_label);
3315 	      next = insn;
3316 	      continue;
3317 	    }
3318 	}
3319 
3320       /* Similarly, if it is an unconditional jump with one insn in its
3321 	 delay list and that insn is redundant, thread the jump.  */
3322       rtx_sequence *trial_seq =
3323 	trial ? dyn_cast <rtx_sequence *> (PATTERN (trial)) : NULL;
3324       if (trial_seq
3325 	  && trial_seq->len () == 2
3326 	  && JUMP_P (trial_seq->insn (0))
3327 	  && simplejump_or_return_p (trial_seq->insn (0))
3328 	  && redundant_insn (trial_seq->insn (1), insn, vNULL))
3329 	{
3330 	  rtx temp_label = JUMP_LABEL (trial_seq->insn (0));
3331 	  if (ANY_RETURN_P (temp_label))
3332 	    temp_label = find_end_label (temp_label);
3333 
3334 	  if (temp_label
3335 	      && redirect_with_delay_slots_safe_p (delay_jump_insn,
3336 						   temp_label, insn))
3337 	    {
3338 	      update_block (trial_seq->insn (1), insn);
3339 	      reorg_redirect_jump (delay_jump_insn, temp_label);
3340 	      next = insn;
3341 	      continue;
3342 	    }
3343 	}
3344 
3345       /* See if we have a simple (conditional) jump that is useless.  */
3346       if (!CROSSING_JUMP_P (delay_jump_insn)
3347 	  && !INSN_ANNULLED_BRANCH_P (delay_jump_insn)
3348 	  && !condjump_in_parallel_p (delay_jump_insn)
3349 	  && prev_active_insn (as_a<rtx_insn *> (target_label)) == insn
3350 	  && !BARRIER_P (prev_nonnote_insn (as_a<rtx_insn *> (target_label))))
3351 	{
3352 	  rtx_insn *after;
3353 	  int i;
3354 
3355 	  /* All this insn does is execute its delay list and jump to the
3356 	     following insn.  So delete the jump and just execute the delay
3357 	     list insns.
3358 
3359 	     We do this by deleting the INSN containing the SEQUENCE, then
3360 	     re-emitting the insns separately, and then deleting the jump.
3361 	     This allows the count of the jump target to be properly
3362 	     decremented.
3363 
3364 	     Note that we need to change the INSN_UID of the re-emitted insns
3365 	     since it is used to hash the insns for mark_target_live_regs and
3366 	     the re-emitted insns will no longer be wrapped up in a SEQUENCE.
3367 
3368 	     Clear the from target bit, since these insns are no longer
3369 	     in delay slots.  */
3370 	  for (i = 0; i < XVECLEN (pat, 0); i++)
3371 	    INSN_FROM_TARGET_P (XVECEXP (pat, 0, i)) = 0;
3372 
3373 	  rtx_insn *prev = PREV_INSN (insn);
3374 	  delete_related_insns (insn);
3375 	  gcc_assert (GET_CODE (pat) == SEQUENCE);
3376 	  add_insn_after (delay_jump_insn, prev, NULL);
3377 	  after = delay_jump_insn;
3378 	  for (i = 1; i < pat->len (); i++)
3379 	    after = emit_copy_of_insn_after (pat->insn (i), after);
3380 	  delete_scheduled_jump (delay_jump_insn);
3381 	  continue;
3382 	}
3383 
3384       /* See if this is an unconditional jump around a single insn which is
3385 	 identical to the one in its delay slot.  In this case, we can just
3386 	 delete the branch and the insn in its delay slot.  */
3387       if (next && NONJUMP_INSN_P (next)
3388 	  && label_before_next_insn (next, insn) == target_label
3389 	  && simplejump_p (insn)
3390 	  && XVECLEN (pat, 0) == 2
3391 	  && rtx_equal_p (PATTERN (next), PATTERN (pat->insn (1))))
3392 	{
3393 	  delete_related_insns (insn);
3394 	  continue;
3395 	}
3396 
3397       /* See if this jump (with its delay slots) conditionally branches
3398 	 around an unconditional jump (without delay slots).  If so, invert
3399 	 this jump and point it to the target of the second jump.  We cannot
3400 	 do this for annulled jumps, though.  Again, don't convert a jump to
3401 	 a RETURN here.  */
3402       if (! INSN_ANNULLED_BRANCH_P (delay_jump_insn)
3403 	  && any_condjump_p (delay_jump_insn)
3404 	  && next && simplejump_or_return_p (next)
3405 	  && (next_active_insn (as_a<rtx_insn *> (target_label))
3406 	      == next_active_insn (next))
3407 	  && no_labels_between_p (insn, next))
3408 	{
3409 	  rtx label = JUMP_LABEL (next);
3410 	  rtx old_label = JUMP_LABEL (delay_jump_insn);
3411 
3412 	  if (ANY_RETURN_P (label))
3413 	    label = find_end_label (label);
3414 
3415 	  /* find_end_label can generate a new label. Check this first.  */
3416 	  if (label
3417 	      && no_labels_between_p (insn, next)
3418 	      && redirect_with_delay_slots_safe_p (delay_jump_insn,
3419 						   label, insn))
3420 	    {
3421 	      /* Be careful how we do this to avoid deleting code or labels
3422 		 that are momentarily dead.  See similar optimization in
3423 		 jump.cc  */
3424 	      if (old_label)
3425 		++LABEL_NUSES (old_label);
3426 
3427 	      if (invert_jump (delay_jump_insn, label, 1))
3428 		{
3429 		  /* Must update the INSN_FROM_TARGET_P bits now that
3430 		     the branch is reversed, so that mark_target_live_regs
3431 		     will handle the delay slot insn correctly.  */
3432 		  for (int i = 1; i < XVECLEN (PATTERN (insn), 0); i++)
3433 		    {
3434 		      rtx slot = XVECEXP (PATTERN (insn), 0, i);
3435 		      INSN_FROM_TARGET_P (slot) = ! INSN_FROM_TARGET_P (slot);
3436 		    }
3437 
3438 		  /* We have just removed a BARRIER, which means that the block
3439 		     number of the next insns has effectively been changed (see
3440 		     find_basic_block in resource.cc), so clear it.  */
3441 		  rtx_insn *from = delete_related_insns (next);
3442 		  if (from)
3443 		    clear_hashed_info_until_next_barrier (from);
3444 
3445 		  next = insn;
3446 		}
3447 
3448 	      if (old_label && --LABEL_NUSES (old_label) == 0)
3449 		delete_related_insns (old_label);
3450 	      continue;
3451 	    }
3452 	}
3453 
3454       /* If we own the thread opposite the way this insn branches, see if we
3455 	 can merge its delay slots with following insns.  */
3456       if (INSN_FROM_TARGET_P (pat->insn (1))
3457 	  && own_thread_p (NEXT_INSN (insn), 0, 1))
3458 	try_merge_delay_insns (insn, next);
3459       else if (! INSN_FROM_TARGET_P (pat->insn (1))
3460 	       && own_thread_p (target_label, target_label, 0))
3461 	try_merge_delay_insns (insn,
3462 			       next_active_insn (as_a<rtx_insn *> (target_label)));
3463 
3464       /* If we get here, we haven't deleted INSN.  But we may have deleted
3465 	 NEXT, so recompute it.  */
3466       next = next_active_insn (insn);
3467     }
3468 }
3469 
3470 
3471 /* Look for filled jumps to the end of function label.  We can try to convert
3472    them into RETURN insns if the insns in the delay slot are valid for the
3473    RETURN as well.  */
3474 
3475 static void
3476 make_return_insns (rtx_insn *first)
3477 {
3478   rtx_insn *insn;
3479   rtx_jump_insn *jump_insn;
3480   rtx real_return_label = function_return_label;
3481   rtx real_simple_return_label = function_simple_return_label;
3482   int slots, i;
3483 
3484   /* See if there is a RETURN insn in the function other than the one we
3485      made for END_OF_FUNCTION_LABEL.  If so, set up anything we can't change
3486      into a RETURN to jump to it.  */
3487   for (insn = first; insn; insn = NEXT_INSN (insn))
3488     if (JUMP_P (insn) && ANY_RETURN_P (PATTERN (insn)))
3489       {
3490 	rtx t = get_label_before (insn, NULL_RTX);
3491 	if (PATTERN (insn) == ret_rtx)
3492 	  real_return_label = t;
3493 	else
3494 	  real_simple_return_label = t;
3495 	break;
3496       }
3497 
3498   /* Show an extra usage of REAL_RETURN_LABEL so it won't go away if it
3499      was equal to END_OF_FUNCTION_LABEL.  */
3500   if (real_return_label)
3501     LABEL_NUSES (real_return_label)++;
3502   if (real_simple_return_label)
3503     LABEL_NUSES (real_simple_return_label)++;
3504 
3505   /* Clear the list of insns to fill so we can use it.  */
3506   obstack_free (&unfilled_slots_obstack, unfilled_firstobj);
3507 
3508   for (insn = first; insn; insn = NEXT_INSN (insn))
3509     {
3510       int flags;
3511       rtx kind, real_label;
3512 
3513       /* Only look at filled JUMP_INSNs that go to the end of function
3514 	 label.  */
3515       if (!NONJUMP_INSN_P (insn))
3516 	continue;
3517 
3518       if (GET_CODE (PATTERN (insn)) != SEQUENCE)
3519 	continue;
3520 
3521       rtx_sequence *pat = as_a <rtx_sequence *> (PATTERN (insn));
3522 
3523       if (!jump_to_label_p (pat->insn (0)))
3524 	continue;
3525 
3526       if (JUMP_LABEL (pat->insn (0)) == function_return_label)
3527 	{
3528 	  kind = ret_rtx;
3529 	  real_label = real_return_label;
3530 	}
3531       else if (JUMP_LABEL (pat->insn (0)) == function_simple_return_label)
3532 	{
3533 	  kind = simple_return_rtx;
3534 	  real_label = real_simple_return_label;
3535 	}
3536       else
3537 	continue;
3538 
3539       jump_insn = as_a <rtx_jump_insn *> (pat->insn (0));
3540 
3541       /* If we can't make the jump into a RETURN, try to redirect it to the best
3542 	 RETURN and go on to the next insn.  */
3543       if (!reorg_redirect_jump (jump_insn, kind))
3544 	{
3545 	  /* Make sure redirecting the jump will not invalidate the delay
3546 	     slot insns.  */
3547 	  if (redirect_with_delay_slots_safe_p (jump_insn, real_label, insn))
3548 	    reorg_redirect_jump (jump_insn, real_label);
3549 	  continue;
3550 	}
3551 
3552       /* See if this RETURN can accept the insns currently in its delay slot.
3553 	 It can if it has at least as many slots and the contents of each
3554 	 are valid.  */
3555 
3556       flags = get_jump_flags (jump_insn, JUMP_LABEL (jump_insn));
3557       slots = num_delay_slots (jump_insn);
3558       if (slots >= XVECLEN (pat, 0) - 1)
3559 	{
3560 	  for (i = 1; i < XVECLEN (pat, 0); i++)
3561 	    if (! (
3562 #if ANNUL_IFFALSE_SLOTS
3563 		   (INSN_ANNULLED_BRANCH_P (jump_insn)
3564 		    && INSN_FROM_TARGET_P (pat->insn (i)))
3565 		   ? eligible_for_annul_false (jump_insn, i - 1,
3566 					       pat->insn (i), flags) :
3567 #endif
3568 #if ANNUL_IFTRUE_SLOTS
3569 		   (INSN_ANNULLED_BRANCH_P (jump_insn)
3570 		    && ! INSN_FROM_TARGET_P (pat->insn (i)))
3571 		   ? eligible_for_annul_true (jump_insn, i - 1,
3572 					      pat->insn (i), flags) :
3573 #endif
3574 		   eligible_for_delay (jump_insn, i - 1,
3575 				       pat->insn (i), flags)))
3576 	      break;
3577 	}
3578       else
3579 	i = 0;
3580 
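      /* If every delay slot insn was found acceptable above, the redirected
	 RETURN can keep its current delay list and we are done with it.  */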
3581       if (i == XVECLEN (pat, 0))
3582 	continue;
3583 
3584       /* We have to do something with this insn.  If it is an unconditional
3585 	 RETURN, delete the SEQUENCE and output the individual insns,
3586 	 followed by the RETURN.  Then set things up so we try to find
3587 	 insns for its delay slots, if it needs some.  */
3588       if (ANY_RETURN_P (PATTERN (jump_insn)))
3589 	{
3590 	  rtx_insn *after = PREV_INSN (insn);
3591 
3592 	  delete_related_insns (insn);
3593 	  insn = jump_insn;
3594 	  for (i = 1; i < pat->len (); i++)
3595 	    after = emit_copy_of_insn_after (pat->insn (i), after);
3596 	  add_insn_after (insn, after, NULL);
3597 	  emit_barrier_after (insn);
3598 
3599 	  if (slots)
3600 	    obstack_ptr_grow (&unfilled_slots_obstack, insn);
3601 	}
3602       else
3603 	/* It is probably more efficient to keep this with its current
3604 	   delay slot as a branch to a RETURN.  */
3605 	reorg_redirect_jump (jump_insn, real_label);
3606     }
3607 
3608   /* Now delete REAL_RETURN_LABEL if we never used it.  Then try to fill any
3609      new delay slots we have created.  */
3610   if (real_return_label != NULL_RTX && --LABEL_NUSES (real_return_label) == 0)
3611     delete_related_insns (real_return_label);
3612   if (real_simple_return_label != NULL_RTX
3613       && --LABEL_NUSES (real_simple_return_label) == 0)
3614     delete_related_insns (real_simple_return_label);
3615 
3616   fill_simple_delay_slots (1);
3617   fill_simple_delay_slots (0);
3618 }
3619 
3620 /* Try to find insns to place in delay slots.  */
3621 
3622 static void
3623 dbr_schedule (rtx_insn *first)
3624 {
3625   rtx_insn *insn, *next, *epilogue_insn = 0;
3626   int i;
3627   bool need_return_insns;
3628 
3629   /* If the current function has no insns other than the prologue and
3630      epilogue, then do not try to fill any delay slots.  */
3631   if (n_basic_blocks_for_fn (cfun) == NUM_FIXED_BLOCKS)
3632     return;
3633 
3634   /* Find the highest INSN_UID and allocate and initialize our map from
3635      INSN_UID's to position in code.  */
3636   for (max_uid = 0, insn = first; insn; insn = NEXT_INSN (insn))
3637     {
3638       if (INSN_UID (insn) > max_uid)
3639 	max_uid = INSN_UID (insn);
3640       if (NOTE_P (insn)
3641 	  && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
3642 	epilogue_insn = insn;
3643     }
3644 
3645   uid_to_ruid = XNEWVEC (int, max_uid + 1);
3646   for (i = 0, insn = first; insn; i++, insn = NEXT_INSN (insn))
3647     uid_to_ruid[INSN_UID (insn)] = i;
3648 
3649   /* Initialize the list of insns that need filling.  */
3650   if (unfilled_firstobj == 0)
3651     {
3652       gcc_obstack_init (&unfilled_slots_obstack);
3653       unfilled_firstobj = XOBNEWVAR (&unfilled_slots_obstack, rtx, 0);
3654     }
3655 
3656   for (insn = next_active_insn (first); insn; insn = next_active_insn (insn))
3657     {
3658       rtx target;
3659 
3660       /* Skip vector tables.  We can't get attributes for them.  */
3661       if (JUMP_TABLE_DATA_P (insn))
3662 	continue;
3663 
3664       if (JUMP_P (insn))
3665         INSN_ANNULLED_BRANCH_P (insn) = 0;
3666       INSN_FROM_TARGET_P (insn) = 0;
3667 
3668       if (num_delay_slots (insn) > 0)
3669 	obstack_ptr_grow (&unfilled_slots_obstack, insn);
3670 
3671       /* Ensure all jumps go to the last of a set of consecutive labels.  */
3672       if (JUMP_P (insn)
3673 	  && (condjump_p (insn) || condjump_in_parallel_p (insn))
3674 	  && !ANY_RETURN_P (JUMP_LABEL (insn))
3675 	  && ((target = skip_consecutive_labels (JUMP_LABEL (insn)))
3676 	      != JUMP_LABEL (insn)))
3677 	redirect_jump (as_a <rtx_jump_insn *> (insn), target, 1);
3678     }
3679 
3680   init_resource_info (epilogue_insn);
3681 
3682   /* Show we haven't computed an end-of-function label yet.  */
3683   function_return_label = function_simple_return_label = NULL;
3684 
3685   /* Initialize the statistics for this function.  */
3686   memset (num_insns_needing_delays, 0, sizeof num_insns_needing_delays);
3687   memset (num_filled_delays, 0, sizeof num_filled_delays);
3688 
3689   /* Now do the delay slot filling.  Try everything twice in case earlier
3690      changes make more slots fillable.  */
3691 
3692   for (reorg_pass_number = 0;
3693        reorg_pass_number < MAX_REORG_PASSES;
3694        reorg_pass_number++)
3695     {
3696       fill_simple_delay_slots (1);
3697       fill_simple_delay_slots (0);
3698       if (!targetm.no_speculation_in_delay_slots_p ())
3699 	fill_eager_delay_slots ();
3700       relax_delay_slots (first);
3701     }
3702 
3703   /* If we made an end of function label, indicate that it is now
3704      safe to delete it by undoing our prior adjustment to LABEL_NUSES.
3705      If it is now unused, delete it.  */
3706   if (function_return_label && --LABEL_NUSES (function_return_label) == 0)
3707     delete_related_insns (function_return_label);
3708   if (function_simple_return_label
3709       && --LABEL_NUSES (function_simple_return_label) == 0)
3710     delete_related_insns (function_simple_return_label);
3711 
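  /* If the target has RETURN patterns and we created end-of-function labels,
     try to convert jumps to those labels into RETURN insns.  */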
3712   need_return_insns = false;
3713   need_return_insns |= targetm.have_return () && function_return_label != 0;
3714   need_return_insns |= (targetm.have_simple_return ()
3715 			&& function_simple_return_label != 0);
3716   if (need_return_insns)
3717     make_return_insns (first);
3718 
3719   /* Delete any USE insns made by update_block; subsequent passes don't need
3720      them or know how to deal with them.  */
3721   for (insn = first; insn; insn = next)
3722     {
3723       next = NEXT_INSN (insn);
3724 
3725       if (NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == USE
3726 	  && INSN_P (XEXP (PATTERN (insn), 0)))
3727 	next = delete_related_insns (insn);
3728     }
3729 
3730   obstack_free (&unfilled_slots_obstack, unfilled_firstobj);
3731 
3732   /* It is not clear why the line below is needed, but it does seem to be.  */
3733   unfilled_firstobj = XOBNEWVAR (&unfilled_slots_obstack, rtx, 0);
3734 
3735   if (dump_file)
3736     {
3737       int i, j, need_comma;
3738       int total_delay_slots[MAX_DELAY_HISTOGRAM + 1];
3739       int total_annul_slots[MAX_DELAY_HISTOGRAM + 1];
3740 
3741       for (reorg_pass_number = 0;
3742 	   reorg_pass_number < MAX_REORG_PASSES;
3743 	   reorg_pass_number++)
3744 	{
3745 	  fprintf (dump_file, ";; Reorg pass #%d:\n", reorg_pass_number + 1);
3746 	  for (i = 0; i < NUM_REORG_FUNCTIONS; i++)
3747 	    {
3748 	      need_comma = 0;
3749 	      fprintf (dump_file, ";; Reorg function #%d\n", i);
3750 
3751 	      fprintf (dump_file, ";; %d insns needing delay slots\n;; ",
3752 		       num_insns_needing_delays[i][reorg_pass_number]);
3753 
3754 	      for (j = 0; j < MAX_DELAY_HISTOGRAM + 1; j++)
3755 		if (num_filled_delays[i][j][reorg_pass_number])
3756 		  {
3757 		    if (need_comma)
3758 		      fprintf (dump_file, ", ");
3759 		    need_comma = 1;
3760 		    fprintf (dump_file, "%d got %d delays",
3761 			     num_filled_delays[i][j][reorg_pass_number], j);
3762 		  }
3763 	      fprintf (dump_file, "\n");
3764 	    }
3765 	}
3766       memset (total_delay_slots, 0, sizeof total_delay_slots);
3767       memset (total_annul_slots, 0, sizeof total_annul_slots);
3768       for (insn = first; insn; insn = NEXT_INSN (insn))
3769 	{
3770 	  if (! insn->deleted ()
3771 	      && NONJUMP_INSN_P (insn)
3772 	      && GET_CODE (PATTERN (insn)) != USE
3773 	      && GET_CODE (PATTERN (insn)) != CLOBBER)
3774 	    {
3775 	      if (GET_CODE (PATTERN (insn)) == SEQUENCE)
3776 		{
3777                   rtx control;
3778 		  j = XVECLEN (PATTERN (insn), 0) - 1;
3779 		  if (j > MAX_DELAY_HISTOGRAM)
3780 		    j = MAX_DELAY_HISTOGRAM;
3781                   control = XVECEXP (PATTERN (insn), 0, 0);
3782 		  if (JUMP_P (control) && INSN_ANNULLED_BRANCH_P (control))
3783 		    total_annul_slots[j]++;
3784 		  else
3785 		    total_delay_slots[j]++;
3786 		}
3787 	      else if (num_delay_slots (insn) > 0)
3788 		total_delay_slots[0]++;
3789 	    }
3790 	}
3791       fprintf (dump_file, ";; Reorg totals: ");
3792       need_comma = 0;
3793       for (j = 0; j < MAX_DELAY_HISTOGRAM + 1; j++)
3794 	{
3795 	  if (total_delay_slots[j])
3796 	    {
3797 	      if (need_comma)
3798 		fprintf (dump_file, ", ");
3799 	      need_comma = 1;
3800 	      fprintf (dump_file, "%d got %d delays", total_delay_slots[j], j);
3801 	    }
3802 	}
3803       fprintf (dump_file, "\n");
3804 
3805       if (ANNUL_IFTRUE_SLOTS || ANNUL_IFFALSE_SLOTS)
3806 	{
3807 	  fprintf (dump_file, ";; Reorg annuls: ");
3808 	  need_comma = 0;
3809 	  for (j = 0; j < MAX_DELAY_HISTOGRAM + 1; j++)
3810 	    {
3811 	      if (total_annul_slots[j])
3812 		{
3813 		  if (need_comma)
3814 		    fprintf (dump_file, ", ");
3815 		  need_comma = 1;
3816 		  fprintf (dump_file, "%d got %d delays", total_annul_slots[j], j);
3817 		}
3818 	    }
3819 	  fprintf (dump_file, "\n");
3820 	}
3821 
3822       fprintf (dump_file, "\n");
3823     }
3824 
3825   if (!sibling_labels.is_empty ())
3826     {
3827       update_alignments (sibling_labels);
3828       sibling_labels.release ();
3829     }
3830 
3831   free_resource_info ();
3832   free (uid_to_ruid);
3833   crtl->dbr_scheduled_p = true;
3834 }
3835 
3836 /* Run delay slot optimization.  */
3837 static unsigned int
3838 rest_of_handle_delay_slots (void)
3839 {
3840   if (DELAY_SLOTS)
3841     dbr_schedule (get_insns ());
3842 
3843   return 0;
3844 }
3845 
3846 namespace {
3847 
3848 const pass_data pass_data_delay_slots =
3849 {
3850   RTL_PASS, /* type */
3851   "dbr", /* name */
3852   OPTGROUP_NONE, /* optinfo_flags */
3853   TV_DBR_SCHED, /* tv_id */
3854   0, /* properties_required */
3855   0, /* properties_provided */
3856   0, /* properties_destroyed */
3857   0, /* todo_flags_start */
3858   0, /* todo_flags_finish */
3859 };
3860 
3861 class pass_delay_slots : public rtl_opt_pass
3862 {
3863 public:
3864   pass_delay_slots (gcc::context *ctxt)
3865     : rtl_opt_pass (pass_data_delay_slots, ctxt)
3866   {}
3867 
3868   /* opt_pass methods: */
3869   virtual bool gate (function *);
3870   virtual unsigned int execute (function *)
3871     {
3872       return rest_of_handle_delay_slots ();
3873     }
3874 
3875 }; // class pass_delay_slots
3876 
3877 bool
3878 pass_delay_slots::gate (function *)
3879 {
3880   /* At -O0 dataflow info isn't updated after RA.  */
3881   if (DELAY_SLOTS)
3882     return optimize > 0 && flag_delayed_branch && !crtl->dbr_scheduled_p;
3883 
3884   return false;
3885 }
3886 
3887 } // anon namespace
3888 
3889 rtl_opt_pass *
3890 make_pass_delay_slots (gcc::context *ctxt)
3891 {
3892   return new pass_delay_slots (ctxt);
3893 }
3894 
3895 /* Machine dependent reorg pass.  */
3896 
3897 namespace {
3898 
3899 const pass_data pass_data_machine_reorg =
3900 {
3901   RTL_PASS, /* type */
3902   "mach", /* name */
3903   OPTGROUP_NONE, /* optinfo_flags */
3904   TV_MACH_DEP, /* tv_id */
3905   0, /* properties_required */
3906   0, /* properties_provided */
3907   0, /* properties_destroyed */
3908   0, /* todo_flags_start */
3909   0, /* todo_flags_finish */
3910 };
3911 
3912 class pass_machine_reorg : public rtl_opt_pass
3913 {
3914 public:
3915   pass_machine_reorg (gcc::context *ctxt)
3916     : rtl_opt_pass (pass_data_machine_reorg, ctxt)
3917   {}
3918 
3919   /* opt_pass methods: */
3920   virtual bool gate (function *)
3921     {
3922       return targetm.machine_dependent_reorg != 0;
3923     }
3924 
3925   virtual unsigned int execute (function *)
3926     {
3927       targetm.machine_dependent_reorg ();
3928       return 0;
3929     }
3930 
3931 }; // class pass_machine_reorg
3932 
3933 } // anon namespace
3934 
3935 rtl_opt_pass *
3936 make_pass_machine_reorg (gcc::context *ctxt)
3937 {
3938   return new pass_machine_reorg (ctxt);
3939 }
3940