xref: /netbsd-src/external/gpl3/gcc.old/dist/gcc/hw-doloop.c (revision 8feb0f0b7eaff0608f8350bbfa3098827b4bb91b)
11debfc3dSmrg /* Code to analyze doloop loops in order for targets to perform late
21debfc3dSmrg    optimizations converting doloops to other forms of hardware loops.
3*8feb0f0bSmrg    Copyright (C) 2011-2020 Free Software Foundation, Inc.
41debfc3dSmrg 
51debfc3dSmrg This file is part of GCC.
61debfc3dSmrg 
71debfc3dSmrg GCC is free software; you can redistribute it and/or modify it under
81debfc3dSmrg the terms of the GNU General Public License as published by the Free
91debfc3dSmrg Software Foundation; either version 3, or (at your option) any later
101debfc3dSmrg version.
111debfc3dSmrg 
121debfc3dSmrg GCC is distributed in the hope that it will be useful, but WITHOUT ANY
131debfc3dSmrg WARRANTY; without even the implied warranty of MERCHANTABILITY or
141debfc3dSmrg FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
151debfc3dSmrg for more details.
161debfc3dSmrg 
171debfc3dSmrg You should have received a copy of the GNU General Public License
181debfc3dSmrg along with GCC; see the file COPYING3.  If not see
191debfc3dSmrg <http://www.gnu.org/licenses/>.  */
201debfc3dSmrg 
211debfc3dSmrg #include "config.h"
221debfc3dSmrg #include "system.h"
231debfc3dSmrg #include "coretypes.h"
241debfc3dSmrg #include "backend.h"
251debfc3dSmrg #include "rtl.h"
261debfc3dSmrg #include "df.h"
271debfc3dSmrg #include "insn-config.h"
281debfc3dSmrg #include "regs.h"
291debfc3dSmrg #include "memmodel.h"
301debfc3dSmrg #include "emit-rtl.h"
311debfc3dSmrg #include "recog.h"
321debfc3dSmrg #include "cfgrtl.h"
331debfc3dSmrg #include "hw-doloop.h"
341debfc3dSmrg #include "dumpfile.h"
351debfc3dSmrg 
361debfc3dSmrg /* Dump information collected in LOOPS.  */
371debfc3dSmrg static void
dump_hwloops(hwloop_info loops)381debfc3dSmrg dump_hwloops (hwloop_info loops)
391debfc3dSmrg {
401debfc3dSmrg   hwloop_info loop;
411debfc3dSmrg 
421debfc3dSmrg   for (loop = loops; loop; loop = loop->next)
431debfc3dSmrg     {
441debfc3dSmrg       hwloop_info i;
451debfc3dSmrg       basic_block b;
461debfc3dSmrg       unsigned ix;
471debfc3dSmrg 
481debfc3dSmrg       fprintf (dump_file, ";; loop %d: ", loop->loop_no);
491debfc3dSmrg       if (loop->bad)
501debfc3dSmrg 	fprintf (dump_file, "(bad) ");
511debfc3dSmrg       fprintf (dump_file, "{head:%d, depth:%d, reg:%u}",
521debfc3dSmrg 	       loop->head == NULL ? -1 : loop->head->index,
531debfc3dSmrg 	       loop->depth, REGNO (loop->iter_reg));
541debfc3dSmrg 
551debfc3dSmrg       fprintf (dump_file, " blocks: [ ");
561debfc3dSmrg       for (ix = 0; loop->blocks.iterate (ix, &b); ix++)
571debfc3dSmrg 	fprintf (dump_file, "%d ", b->index);
581debfc3dSmrg       fprintf (dump_file, "] ");
591debfc3dSmrg 
601debfc3dSmrg       fprintf (dump_file, " inner loops: [ ");
611debfc3dSmrg       for (ix = 0; loop->loops.iterate (ix, &i); ix++)
621debfc3dSmrg 	fprintf (dump_file, "%d ", i->loop_no);
631debfc3dSmrg       fprintf (dump_file, "]\n");
641debfc3dSmrg     }
651debfc3dSmrg   fprintf (dump_file, "\n");
661debfc3dSmrg }
671debfc3dSmrg 
681debfc3dSmrg /* Return true if BB is part of LOOP.  */
691debfc3dSmrg static bool
bb_in_loop_p(hwloop_info loop,basic_block bb)701debfc3dSmrg bb_in_loop_p (hwloop_info loop, basic_block bb)
711debfc3dSmrg {
721debfc3dSmrg   return bitmap_bit_p (loop->block_bitmap, bb->index);
731debfc3dSmrg }
741debfc3dSmrg 
751debfc3dSmrg /* Scan the blocks of LOOP (and its inferiors), and record things such as
761debfc3dSmrg    hard registers set, jumps out of and within the loop.  */
771debfc3dSmrg static void
scan_loop(hwloop_info loop)781debfc3dSmrg scan_loop (hwloop_info loop)
791debfc3dSmrg {
801debfc3dSmrg   unsigned ix;
811debfc3dSmrg   basic_block bb;
821debfc3dSmrg 
831debfc3dSmrg   if (loop->bad)
841debfc3dSmrg     return;
851debfc3dSmrg 
861debfc3dSmrg   if (REGNO_REG_SET_P (df_get_live_in (loop->successor),
871debfc3dSmrg 		       REGNO (loop->iter_reg)))
881debfc3dSmrg     loop->iter_reg_used_outside = true;
891debfc3dSmrg 
901debfc3dSmrg   for (ix = 0; loop->blocks.iterate (ix, &bb); ix++)
911debfc3dSmrg     {
921debfc3dSmrg       rtx_insn *insn;
931debfc3dSmrg       edge e;
941debfc3dSmrg       edge_iterator ei;
951debfc3dSmrg 
961debfc3dSmrg       if (bb != loop->tail)
971debfc3dSmrg 	FOR_EACH_EDGE (e, ei, bb->succs)
981debfc3dSmrg 	  {
991debfc3dSmrg 	    if (bb_in_loop_p (loop, e->dest))
1001debfc3dSmrg 	      {
1011debfc3dSmrg 		if (!(e->flags & EDGE_FALLTHRU))
1021debfc3dSmrg 		  loop->jumps_within = true;
1031debfc3dSmrg 	      }
1041debfc3dSmrg 	    else
1051debfc3dSmrg 	      {
1061debfc3dSmrg 		loop->jumps_outof = true;
1071debfc3dSmrg 		if (!loop->bad)
1081debfc3dSmrg 		  gcc_assert (!REGNO_REG_SET_P (df_get_live_in (e->dest),
1091debfc3dSmrg 						REGNO (loop->iter_reg)));
1101debfc3dSmrg 	      }
1111debfc3dSmrg 	  }
1121debfc3dSmrg 
1131debfc3dSmrg       for (insn = BB_HEAD (bb);
1141debfc3dSmrg 	   insn != NEXT_INSN (BB_END (bb));
1151debfc3dSmrg 	   insn = NEXT_INSN (insn))
1161debfc3dSmrg 	{
1171debfc3dSmrg 	  df_ref def;
1181debfc3dSmrg 	  HARD_REG_SET set_this_insn;
1191debfc3dSmrg 
1201debfc3dSmrg 	  if (!NONDEBUG_INSN_P (insn))
1211debfc3dSmrg 	    continue;
1221debfc3dSmrg 
1231debfc3dSmrg 	  if (recog_memoized (insn) < 0
1241debfc3dSmrg 	      && (GET_CODE (PATTERN (insn)) == ASM_INPUT
1251debfc3dSmrg 		  || asm_noperands (PATTERN (insn)) >= 0))
1261debfc3dSmrg 	    loop->has_asm = true;
1271debfc3dSmrg 
1281debfc3dSmrg 	  CLEAR_HARD_REG_SET (set_this_insn);
1291debfc3dSmrg 	  FOR_EACH_INSN_DEF (def, insn)
1301debfc3dSmrg 	    {
1311debfc3dSmrg 	      rtx dreg = DF_REF_REG (def);
1321debfc3dSmrg 
1331debfc3dSmrg 	      if (!REG_P (dreg))
1341debfc3dSmrg 		continue;
1351debfc3dSmrg 
1361debfc3dSmrg 	      add_to_hard_reg_set (&set_this_insn, GET_MODE (dreg),
1371debfc3dSmrg 				   REGNO (dreg));
1381debfc3dSmrg 	    }
1391debfc3dSmrg 
1401debfc3dSmrg 	  if (insn == loop->loop_end)
1411debfc3dSmrg 	    CLEAR_HARD_REG_BIT (set_this_insn, REGNO (loop->iter_reg));
1421debfc3dSmrg 	  else if (reg_mentioned_p (loop->iter_reg, PATTERN (insn)))
1431debfc3dSmrg 	    loop->iter_reg_used = true;
144*8feb0f0bSmrg 	  loop->regs_set_in_loop |= set_this_insn;
1451debfc3dSmrg 	}
1461debfc3dSmrg     }
1471debfc3dSmrg }
1481debfc3dSmrg 
1491debfc3dSmrg /* Compute the incoming_dest and incoming_src members of LOOP by
1501debfc3dSmrg    identifying the edges that jump into the loop.  If there is more
1511debfc3dSmrg    than one block that jumps into the loop, incoming_src will be set
1521debfc3dSmrg    to NULL; likewise, if there is more than one block in the loop that
1531debfc3dSmrg    is the destination of an incoming edge, incoming_dest will be NULL.
1541debfc3dSmrg 
1551debfc3dSmrg    Return true if either of these two fields is nonnull, false
1561debfc3dSmrg    otherwise.  */
1571debfc3dSmrg static bool
process_incoming_edges(hwloop_info loop)1581debfc3dSmrg process_incoming_edges (hwloop_info loop)
1591debfc3dSmrg {
1601debfc3dSmrg   edge e;
1611debfc3dSmrg   edge_iterator ei;
1621debfc3dSmrg   bool first = true;
1631debfc3dSmrg 
1641debfc3dSmrg   FOR_EACH_EDGE (e, ei, loop->incoming)
1651debfc3dSmrg     {
1661debfc3dSmrg       if (first)
1671debfc3dSmrg 	{
1681debfc3dSmrg 	  loop->incoming_src = e->src;
1691debfc3dSmrg 	  loop->incoming_dest = e->dest;
1701debfc3dSmrg 	  first = false;
1711debfc3dSmrg 	}
1721debfc3dSmrg       else
1731debfc3dSmrg 	{
1741debfc3dSmrg 	  if (e->dest != loop->incoming_dest)
1751debfc3dSmrg 	    loop->incoming_dest = NULL;
1761debfc3dSmrg 	  if (e->src != loop->incoming_src)
1771debfc3dSmrg 	    loop->incoming_src = NULL;
1781debfc3dSmrg 	}
1791debfc3dSmrg     }
1801debfc3dSmrg   if (loop->incoming_src == NULL && loop->incoming_dest == NULL)
1811debfc3dSmrg     return false;
1821debfc3dSmrg 
1831debfc3dSmrg   return true;
1841debfc3dSmrg }
1851debfc3dSmrg 
1861debfc3dSmrg /* Try to identify a forwarder block that jump into LOOP, and add it to
1871debfc3dSmrg    the set of blocks in the loop, updating the vector of incoming blocks as
1881debfc3dSmrg    well.  This transformation gives a second chance to loops we couldn't
1891debfc3dSmrg    otherwise handle by increasing the chance that we'll end up with one
1901debfc3dSmrg    incoming_src block.
1911debfc3dSmrg    Return true if we made a change, false otherwise.  */
1921debfc3dSmrg static bool
add_forwarder_blocks(hwloop_info loop)1931debfc3dSmrg add_forwarder_blocks (hwloop_info loop)
1941debfc3dSmrg {
1951debfc3dSmrg   edge e;
1961debfc3dSmrg   edge_iterator ei;
1971debfc3dSmrg 
1981debfc3dSmrg   FOR_EACH_EDGE (e, ei, loop->incoming)
1991debfc3dSmrg     {
2001debfc3dSmrg       if (forwarder_block_p (e->src))
2011debfc3dSmrg 	{
2021debfc3dSmrg 	  edge e2;
2031debfc3dSmrg 	  edge_iterator ei2;
2041debfc3dSmrg 
2051debfc3dSmrg 	  if (dump_file)
2061debfc3dSmrg 	    fprintf (dump_file,
2071debfc3dSmrg 		     ";; Adding forwarder block %d to loop %d and retrying\n",
2081debfc3dSmrg 		     e->src->index, loop->loop_no);
2091debfc3dSmrg 	  loop->blocks.safe_push (e->src);
2101debfc3dSmrg 	  bitmap_set_bit (loop->block_bitmap, e->src->index);
2111debfc3dSmrg 	  FOR_EACH_EDGE (e2, ei2, e->src->preds)
2121debfc3dSmrg 	    vec_safe_push (loop->incoming, e2);
2131debfc3dSmrg 	  loop->incoming->unordered_remove (ei.index);
2141debfc3dSmrg 	  return true;
2151debfc3dSmrg 	}
2161debfc3dSmrg     }
2171debfc3dSmrg   return false;
2181debfc3dSmrg }
2191debfc3dSmrg 
2201debfc3dSmrg /* Called from reorg_loops when a potential loop end is found.  LOOP is
2211debfc3dSmrg    a newly set up structure describing the loop, it is this function's
2221debfc3dSmrg    responsibility to fill most of it.  TAIL_BB and TAIL_INSN point to the
2231debfc3dSmrg    loop_end insn and its enclosing basic block.  REG is the loop counter
2241debfc3dSmrg    register.
2251debfc3dSmrg    For our purposes, a loop is defined by the set of blocks reachable from
2261debfc3dSmrg    the loop head in which the loop counter register is live.  This matches
2271debfc3dSmrg    the expected use; targets that call into this code usually replace the
2281debfc3dSmrg    loop counter with a different special register.  */
2291debfc3dSmrg static void
discover_loop(hwloop_info loop,basic_block tail_bb,rtx_insn * tail_insn,rtx reg)2301debfc3dSmrg discover_loop (hwloop_info loop, basic_block tail_bb, rtx_insn *tail_insn, rtx reg)
2311debfc3dSmrg {
2321debfc3dSmrg   bool found_tail;
2331debfc3dSmrg   unsigned dwork = 0;
2341debfc3dSmrg   basic_block bb;
2351debfc3dSmrg 
2361debfc3dSmrg   loop->tail = tail_bb;
2371debfc3dSmrg   loop->loop_end = tail_insn;
2381debfc3dSmrg   loop->iter_reg = reg;
2391debfc3dSmrg   vec_alloc (loop->incoming, 2);
2401debfc3dSmrg   loop->start_label = as_a <rtx_insn *> (JUMP_LABEL (tail_insn));
2411debfc3dSmrg 
2421debfc3dSmrg   if (EDGE_COUNT (tail_bb->succs) != 2)
2431debfc3dSmrg     {
2441debfc3dSmrg       loop->bad = true;
2451debfc3dSmrg       return;
2461debfc3dSmrg     }
2471debfc3dSmrg   loop->head = BRANCH_EDGE (tail_bb)->dest;
2481debfc3dSmrg   loop->successor = FALLTHRU_EDGE (tail_bb)->dest;
2491debfc3dSmrg 
2501debfc3dSmrg   auto_vec<basic_block, 20> works;
2511debfc3dSmrg   works.safe_push (loop->head);
2521debfc3dSmrg 
2531debfc3dSmrg   found_tail = false;
2541debfc3dSmrg   for (dwork = 0; works.iterate (dwork, &bb); dwork++)
2551debfc3dSmrg     {
2561debfc3dSmrg       edge e;
2571debfc3dSmrg       edge_iterator ei;
2581debfc3dSmrg       if (bb == EXIT_BLOCK_PTR_FOR_FN (cfun))
2591debfc3dSmrg 	{
2601debfc3dSmrg 	  /* We've reached the exit block.  The loop must be bad. */
2611debfc3dSmrg 	  if (dump_file)
2621debfc3dSmrg 	    fprintf (dump_file,
2631debfc3dSmrg 		     ";; Loop is bad - reached exit block while scanning\n");
2641debfc3dSmrg 	  loop->bad = true;
2651debfc3dSmrg 	  break;
2661debfc3dSmrg 	}
2671debfc3dSmrg 
2681debfc3dSmrg       if (bitmap_bit_p (loop->block_bitmap, bb->index))
2691debfc3dSmrg 	continue;
2701debfc3dSmrg 
2711debfc3dSmrg       /* We've not seen this block before.  Add it to the loop's
2721debfc3dSmrg 	 list and then add each successor to the work list.  */
2731debfc3dSmrg 
2741debfc3dSmrg       loop->blocks.safe_push (bb);
2751debfc3dSmrg       bitmap_set_bit (loop->block_bitmap, bb->index);
2761debfc3dSmrg 
2771debfc3dSmrg       if (bb == tail_bb)
2781debfc3dSmrg 	found_tail = true;
2791debfc3dSmrg       else
2801debfc3dSmrg 	{
2811debfc3dSmrg 	  FOR_EACH_EDGE (e, ei, bb->succs)
2821debfc3dSmrg 	    {
2831debfc3dSmrg 	      basic_block succ = EDGE_SUCC (bb, ei.index)->dest;
2841debfc3dSmrg 	      if (REGNO_REG_SET_P (df_get_live_in (succ),
2851debfc3dSmrg 				   REGNO (loop->iter_reg)))
2861debfc3dSmrg 		works.safe_push (succ);
2871debfc3dSmrg 	    }
2881debfc3dSmrg 	}
2891debfc3dSmrg     }
2901debfc3dSmrg 
2911debfc3dSmrg   if (!found_tail)
2921debfc3dSmrg     loop->bad = true;
2931debfc3dSmrg 
2941debfc3dSmrg   /* Find the predecessor, and make sure nothing else jumps into this loop.  */
2951debfc3dSmrg   if (!loop->bad)
2961debfc3dSmrg     {
2971debfc3dSmrg       FOR_EACH_VEC_ELT (loop->blocks, dwork, bb)
2981debfc3dSmrg 	{
2991debfc3dSmrg 	  edge e;
3001debfc3dSmrg 	  edge_iterator ei;
3011debfc3dSmrg 	  FOR_EACH_EDGE (e, ei, bb->preds)
3021debfc3dSmrg 	    {
3031debfc3dSmrg 	      basic_block pred = e->src;
3041debfc3dSmrg 
3051debfc3dSmrg 	      if (!bb_in_loop_p (loop, pred))
3061debfc3dSmrg 		{
3071debfc3dSmrg 		  if (dump_file)
3081debfc3dSmrg 		    fprintf (dump_file, ";; Loop %d: incoming edge %d -> %d\n",
3091debfc3dSmrg 			     loop->loop_no, pred->index,
3101debfc3dSmrg 			     e->dest->index);
3111debfc3dSmrg 		  vec_safe_push (loop->incoming, e);
3121debfc3dSmrg 		}
3131debfc3dSmrg 	    }
3141debfc3dSmrg 	}
3151debfc3dSmrg 
3161debfc3dSmrg       if (!process_incoming_edges (loop))
3171debfc3dSmrg 	{
3181debfc3dSmrg 	  if (dump_file)
3191debfc3dSmrg 	    fprintf (dump_file,
3201debfc3dSmrg 		     ";; retrying loop %d with forwarder blocks\n",
3211debfc3dSmrg 		     loop->loop_no);
3221debfc3dSmrg 	  if (!add_forwarder_blocks (loop))
3231debfc3dSmrg 	    {
3241debfc3dSmrg 	      if (dump_file)
3251debfc3dSmrg 		fprintf (dump_file, ";; No forwarder blocks found\n");
3261debfc3dSmrg 	      loop->bad = true;
3271debfc3dSmrg 	    }
3281debfc3dSmrg 	  else if (!process_incoming_edges (loop))
3291debfc3dSmrg 	    {
3301debfc3dSmrg 	      if (dump_file)
3311debfc3dSmrg 		fprintf (dump_file,
3321debfc3dSmrg 			 ";; can't find suitable entry for loop %d\n",
3331debfc3dSmrg 			 loop->loop_no);
3341debfc3dSmrg 	    }
3351debfc3dSmrg 	}
3361debfc3dSmrg     }
3371debfc3dSmrg }
3381debfc3dSmrg 
3391debfc3dSmrg /* Analyze the structure of the loops in the current function.  Use
3401debfc3dSmrg    LOOP_STACK for bitmap allocations.  Returns all the valid candidates for
3411debfc3dSmrg    hardware loops found in this function.  HOOKS is the argument
3421debfc3dSmrg    passed to reorg_loops, used here to find the iteration registers
3431debfc3dSmrg    from a loop_end pattern.  */
3441debfc3dSmrg static hwloop_info
discover_loops(bitmap_obstack * loop_stack,struct hw_doloop_hooks * hooks)3451debfc3dSmrg discover_loops (bitmap_obstack *loop_stack, struct hw_doloop_hooks *hooks)
3461debfc3dSmrg {
3471debfc3dSmrg   hwloop_info loops = NULL;
3481debfc3dSmrg   hwloop_info loop;
3491debfc3dSmrg   basic_block bb;
3501debfc3dSmrg   int nloops = 0;
3511debfc3dSmrg 
3521debfc3dSmrg   /* Find all the possible loop tails.  This means searching for every
3531debfc3dSmrg      loop_end instruction.  For each one found, create a hwloop_info
3541debfc3dSmrg      structure and add the head block to the work list. */
3551debfc3dSmrg   FOR_EACH_BB_FN (bb, cfun)
3561debfc3dSmrg     {
3571debfc3dSmrg       rtx_insn *tail = BB_END (bb);
3581debfc3dSmrg       rtx_insn *insn;
3591debfc3dSmrg       rtx reg;
3601debfc3dSmrg 
3611debfc3dSmrg       while (tail && NOTE_P (tail) && tail != BB_HEAD (bb))
3621debfc3dSmrg 	tail = PREV_INSN (tail);
3631debfc3dSmrg 
3641debfc3dSmrg       if (tail == NULL_RTX)
3651debfc3dSmrg 	continue;
3661debfc3dSmrg 
3671debfc3dSmrg       if (!JUMP_P (tail))
3681debfc3dSmrg 	continue;
3691debfc3dSmrg       reg = hooks->end_pattern_reg (tail);
3701debfc3dSmrg       if (reg == NULL_RTX)
3711debfc3dSmrg 	continue;
3721debfc3dSmrg 
3731debfc3dSmrg       /* A possible loop end */
3741debfc3dSmrg 
3751debfc3dSmrg       /* There's a degenerate case we can handle - an empty loop consisting
3761debfc3dSmrg 	 of only a back branch.  Handle that by deleting the branch.  */
3771debfc3dSmrg       insn = JUMP_LABEL_AS_INSN (tail);
3781debfc3dSmrg       while (insn && !NONDEBUG_INSN_P (insn))
3791debfc3dSmrg 	insn = NEXT_INSN (insn);
3801debfc3dSmrg       if (insn == tail)
3811debfc3dSmrg 	{
3821debfc3dSmrg 	  basic_block succ = FALLTHRU_EDGE (bb)->dest;
3831debfc3dSmrg 	  if (dump_file)
3841debfc3dSmrg 	    {
3851debfc3dSmrg 	      fprintf (dump_file, ";; degenerate loop ending at\n");
3861debfc3dSmrg 	      print_rtl_single (dump_file, tail);
3871debfc3dSmrg 	    }
3881debfc3dSmrg 	  if (!REGNO_REG_SET_P (df_get_live_in (succ), REGNO (reg)))
3891debfc3dSmrg 	    {
3901debfc3dSmrg 	      if (dump_file)
3911debfc3dSmrg 		fprintf (dump_file, ";; deleting it\n");
3921debfc3dSmrg 	      delete_insn_and_edges (tail);
3931debfc3dSmrg 	      continue;
3941debfc3dSmrg 	    }
3951debfc3dSmrg 	}
3961debfc3dSmrg 
3971debfc3dSmrg       loop = XCNEW (struct hwloop_info_d);
3981debfc3dSmrg       loop->next = loops;
3991debfc3dSmrg       loops = loop;
4001debfc3dSmrg       loop->loop_no = nloops++;
4011debfc3dSmrg       loop->blocks.create (20);
4021debfc3dSmrg       loop->block_bitmap = BITMAP_ALLOC (loop_stack);
4031debfc3dSmrg 
4041debfc3dSmrg       if (dump_file)
4051debfc3dSmrg 	{
4061debfc3dSmrg 	  fprintf (dump_file, ";; potential loop %d ending at\n",
4071debfc3dSmrg 		   loop->loop_no);
4081debfc3dSmrg 	  print_rtl_single (dump_file, tail);
4091debfc3dSmrg 	}
4101debfc3dSmrg 
4111debfc3dSmrg       discover_loop (loop, bb, tail, reg);
4121debfc3dSmrg     }
4131debfc3dSmrg 
4141debfc3dSmrg   /* Compute loop nestings.  Given two loops A and B, either the sets
4151debfc3dSmrg      of their blocks don't intersect at all, or one is the subset of
4161debfc3dSmrg      the other, or the blocks don't form a good nesting structure.  */
4171debfc3dSmrg   for (loop = loops; loop; loop = loop->next)
4181debfc3dSmrg     {
4191debfc3dSmrg       hwloop_info other;
4201debfc3dSmrg 
4211debfc3dSmrg       if (loop->bad)
4221debfc3dSmrg 	continue;
4231debfc3dSmrg 
4241debfc3dSmrg       for (other = loops; other; other = other->next)
4251debfc3dSmrg 	{
4261debfc3dSmrg 	  if (other->bad)
4271debfc3dSmrg 	    continue;
4281debfc3dSmrg 
4291debfc3dSmrg 	  if (!bitmap_intersect_p (other->block_bitmap, loop->block_bitmap))
4301debfc3dSmrg 	    continue;
4311debfc3dSmrg 	  if (!bitmap_intersect_compl_p (other->block_bitmap,
4321debfc3dSmrg 					 loop->block_bitmap))
4331debfc3dSmrg 	    loop->loops.safe_push (other);
4341debfc3dSmrg 	  else if (!bitmap_intersect_compl_p (loop->block_bitmap,
4351debfc3dSmrg 					      other->block_bitmap))
4361debfc3dSmrg 	    other->loops.safe_push (loop);
4371debfc3dSmrg 	  else
4381debfc3dSmrg 	    {
4391debfc3dSmrg 	      if (dump_file)
4401debfc3dSmrg 		fprintf (dump_file,
4411debfc3dSmrg 			 ";; can't find suitable nesting for loops %d and %d\n",
4421debfc3dSmrg 			 loop->loop_no, other->loop_no);
4431debfc3dSmrg 	      loop->bad = other->bad = true;
4441debfc3dSmrg 	    }
4451debfc3dSmrg 	}
4461debfc3dSmrg     }
4471debfc3dSmrg 
4481debfc3dSmrg   if (dump_file)
4491debfc3dSmrg     dump_hwloops (loops);
4501debfc3dSmrg 
4511debfc3dSmrg   return loops;
4521debfc3dSmrg }
4531debfc3dSmrg 
4541debfc3dSmrg /* Free up the loop structures in LOOPS.  */
4551debfc3dSmrg static void
free_loops(hwloop_info loops)4561debfc3dSmrg free_loops (hwloop_info loops)
4571debfc3dSmrg {
4581debfc3dSmrg   while (loops)
4591debfc3dSmrg     {
4601debfc3dSmrg       hwloop_info loop = loops;
4611debfc3dSmrg       loops = loop->next;
4621debfc3dSmrg       loop->loops.release ();
4631debfc3dSmrg       loop->blocks.release ();
4641debfc3dSmrg       BITMAP_FREE (loop->block_bitmap);
4651debfc3dSmrg       XDELETE (loop);
4661debfc3dSmrg     }
4671debfc3dSmrg }
4681debfc3dSmrg 
4691debfc3dSmrg #define BB_AUX_INDEX(BB) ((intptr_t) (BB)->aux)
4701debfc3dSmrg 
4711debfc3dSmrg /* Initialize the aux fields to give ascending indices to basic blocks.  */
4721debfc3dSmrg static void
set_bb_indices(void)4731debfc3dSmrg set_bb_indices (void)
4741debfc3dSmrg {
4751debfc3dSmrg   basic_block bb;
4761debfc3dSmrg   intptr_t index;
4771debfc3dSmrg 
4781debfc3dSmrg   index = 0;
4791debfc3dSmrg   FOR_EACH_BB_FN (bb, cfun)
4801debfc3dSmrg     bb->aux = (void *) index++;
4811debfc3dSmrg }
4821debfc3dSmrg 
4831debfc3dSmrg /* The taken-branch edge from the loop end can actually go forward.
4841debfc3dSmrg    If the target's hardware loop support requires that the loop end be
4851debfc3dSmrg    after the loop start, try to reorder a loop's basic blocks when we
4861debfc3dSmrg    find such a case.
4871debfc3dSmrg    This is not very aggressive; it only moves at most one block.  It
4881debfc3dSmrg    does not introduce new branches into loops; it may remove them, or
4891debfc3dSmrg    it may switch fallthru/jump edges.  */
4901debfc3dSmrg static void
reorder_loops(hwloop_info loops)4911debfc3dSmrg reorder_loops (hwloop_info loops)
4921debfc3dSmrg {
4931debfc3dSmrg   basic_block bb;
4941debfc3dSmrg   hwloop_info loop;
4951debfc3dSmrg 
4961debfc3dSmrg   cfg_layout_initialize (0);
4971debfc3dSmrg 
4981debfc3dSmrg   set_bb_indices ();
4991debfc3dSmrg 
5001debfc3dSmrg   for (loop = loops; loop; loop = loop->next)
5011debfc3dSmrg     {
5021debfc3dSmrg       edge e;
5031debfc3dSmrg       edge_iterator ei;
5041debfc3dSmrg 
5051debfc3dSmrg       if (loop->bad)
5061debfc3dSmrg 	continue;
5071debfc3dSmrg 
5081debfc3dSmrg       if (BB_AUX_INDEX (loop->head) <= BB_AUX_INDEX (loop->tail))
5091debfc3dSmrg 	continue;
5101debfc3dSmrg 
5111debfc3dSmrg       FOR_EACH_EDGE (e, ei, loop->head->succs)
5121debfc3dSmrg 	{
5131debfc3dSmrg 	  if (bitmap_bit_p (loop->block_bitmap, e->dest->index)
5141debfc3dSmrg 	      && BB_AUX_INDEX (e->dest) < BB_AUX_INDEX (loop->tail))
5151debfc3dSmrg 	    {
5161debfc3dSmrg 	      basic_block start_bb = e->dest;
5171debfc3dSmrg 	      basic_block start_prev_bb = start_bb->prev_bb;
5181debfc3dSmrg 
5191debfc3dSmrg 	      if (dump_file)
5201debfc3dSmrg 		fprintf (dump_file, ";; Moving block %d before block %d\n",
5211debfc3dSmrg 			 loop->head->index, start_bb->index);
5221debfc3dSmrg 	      loop->head->prev_bb->next_bb = loop->head->next_bb;
5231debfc3dSmrg 	      loop->head->next_bb->prev_bb = loop->head->prev_bb;
5241debfc3dSmrg 
5251debfc3dSmrg 	      loop->head->prev_bb = start_prev_bb;
5261debfc3dSmrg 	      loop->head->next_bb = start_bb;
5271debfc3dSmrg 	      start_prev_bb->next_bb = start_bb->prev_bb = loop->head;
5281debfc3dSmrg 
5291debfc3dSmrg 	      set_bb_indices ();
5301debfc3dSmrg 	      break;
5311debfc3dSmrg 	    }
5321debfc3dSmrg 	}
5331debfc3dSmrg       loops = loops->next;
5341debfc3dSmrg     }
5351debfc3dSmrg 
5361debfc3dSmrg   FOR_EACH_BB_FN (bb, cfun)
5371debfc3dSmrg     {
5381debfc3dSmrg       if (bb->next_bb != EXIT_BLOCK_PTR_FOR_FN (cfun))
5391debfc3dSmrg 	bb->aux = bb->next_bb;
5401debfc3dSmrg       else
5411debfc3dSmrg 	bb->aux = NULL;
5421debfc3dSmrg     }
5431debfc3dSmrg   cfg_layout_finalize ();
5441debfc3dSmrg   clear_aux_for_blocks ();
5451debfc3dSmrg   df_analyze ();
5461debfc3dSmrg }
5471debfc3dSmrg 
5481debfc3dSmrg /* Call the OPT function for LOOP and all of its sub-loops.  This is
5491debfc3dSmrg    done in a depth-first search; innermost loops are visited first.
5501debfc3dSmrg    OPTIMIZE and FAIL are the functions passed to reorg_loops by the
5511debfc3dSmrg    target's reorg pass.  */
5521debfc3dSmrg static void
optimize_loop(hwloop_info loop,struct hw_doloop_hooks * hooks)5531debfc3dSmrg optimize_loop (hwloop_info loop, struct hw_doloop_hooks *hooks)
5541debfc3dSmrg {
5551debfc3dSmrg   int ix;
5561debfc3dSmrg   hwloop_info inner;
5571debfc3dSmrg   int inner_depth = 0;
5581debfc3dSmrg 
5591debfc3dSmrg   if (loop->visited)
5601debfc3dSmrg     return;
5611debfc3dSmrg 
5621debfc3dSmrg   loop->visited = 1;
5631debfc3dSmrg 
5641debfc3dSmrg   if (loop->bad)
5651debfc3dSmrg     {
5661debfc3dSmrg       if (dump_file)
5671debfc3dSmrg 	fprintf (dump_file, ";; loop %d bad when found\n", loop->loop_no);
5681debfc3dSmrg       goto bad_loop;
5691debfc3dSmrg     }
5701debfc3dSmrg 
5711debfc3dSmrg   /* Every loop contains in its list of inner loops every loop nested inside
5721debfc3dSmrg      it, even if there are intermediate loops.  This works because we're doing
5731debfc3dSmrg      a depth-first search here and never visit a loop more than once.
5741debfc3dSmrg      Recursion depth is effectively limited by the number of available
5751debfc3dSmrg      hardware registers.  */
5761debfc3dSmrg   for (ix = 0; loop->loops.iterate (ix, &inner); ix++)
5771debfc3dSmrg     {
5781debfc3dSmrg       optimize_loop (inner, hooks);
5791debfc3dSmrg 
5801debfc3dSmrg       if (!inner->bad && inner_depth < inner->depth)
5811debfc3dSmrg 	inner_depth = inner->depth;
5821debfc3dSmrg       /* The set of registers may be changed while optimizing the inner
5831debfc3dSmrg 	 loop.  */
584*8feb0f0bSmrg       loop->regs_set_in_loop |= inner->regs_set_in_loop;
5851debfc3dSmrg     }
5861debfc3dSmrg 
5871debfc3dSmrg   loop->depth = inner_depth + 1;
5881debfc3dSmrg 
5891debfc3dSmrg   if (hooks->opt (loop))
5901debfc3dSmrg     return;
5911debfc3dSmrg 
5921debfc3dSmrg  bad_loop:
5931debfc3dSmrg   if (dump_file)
5941debfc3dSmrg     fprintf (dump_file, ";; loop %d is bad\n", loop->loop_no);
5951debfc3dSmrg 
5961debfc3dSmrg   loop->bad = true;
5971debfc3dSmrg   hooks->fail (loop);
5981debfc3dSmrg }
5991debfc3dSmrg 
6001debfc3dSmrg /* This function can be used from a port's machine_dependent_reorg to
6011debfc3dSmrg    find and analyze loops that end in loop_end instructions.  It uses
6021debfc3dSmrg    a set of function pointers in HOOKS to call back into the
6031debfc3dSmrg    target-specific functions to perform the actual machine-specific
6041debfc3dSmrg    transformations.
6051debfc3dSmrg 
6061debfc3dSmrg    Such transformations typically involve additional set-up
6071debfc3dSmrg    instructions before the loop, to define loop bounds or set up a
6081debfc3dSmrg    special loop counter register.
6091debfc3dSmrg 
6101debfc3dSmrg    DO_REORDER should be set to true if we should try to use the
6111debfc3dSmrg    reorder_loops function to ensure the loop end occurs after the loop
6121debfc3dSmrg    start.  This is for use by targets where the loop hardware requires
6131debfc3dSmrg    this condition.
6141debfc3dSmrg 
6151debfc3dSmrg    HOOKS is used to pass in target specific hooks; see
6161debfc3dSmrg    hw-doloop.h.  */
6171debfc3dSmrg void
reorg_loops(bool do_reorder,struct hw_doloop_hooks * hooks)6181debfc3dSmrg reorg_loops (bool do_reorder, struct hw_doloop_hooks *hooks)
6191debfc3dSmrg {
6201debfc3dSmrg   hwloop_info loops = NULL;
6211debfc3dSmrg   hwloop_info loop;
6221debfc3dSmrg   bitmap_obstack loop_stack;
6231debfc3dSmrg 
6241debfc3dSmrg   df_live_add_problem ();
6251debfc3dSmrg   df_live_set_all_dirty ();
6261debfc3dSmrg   df_analyze ();
6271debfc3dSmrg 
6281debfc3dSmrg   bitmap_obstack_initialize (&loop_stack);
6291debfc3dSmrg 
6301debfc3dSmrg   if (dump_file)
6311debfc3dSmrg     fprintf (dump_file, ";; Find loops, first pass\n\n");
6321debfc3dSmrg 
6331debfc3dSmrg   loops = discover_loops (&loop_stack, hooks);
6341debfc3dSmrg 
6351debfc3dSmrg   /* We can't enter cfglayout mode anymore if basic block partitioning
6361debfc3dSmrg      already happened.  */
637a2dc1f3fSmrg   if (do_reorder && !crtl->has_bb_partition)
6381debfc3dSmrg     {
6391debfc3dSmrg       reorder_loops (loops);
6401debfc3dSmrg       free_loops (loops);
6411debfc3dSmrg 
6421debfc3dSmrg       if (dump_file)
6431debfc3dSmrg 	fprintf (dump_file, ";; Find loops, second pass\n\n");
6441debfc3dSmrg 
6451debfc3dSmrg       loops = discover_loops (&loop_stack, hooks);
6461debfc3dSmrg     }
6471debfc3dSmrg 
6481debfc3dSmrg   for (loop = loops; loop; loop = loop->next)
6491debfc3dSmrg     scan_loop (loop);
6501debfc3dSmrg 
6511debfc3dSmrg   /* Now apply the optimizations.  */
6521debfc3dSmrg   for (loop = loops; loop; loop = loop->next)
6531debfc3dSmrg     optimize_loop (loop, hooks);
6541debfc3dSmrg 
6551debfc3dSmrg   if (dump_file)
6561debfc3dSmrg     {
6571debfc3dSmrg       fprintf (dump_file, ";; After hardware loops optimization:\n\n");
6581debfc3dSmrg       dump_hwloops (loops);
6591debfc3dSmrg     }
6601debfc3dSmrg 
6611debfc3dSmrg   free_loops (loops);
6621debfc3dSmrg   bitmap_obstack_release (&loop_stack);
6631debfc3dSmrg 
6641debfc3dSmrg   if (dump_file)
6651debfc3dSmrg     print_rtl (dump_file, get_insns ());
6661debfc3dSmrg }
667