xref: /netbsd-src/external/gpl3/gcc.old/dist/gcc/hw-doloop.c (revision d909946ca08dceb44d7d0f22ec9488679695d976)
1 /* Code to analyze doloop loops in order for targets to perform late
2    optimizations converting doloops to other forms of hardware loops.
3    Copyright (C) 2011-2013 Free Software Foundation, Inc.
4 
5 This file is part of GCC.
6 
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11 
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3.  If not see
19 <http://www.gnu.org/licenses/>.  */
20 
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "rtl.h"
26 #include "flags.h"
27 #include "expr.h"
28 #include "hard-reg-set.h"
29 #include "regs.h"
30 #include "basic-block.h"
31 #include "tm_p.h"
32 #include "df.h"
33 #include "cfgloop.h"
34 #include "recog.h"
35 #include "target.h"
36 #include "hw-doloop.h"
37 #include "dumpfile.h"
38 
39 #ifdef HAVE_doloop_end
40 
41 /* Dump information collected in LOOPS.  */
42 static void
43 dump_hwloops (hwloop_info loops)
44 {
45   hwloop_info loop;
46 
47   for (loop = loops; loop; loop = loop->next)
48     {
49       hwloop_info i;
50       basic_block b;
51       unsigned ix;
52 
53       fprintf (dump_file, ";; loop %d: ", loop->loop_no);
54       if (loop->bad)
55 	fprintf (dump_file, "(bad) ");
56       fprintf (dump_file, "{head:%d, depth:%d, reg:%u}",
57 	       loop->head == NULL ? -1 : loop->head->index,
58 	       loop->depth, REGNO (loop->iter_reg));
59 
60       fprintf (dump_file, " blocks: [ ");
61       for (ix = 0; loop->blocks.iterate (ix, &b); ix++)
62 	fprintf (dump_file, "%d ", b->index);
63       fprintf (dump_file, "] ");
64 
65       fprintf (dump_file, " inner loops: [ ");
66       for (ix = 0; loop->loops.iterate (ix, &i); ix++)
67 	fprintf (dump_file, "%d ", i->loop_no);
68       fprintf (dump_file, "]\n");
69     }
70   fprintf (dump_file, "\n");
71 }
72 
73 /* Return true if BB is part of LOOP.  */
74 static bool
75 bb_in_loop_p (hwloop_info loop, basic_block bb)
76 {
77   return bitmap_bit_p (loop->block_bitmap, bb->index);
78 }
79 
80 /* Scan the blocks of LOOP (and its inferiors), and record things such as
81    hard registers set, jumps out of and within the loop.  */
82 static void
83 scan_loop (hwloop_info loop)
84 {
85   unsigned ix;
86   basic_block bb;
87 
88   if (loop->bad)
89     return;
90 
91   if (REGNO_REG_SET_P (df_get_live_in (loop->successor),
92 		       REGNO (loop->iter_reg)))
93     loop->iter_reg_used_outside = true;
94 
95   for (ix = 0; loop->blocks.iterate (ix, &bb); ix++)
96     {
97       rtx insn;
98       edge e;
99       edge_iterator ei;
100 
101       if (bb != loop->tail)
102 	FOR_EACH_EDGE (e, ei, bb->succs)
103 	  {
104 	    if (bb_in_loop_p (loop, e->dest))
105 	      {
106 		if (!(e->flags & EDGE_FALLTHRU))
107 		  loop->jumps_within = true;
108 	      }
109 	    else
110 	      {
111 		loop->jumps_outof = true;
112 		if (!loop->bad)
113 		  gcc_assert (!REGNO_REG_SET_P (df_get_live_in (e->dest),
114 						REGNO (loop->iter_reg)));
115 	      }
116 	  }
117 
118       for (insn = BB_HEAD (bb);
119 	   insn != NEXT_INSN (BB_END (bb));
120 	   insn = NEXT_INSN (insn))
121 	{
122 	  df_ref *def_rec;
123 	  HARD_REG_SET set_this_insn;
124 
125 	  if (!NONDEBUG_INSN_P (insn))
126 	    continue;
127 
128 	  if (recog_memoized (insn) < 0
129 	      && (GET_CODE (PATTERN (insn)) == ASM_INPUT
130 		  || asm_noperands (PATTERN (insn)) >= 0))
131 	    loop->has_asm = true;
132 
133 	  CLEAR_HARD_REG_SET (set_this_insn);
134 	  for (def_rec = DF_INSN_DEFS (insn); *def_rec; def_rec++)
135 	    {
136 	      rtx dreg = DF_REF_REG (*def_rec);
137 
138 	      if (!REG_P (dreg))
139 		continue;
140 
141 	      add_to_hard_reg_set (&set_this_insn, GET_MODE (dreg),
142 				   REGNO (dreg));
143 	    }
144 
145 	  if (insn == loop->loop_end)
146 	    CLEAR_HARD_REG_BIT (set_this_insn, REGNO (loop->iter_reg));
147 	  else if (reg_mentioned_p (loop->iter_reg, PATTERN (insn)))
148 	    loop->iter_reg_used = true;
149 	  IOR_HARD_REG_SET (loop->regs_set_in_loop, set_this_insn);
150 	}
151     }
152 }
153 
154 /* Compute the incoming_dest and incoming_src members of LOOP by
155    identifying the edges that jump into the loop.  If there is more
156    than one block that jumps into the loop, incoming_src will be set
157    to NULL; likewise, if there is more than one block in the loop that
158    is the destination of an incoming edge, incoming_dest will be NULL.
159 
160    Return true if either of these two fields is nonnull, false
161    otherwise.  */
162 static bool
163 process_incoming_edges (hwloop_info loop)
164 {
165   edge e;
166   edge_iterator ei;
167   bool first = true;
168 
169   FOR_EACH_EDGE (e, ei, loop->incoming)
170     {
171       if (first)
172 	{
173 	  loop->incoming_src = e->src;
174 	  loop->incoming_dest = e->dest;
175 	  first = false;
176 	}
177       else
178 	{
179 	  if (e->dest != loop->incoming_dest)
180 	    loop->incoming_dest = NULL;
181 	  if (e->src != loop->incoming_src)
182 	    loop->incoming_src = NULL;
183 	}
184     }
185   if (loop->incoming_src == NULL && loop->incoming_dest == NULL)
186     return false;
187 
188   return true;
189 }
190 
191 /* Try to identify a forwarder block that jump into LOOP, and add it to
192    the set of blocks in the loop, updating the vector of incoming blocks as
193    well.  This transformation gives a second chance to loops we couldn't
194    otherwise handle by increasing the chance that we'll end up with one
195    incoming_src block.
196    Return true if we made a change, false otherwise.  */
197 static bool
198 add_forwarder_blocks (hwloop_info loop)
199 {
200   edge e;
201   edge_iterator ei;
202 
203   FOR_EACH_EDGE (e, ei, loop->incoming)
204     {
205       if (forwarder_block_p (e->src))
206 	{
207 	  edge e2;
208 	  edge_iterator ei2;
209 
210 	  if (dump_file)
211 	    fprintf (dump_file,
212 		     ";; Adding forwarder block %d to loop %d and retrying\n",
213 		     e->src->index, loop->loop_no);
214 	  loop->blocks.safe_push (e->src);
215 	  bitmap_set_bit (loop->block_bitmap, e->src->index);
216 	  FOR_EACH_EDGE (e2, ei2, e->src->preds)
217 	    vec_safe_push (loop->incoming, e2);
218 	  loop->incoming->unordered_remove (ei.index);
219 	  return true;
220 	}
221     }
222   return false;
223 }
224 
225 /* Called from reorg_loops when a potential loop end is found.  LOOP is
226    a newly set up structure describing the loop, it is this function's
227    responsibility to fill most of it.  TAIL_BB and TAIL_INSN point to the
228    loop_end insn and its enclosing basic block.  REG is the loop counter
229    register.
230    For our purposes, a loop is defined by the set of blocks reachable from
231    the loop head in which the loop counter register is live.  This matches
232    the expected use; targets that call into this code usually replace the
233    loop counter with a different special register.  */
234 static void
235 discover_loop (hwloop_info loop, basic_block tail_bb, rtx tail_insn, rtx reg)
236 {
237   bool found_tail;
238   unsigned dwork = 0;
239   basic_block bb;
240   vec<basic_block> works;
241 
242   loop->tail = tail_bb;
243   loop->loop_end = tail_insn;
244   loop->iter_reg = reg;
245   vec_alloc (loop->incoming, 2);
246   loop->start_label = JUMP_LABEL (tail_insn);
247 
248   if (EDGE_COUNT (tail_bb->succs) != 2)
249     {
250       loop->bad = true;
251       return;
252     }
253   loop->head = BRANCH_EDGE (tail_bb)->dest;
254   loop->successor = FALLTHRU_EDGE (tail_bb)->dest;
255 
256   works.create (20);
257   works.safe_push (loop->head);
258 
259   found_tail = false;
260   for (dwork = 0; works.iterate (dwork, &bb); dwork++)
261     {
262       edge e;
263       edge_iterator ei;
264       if (bb == EXIT_BLOCK_PTR)
265 	{
266 	  /* We've reached the exit block.  The loop must be bad. */
267 	  if (dump_file)
268 	    fprintf (dump_file,
269 		     ";; Loop is bad - reached exit block while scanning\n");
270 	  loop->bad = true;
271 	  break;
272 	}
273 
274       if (bitmap_bit_p (loop->block_bitmap, bb->index))
275 	continue;
276 
277       /* We've not seen this block before.  Add it to the loop's
278 	 list and then add each successor to the work list.  */
279 
280       loop->blocks.safe_push (bb);
281       bitmap_set_bit (loop->block_bitmap, bb->index);
282 
283       if (bb == tail_bb)
284 	found_tail = true;
285       else
286 	{
287 	  FOR_EACH_EDGE (e, ei, bb->succs)
288 	    {
289 	      basic_block succ = EDGE_SUCC (bb, ei.index)->dest;
290 	      if (REGNO_REG_SET_P (df_get_live_in (succ),
291 				   REGNO (loop->iter_reg)))
292 		works.safe_push (succ);
293 	    }
294 	}
295     }
296 
297   if (!found_tail)
298     loop->bad = true;
299 
300   /* Find the predecessor, and make sure nothing else jumps into this loop.  */
301   if (!loop->bad)
302     {
303       FOR_EACH_VEC_ELT (loop->blocks, dwork, bb)
304 	{
305 	  edge e;
306 	  edge_iterator ei;
307 	  FOR_EACH_EDGE (e, ei, bb->preds)
308 	    {
309 	      basic_block pred = e->src;
310 
311 	      if (!bb_in_loop_p (loop, pred))
312 		{
313 		  if (dump_file)
314 		    fprintf (dump_file, ";; Loop %d: incoming edge %d -> %d\n",
315 			     loop->loop_no, pred->index,
316 			     e->dest->index);
317 		  vec_safe_push (loop->incoming, e);
318 		}
319 	    }
320 	}
321 
322       if (!process_incoming_edges (loop))
323 	{
324 	  if (dump_file)
325 	    fprintf (dump_file,
326 		     ";; retrying loop %d with forwarder blocks\n",
327 		     loop->loop_no);
328 	  if (!add_forwarder_blocks (loop))
329 	    {
330 	      if (dump_file)
331 		fprintf (dump_file, ";; No forwarder blocks found\n");
332 	      loop->bad = true;
333 	    }
334 	  else if (!process_incoming_edges (loop))
335 	    {
336 	      if (dump_file)
337 		fprintf (dump_file,
338 			 ";; can't find suitable entry for loop %d\n",
339 			 loop->loop_no);
340 	    }
341 	}
342     }
343 
344   works.release ();
345 }
346 
347 /* Analyze the structure of the loops in the current function.  Use
348    LOOP_STACK for bitmap allocations.  Returns all the valid candidates for
349    hardware loops found in this function.  HOOKS is the argument
350    passed to reorg_loops, used here to find the iteration registers
351    from a loop_end pattern.  */
352 static hwloop_info
353 discover_loops (bitmap_obstack *loop_stack, struct hw_doloop_hooks *hooks)
354 {
355   hwloop_info loops = NULL;
356   hwloop_info loop;
357   basic_block bb;
358   int nloops = 0;
359 
360   /* Find all the possible loop tails.  This means searching for every
361      loop_end instruction.  For each one found, create a hwloop_info
362      structure and add the head block to the work list. */
363   FOR_EACH_BB (bb)
364     {
365       rtx tail = BB_END (bb);
366       rtx insn, reg;
367 
368       while (tail && GET_CODE (tail) == NOTE && tail != BB_HEAD (bb))
369 	tail = PREV_INSN (tail);
370 
371       if (tail == NULL_RTX)
372 	continue;
373 
374       if (!JUMP_P (tail))
375 	continue;
376       reg = hooks->end_pattern_reg (tail);
377       if (reg == NULL_RTX)
378 	continue;
379 
380       /* A possible loop end */
381 
382       /* There's a degenerate case we can handle - an empty loop consisting
383 	 of only a back branch.  Handle that by deleting the branch.  */
384       insn = JUMP_LABEL (tail);
385       while (insn && !NONDEBUG_INSN_P (insn))
386 	insn = NEXT_INSN (insn);
387       if (insn == tail)
388 	{
389 	  basic_block succ = FALLTHRU_EDGE (bb)->dest;
390 	  if (dump_file)
391 	    {
392 	      fprintf (dump_file, ";; degenerate loop ending at\n");
393 	      print_rtl_single (dump_file, tail);
394 	    }
395 	  if (!REGNO_REG_SET_P (df_get_live_in (succ), REGNO (reg)))
396 	    {
397 	      if (dump_file)
398 		fprintf (dump_file, ";; deleting it\n");
399 	      delete_insn_and_edges (tail);
400 	      continue;
401 	    }
402 	}
403 
404       loop = XCNEW (struct hwloop_info_d);
405       loop->next = loops;
406       loops = loop;
407       loop->loop_no = nloops++;
408       loop->blocks.create (20);
409       loop->block_bitmap = BITMAP_ALLOC (loop_stack);
410 
411       if (dump_file)
412 	{
413 	  fprintf (dump_file, ";; potential loop %d ending at\n",
414 		   loop->loop_no);
415 	  print_rtl_single (dump_file, tail);
416 	}
417 
418       discover_loop (loop, bb, tail, reg);
419     }
420 
421   /* Compute loop nestings.  Given two loops A and B, either the sets
422      of their blocks don't intersect at all, or one is the subset of
423      the other, or the blocks don't form a good nesting structure.  */
424   for (loop = loops; loop; loop = loop->next)
425     {
426       hwloop_info other;
427 
428       if (loop->bad)
429 	continue;
430 
431       for (other = loops; other; other = other->next)
432 	{
433 	  if (other->bad)
434 	    continue;
435 
436 	  if (!bitmap_intersect_p (other->block_bitmap, loop->block_bitmap))
437 	    continue;
438 	  if (!bitmap_intersect_compl_p (other->block_bitmap,
439 					 loop->block_bitmap))
440 	    loop->loops.safe_push (other);
441 	  else if (!bitmap_intersect_compl_p (loop->block_bitmap,
442 					      other->block_bitmap))
443 	    other->loops.safe_push (loop);
444 	  else
445 	    {
446 	      if (dump_file)
447 		fprintf (dump_file,
448 			 ";; can't find suitable nesting for loops %d and %d\n",
449 			 loop->loop_no, other->loop_no);
450 	      loop->bad = other->bad = true;
451 	    }
452 	}
453     }
454 
455   if (dump_file)
456     dump_hwloops (loops);
457 
458   return loops;
459 }
460 
461 /* Free up the loop structures in LOOPS.  */
462 static void
463 free_loops (hwloop_info loops)
464 {
465   while (loops)
466     {
467       hwloop_info loop = loops;
468       loops = loop->next;
469       loop->loops.release ();
470       loop->blocks.release ();
471       BITMAP_FREE (loop->block_bitmap);
472       XDELETE (loop);
473     }
474 }
475 
476 #define BB_AUX_INDEX(BB) ((intptr_t) (BB)->aux)
477 
478 /* Initialize the aux fields to give ascending indices to basic blocks.  */
479 static void
480 set_bb_indices (void)
481 {
482   basic_block bb;
483   intptr_t index;
484 
485   index = 0;
486   FOR_EACH_BB (bb)
487     bb->aux = (void *) index++;
488 }
489 
490 /* The taken-branch edge from the loop end can actually go forward.
491    If the target's hardware loop support requires that the loop end be
492    after the loop start, try to reorder a loop's basic blocks when we
493    find such a case.
494    This is not very aggressive; it only moves at most one block.  It
495    does not introduce new branches into loops; it may remove them, or
496    it may switch fallthru/jump edges.  */
497 static void
498 reorder_loops (hwloop_info loops)
499 {
500   basic_block bb;
501   hwloop_info loop;
502 
503   cfg_layout_initialize (0);
504 
505   set_bb_indices ();
506 
507   for (loop = loops; loop; loop = loop->next)
508     {
509       edge e;
510       edge_iterator ei;
511 
512       if (loop->bad)
513 	continue;
514 
515       if (BB_AUX_INDEX (loop->head) <= BB_AUX_INDEX (loop->tail))
516 	continue;
517 
518       FOR_EACH_EDGE (e, ei, loop->head->succs)
519 	{
520 	  if (bitmap_bit_p (loop->block_bitmap, e->dest->index)
521 	      && BB_AUX_INDEX (e->dest) < BB_AUX_INDEX (loop->tail))
522 	    {
523 	      basic_block start_bb = e->dest;
524 	      basic_block start_prev_bb = start_bb->prev_bb;
525 
526 	      if (dump_file)
527 		fprintf (dump_file, ";; Moving block %d before block %d\n",
528 			 loop->head->index, start_bb->index);
529 	      loop->head->prev_bb->next_bb = loop->head->next_bb;
530 	      loop->head->next_bb->prev_bb = loop->head->prev_bb;
531 
532 	      loop->head->prev_bb = start_prev_bb;
533 	      loop->head->next_bb = start_bb;
534 	      start_prev_bb->next_bb = start_bb->prev_bb = loop->head;
535 
536 	      set_bb_indices ();
537 	      break;
538 	    }
539 	}
540       loops = loops->next;
541     }
542 
543   FOR_EACH_BB (bb)
544     {
545       if (bb->next_bb != EXIT_BLOCK_PTR)
546 	bb->aux = bb->next_bb;
547       else
548 	bb->aux = NULL;
549     }
550   cfg_layout_finalize ();
551   clear_aux_for_blocks ();
552   df_analyze ();
553 }
554 
555 /* Call the OPT function for LOOP and all of its sub-loops.  This is
556    done in a depth-first search; innermost loops are visited first.
557    OPTIMIZE and FAIL are the functions passed to reorg_loops by the
558    target's reorg pass.  */
559 static void
560 optimize_loop (hwloop_info loop, struct hw_doloop_hooks *hooks)
561 {
562   int ix;
563   hwloop_info inner;
564   int inner_depth = 0;
565 
566   if (loop->visited)
567     return;
568 
569   loop->visited = 1;
570 
571   if (loop->bad)
572     {
573       if (dump_file)
574 	fprintf (dump_file, ";; loop %d bad when found\n", loop->loop_no);
575       goto bad_loop;
576     }
577 
578   /* Every loop contains in its list of inner loops every loop nested inside
579      it, even if there are intermediate loops.  This works because we're doing
580      a depth-first search here and never visit a loop more than once.
581      Recursion depth is effectively limited by the number of available
582      hardware registers.  */
583   for (ix = 0; loop->loops.iterate (ix, &inner); ix++)
584     {
585       optimize_loop (inner, hooks);
586 
587       if (!inner->bad && inner_depth < inner->depth)
588 	inner_depth = inner->depth;
589       /* The set of registers may be changed while optimizing the inner
590 	 loop.  */
591       IOR_HARD_REG_SET (loop->regs_set_in_loop, inner->regs_set_in_loop);
592     }
593 
594   loop->depth = inner_depth + 1;
595 
596   if (hooks->opt (loop))
597     return;
598 
599  bad_loop:
600   if (dump_file)
601     fprintf (dump_file, ";; loop %d is bad\n", loop->loop_no);
602 
603   loop->bad = true;
604   hooks->fail (loop);
605 }
606 
607 /* This function can be used from a port's machine_dependent_reorg to
608    find and analyze loops that end in loop_end instructions.  It uses
609    a set of function pointers in HOOKS to call back into the
610    target-specific functions to perform the actual machine-specific
611    transformations.
612 
613    Such transformations typically involve additional set-up
614    instructions before the loop, to define loop bounds or set up a
615    special loop counter register.
616 
617    DO_REORDER should be set to true if we should try to use the
618    reorder_loops function to ensure the loop end occurs after the loop
619    start.  This is for use by targets where the loop hardware requires
620    this condition.
621 
622    HOOKS is used to pass in target specific hooks; see
623    hw-doloop.h.  */
624 void
625 reorg_loops (bool do_reorder, struct hw_doloop_hooks *hooks)
626 {
627   hwloop_info loops = NULL;
628   hwloop_info loop;
629   bitmap_obstack loop_stack;
630 
631   df_live_add_problem ();
632   df_live_set_all_dirty ();
633   df_analyze ();
634 
635   bitmap_obstack_initialize (&loop_stack);
636 
637   if (dump_file)
638     fprintf (dump_file, ";; Find loops, first pass\n\n");
639 
640   loops = discover_loops (&loop_stack, hooks);
641 
642   if (do_reorder)
643     {
644       reorder_loops (loops);
645       free_loops (loops);
646 
647       if (dump_file)
648 	fprintf (dump_file, ";; Find loops, second pass\n\n");
649 
650       loops = discover_loops (&loop_stack, hooks);
651     }
652 
653   for (loop = loops; loop; loop = loop->next)
654     scan_loop (loop);
655 
656   /* Now apply the optimizations.  */
657   for (loop = loops; loop; loop = loop->next)
658     optimize_loop (loop, hooks);
659 
660   if (dump_file)
661     {
662       fprintf (dump_file, ";; After hardware loops optimization:\n\n");
663       dump_hwloops (loops);
664     }
665 
666   free_loops (loops);
667 
668   if (dump_file)
669     print_rtl (dump_file, get_insns ());
670 }
671 #endif
672