xref: /netbsd-src/external/gpl3/gcc.old/dist/gcc/hw-doloop.c (revision bdc22b2e01993381dcefeff2bc9b56ca75a4235c)
1 /* Code to analyze doloop loops in order for targets to perform late
2    optimizations converting doloops to other forms of hardware loops.
3    Copyright (C) 2011-2015 Free Software Foundation, Inc.
4 
5 This file is part of GCC.
6 
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11 
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3.  If not see
19 <http://www.gnu.org/licenses/>.  */
20 
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "rtl.h"
26 #include "flags.h"
27 #include "symtab.h"
28 #include "hashtab.h"
29 #include "hash-set.h"
30 #include "vec.h"
31 #include "machmode.h"
32 #include "hard-reg-set.h"
33 #include "input.h"
34 #include "function.h"
35 #include "statistics.h"
36 #include "double-int.h"
37 #include "real.h"
38 #include "fixed-value.h"
39 #include "alias.h"
40 #include "wide-int.h"
41 #include "inchash.h"
42 #include "tree.h"
43 #include "insn-config.h"
44 #include "expmed.h"
45 #include "dojump.h"
46 #include "explow.h"
47 #include "calls.h"
48 #include "emit-rtl.h"
49 #include "varasm.h"
50 #include "stmt.h"
51 #include "expr.h"
52 #include "regs.h"
53 #include "predict.h"
54 #include "dominance.h"
55 #include "cfg.h"
56 #include "cfgrtl.h"
57 #include "basic-block.h"
58 #include "tm_p.h"
59 #include "df.h"
60 #include "cfgloop.h"
61 #include "recog.h"
62 #include "target.h"
63 #include "hw-doloop.h"
64 #include "dumpfile.h"
65 
66 #ifdef HAVE_doloop_end
67 
68 /* Dump information collected in LOOPS.  */
69 static void
70 dump_hwloops (hwloop_info loops)
71 {
72   hwloop_info loop;
73 
74   for (loop = loops; loop; loop = loop->next)
75     {
76       hwloop_info i;
77       basic_block b;
78       unsigned ix;
79 
80       fprintf (dump_file, ";; loop %d: ", loop->loop_no);
81       if (loop->bad)
82 	fprintf (dump_file, "(bad) ");
83       fprintf (dump_file, "{head:%d, depth:%d, reg:%u}",
84 	       loop->head == NULL ? -1 : loop->head->index,
85 	       loop->depth, REGNO (loop->iter_reg));
86 
87       fprintf (dump_file, " blocks: [ ");
88       for (ix = 0; loop->blocks.iterate (ix, &b); ix++)
89 	fprintf (dump_file, "%d ", b->index);
90       fprintf (dump_file, "] ");
91 
92       fprintf (dump_file, " inner loops: [ ");
93       for (ix = 0; loop->loops.iterate (ix, &i); ix++)
94 	fprintf (dump_file, "%d ", i->loop_no);
95       fprintf (dump_file, "]\n");
96     }
97   fprintf (dump_file, "\n");
98 }
99 
100 /* Return true if BB is part of LOOP.  */
101 static bool
102 bb_in_loop_p (hwloop_info loop, basic_block bb)
103 {
104   return bitmap_bit_p (loop->block_bitmap, bb->index);
105 }
106 
107 /* Scan the blocks of LOOP (and its inferiors), and record things such as
108    hard registers set, jumps out of and within the loop.  */
109 static void
110 scan_loop (hwloop_info loop)
111 {
112   unsigned ix;
113   basic_block bb;
114 
115   if (loop->bad)
116     return;
117 
118   if (REGNO_REG_SET_P (df_get_live_in (loop->successor),
119 		       REGNO (loop->iter_reg)))
120     loop->iter_reg_used_outside = true;
121 
122   for (ix = 0; loop->blocks.iterate (ix, &bb); ix++)
123     {
124       rtx_insn *insn;
125       edge e;
126       edge_iterator ei;
127 
128       if (bb != loop->tail)
129 	FOR_EACH_EDGE (e, ei, bb->succs)
130 	  {
131 	    if (bb_in_loop_p (loop, e->dest))
132 	      {
133 		if (!(e->flags & EDGE_FALLTHRU))
134 		  loop->jumps_within = true;
135 	      }
136 	    else
137 	      {
138 		loop->jumps_outof = true;
139 		if (!loop->bad)
140 		  gcc_assert (!REGNO_REG_SET_P (df_get_live_in (e->dest),
141 						REGNO (loop->iter_reg)));
142 	      }
143 	  }
144 
145       for (insn = BB_HEAD (bb);
146 	   insn != NEXT_INSN (BB_END (bb));
147 	   insn = NEXT_INSN (insn))
148 	{
149 	  df_ref def;
150 	  HARD_REG_SET set_this_insn;
151 
152 	  if (!NONDEBUG_INSN_P (insn))
153 	    continue;
154 
155 	  if (recog_memoized (insn) < 0
156 	      && (GET_CODE (PATTERN (insn)) == ASM_INPUT
157 		  || asm_noperands (PATTERN (insn)) >= 0))
158 	    loop->has_asm = true;
159 
160 	  CLEAR_HARD_REG_SET (set_this_insn);
161 	  FOR_EACH_INSN_DEF (def, insn)
162 	    {
163 	      rtx dreg = DF_REF_REG (def);
164 
165 	      if (!REG_P (dreg))
166 		continue;
167 
168 	      add_to_hard_reg_set (&set_this_insn, GET_MODE (dreg),
169 				   REGNO (dreg));
170 	    }
171 
172 	  if (insn == loop->loop_end)
173 	    CLEAR_HARD_REG_BIT (set_this_insn, REGNO (loop->iter_reg));
174 	  else if (reg_mentioned_p (loop->iter_reg, PATTERN (insn)))
175 	    loop->iter_reg_used = true;
176 	  IOR_HARD_REG_SET (loop->regs_set_in_loop, set_this_insn);
177 	}
178     }
179 }
180 
181 /* Compute the incoming_dest and incoming_src members of LOOP by
182    identifying the edges that jump into the loop.  If there is more
183    than one block that jumps into the loop, incoming_src will be set
184    to NULL; likewise, if there is more than one block in the loop that
185    is the destination of an incoming edge, incoming_dest will be NULL.
186 
187    Return true if either of these two fields is nonnull, false
188    otherwise.  */
189 static bool
190 process_incoming_edges (hwloop_info loop)
191 {
192   edge e;
193   edge_iterator ei;
194   bool first = true;
195 
196   FOR_EACH_EDGE (e, ei, loop->incoming)
197     {
198       if (first)
199 	{
200 	  loop->incoming_src = e->src;
201 	  loop->incoming_dest = e->dest;
202 	  first = false;
203 	}
204       else
205 	{
206 	  if (e->dest != loop->incoming_dest)
207 	    loop->incoming_dest = NULL;
208 	  if (e->src != loop->incoming_src)
209 	    loop->incoming_src = NULL;
210 	}
211     }
212   if (loop->incoming_src == NULL && loop->incoming_dest == NULL)
213     return false;
214 
215   return true;
216 }
217 
218 /* Try to identify a forwarder block that jump into LOOP, and add it to
219    the set of blocks in the loop, updating the vector of incoming blocks as
220    well.  This transformation gives a second chance to loops we couldn't
221    otherwise handle by increasing the chance that we'll end up with one
222    incoming_src block.
223    Return true if we made a change, false otherwise.  */
224 static bool
225 add_forwarder_blocks (hwloop_info loop)
226 {
227   edge e;
228   edge_iterator ei;
229 
230   FOR_EACH_EDGE (e, ei, loop->incoming)
231     {
232       if (forwarder_block_p (e->src))
233 	{
234 	  edge e2;
235 	  edge_iterator ei2;
236 
237 	  if (dump_file)
238 	    fprintf (dump_file,
239 		     ";; Adding forwarder block %d to loop %d and retrying\n",
240 		     e->src->index, loop->loop_no);
241 	  loop->blocks.safe_push (e->src);
242 	  bitmap_set_bit (loop->block_bitmap, e->src->index);
243 	  FOR_EACH_EDGE (e2, ei2, e->src->preds)
244 	    vec_safe_push (loop->incoming, e2);
245 	  loop->incoming->unordered_remove (ei.index);
246 	  return true;
247 	}
248     }
249   return false;
250 }
251 
252 /* Called from reorg_loops when a potential loop end is found.  LOOP is
253    a newly set up structure describing the loop, it is this function's
254    responsibility to fill most of it.  TAIL_BB and TAIL_INSN point to the
255    loop_end insn and its enclosing basic block.  REG is the loop counter
256    register.
257    For our purposes, a loop is defined by the set of blocks reachable from
258    the loop head in which the loop counter register is live.  This matches
259    the expected use; targets that call into this code usually replace the
260    loop counter with a different special register.  */
261 static void
262 discover_loop (hwloop_info loop, basic_block tail_bb, rtx_insn *tail_insn, rtx reg)
263 {
264   bool found_tail;
265   unsigned dwork = 0;
266   basic_block bb;
267 
268   loop->tail = tail_bb;
269   loop->loop_end = tail_insn;
270   loop->iter_reg = reg;
271   vec_alloc (loop->incoming, 2);
272   loop->start_label = as_a <rtx_insn *> (JUMP_LABEL (tail_insn));
273 
274   if (EDGE_COUNT (tail_bb->succs) != 2)
275     {
276       loop->bad = true;
277       return;
278     }
279   loop->head = BRANCH_EDGE (tail_bb)->dest;
280   loop->successor = FALLTHRU_EDGE (tail_bb)->dest;
281 
282   auto_vec<basic_block, 20> works;
283   works.safe_push (loop->head);
284 
285   found_tail = false;
286   for (dwork = 0; works.iterate (dwork, &bb); dwork++)
287     {
288       edge e;
289       edge_iterator ei;
290       if (bb == EXIT_BLOCK_PTR_FOR_FN (cfun))
291 	{
292 	  /* We've reached the exit block.  The loop must be bad. */
293 	  if (dump_file)
294 	    fprintf (dump_file,
295 		     ";; Loop is bad - reached exit block while scanning\n");
296 	  loop->bad = true;
297 	  break;
298 	}
299 
300       if (bitmap_bit_p (loop->block_bitmap, bb->index))
301 	continue;
302 
303       /* We've not seen this block before.  Add it to the loop's
304 	 list and then add each successor to the work list.  */
305 
306       loop->blocks.safe_push (bb);
307       bitmap_set_bit (loop->block_bitmap, bb->index);
308 
309       if (bb == tail_bb)
310 	found_tail = true;
311       else
312 	{
313 	  FOR_EACH_EDGE (e, ei, bb->succs)
314 	    {
315 	      basic_block succ = EDGE_SUCC (bb, ei.index)->dest;
316 	      if (REGNO_REG_SET_P (df_get_live_in (succ),
317 				   REGNO (loop->iter_reg)))
318 		works.safe_push (succ);
319 	    }
320 	}
321     }
322 
323   if (!found_tail)
324     loop->bad = true;
325 
326   /* Find the predecessor, and make sure nothing else jumps into this loop.  */
327   if (!loop->bad)
328     {
329       FOR_EACH_VEC_ELT (loop->blocks, dwork, bb)
330 	{
331 	  edge e;
332 	  edge_iterator ei;
333 	  FOR_EACH_EDGE (e, ei, bb->preds)
334 	    {
335 	      basic_block pred = e->src;
336 
337 	      if (!bb_in_loop_p (loop, pred))
338 		{
339 		  if (dump_file)
340 		    fprintf (dump_file, ";; Loop %d: incoming edge %d -> %d\n",
341 			     loop->loop_no, pred->index,
342 			     e->dest->index);
343 		  vec_safe_push (loop->incoming, e);
344 		}
345 	    }
346 	}
347 
348       if (!process_incoming_edges (loop))
349 	{
350 	  if (dump_file)
351 	    fprintf (dump_file,
352 		     ";; retrying loop %d with forwarder blocks\n",
353 		     loop->loop_no);
354 	  if (!add_forwarder_blocks (loop))
355 	    {
356 	      if (dump_file)
357 		fprintf (dump_file, ";; No forwarder blocks found\n");
358 	      loop->bad = true;
359 	    }
360 	  else if (!process_incoming_edges (loop))
361 	    {
362 	      if (dump_file)
363 		fprintf (dump_file,
364 			 ";; can't find suitable entry for loop %d\n",
365 			 loop->loop_no);
366 	    }
367 	}
368     }
369 }
370 
371 /* Analyze the structure of the loops in the current function.  Use
372    LOOP_STACK for bitmap allocations.  Returns all the valid candidates for
373    hardware loops found in this function.  HOOKS is the argument
374    passed to reorg_loops, used here to find the iteration registers
375    from a loop_end pattern.  */
376 static hwloop_info
377 discover_loops (bitmap_obstack *loop_stack, struct hw_doloop_hooks *hooks)
378 {
379   hwloop_info loops = NULL;
380   hwloop_info loop;
381   basic_block bb;
382   int nloops = 0;
383 
384   /* Find all the possible loop tails.  This means searching for every
385      loop_end instruction.  For each one found, create a hwloop_info
386      structure and add the head block to the work list. */
387   FOR_EACH_BB_FN (bb, cfun)
388     {
389       rtx_insn *tail = BB_END (bb);
390       rtx_insn *insn;
391       rtx reg;
392 
393       while (tail && NOTE_P (tail) && tail != BB_HEAD (bb))
394 	tail = PREV_INSN (tail);
395 
396       if (tail == NULL_RTX)
397 	continue;
398 
399       if (!JUMP_P (tail))
400 	continue;
401       reg = hooks->end_pattern_reg (tail);
402       if (reg == NULL_RTX)
403 	continue;
404 
405       /* A possible loop end */
406 
407       /* There's a degenerate case we can handle - an empty loop consisting
408 	 of only a back branch.  Handle that by deleting the branch.  */
409       insn = JUMP_LABEL_AS_INSN (tail);
410       while (insn && !NONDEBUG_INSN_P (insn))
411 	insn = NEXT_INSN (insn);
412       if (insn == tail)
413 	{
414 	  basic_block succ = FALLTHRU_EDGE (bb)->dest;
415 	  if (dump_file)
416 	    {
417 	      fprintf (dump_file, ";; degenerate loop ending at\n");
418 	      print_rtl_single (dump_file, tail);
419 	    }
420 	  if (!REGNO_REG_SET_P (df_get_live_in (succ), REGNO (reg)))
421 	    {
422 	      if (dump_file)
423 		fprintf (dump_file, ";; deleting it\n");
424 	      delete_insn_and_edges (tail);
425 	      continue;
426 	    }
427 	}
428 
429       loop = XCNEW (struct hwloop_info_d);
430       loop->next = loops;
431       loops = loop;
432       loop->loop_no = nloops++;
433       loop->blocks.create (20);
434       loop->block_bitmap = BITMAP_ALLOC (loop_stack);
435 
436       if (dump_file)
437 	{
438 	  fprintf (dump_file, ";; potential loop %d ending at\n",
439 		   loop->loop_no);
440 	  print_rtl_single (dump_file, tail);
441 	}
442 
443       discover_loop (loop, bb, tail, reg);
444     }
445 
446   /* Compute loop nestings.  Given two loops A and B, either the sets
447      of their blocks don't intersect at all, or one is the subset of
448      the other, or the blocks don't form a good nesting structure.  */
449   for (loop = loops; loop; loop = loop->next)
450     {
451       hwloop_info other;
452 
453       if (loop->bad)
454 	continue;
455 
456       for (other = loops; other; other = other->next)
457 	{
458 	  if (other->bad)
459 	    continue;
460 
461 	  if (!bitmap_intersect_p (other->block_bitmap, loop->block_bitmap))
462 	    continue;
463 	  if (!bitmap_intersect_compl_p (other->block_bitmap,
464 					 loop->block_bitmap))
465 	    loop->loops.safe_push (other);
466 	  else if (!bitmap_intersect_compl_p (loop->block_bitmap,
467 					      other->block_bitmap))
468 	    other->loops.safe_push (loop);
469 	  else
470 	    {
471 	      if (dump_file)
472 		fprintf (dump_file,
473 			 ";; can't find suitable nesting for loops %d and %d\n",
474 			 loop->loop_no, other->loop_no);
475 	      loop->bad = other->bad = true;
476 	    }
477 	}
478     }
479 
480   if (dump_file)
481     dump_hwloops (loops);
482 
483   return loops;
484 }
485 
486 /* Free up the loop structures in LOOPS.  */
487 static void
488 free_loops (hwloop_info loops)
489 {
490   while (loops)
491     {
492       hwloop_info loop = loops;
493       loops = loop->next;
494       loop->loops.release ();
495       loop->blocks.release ();
496       BITMAP_FREE (loop->block_bitmap);
497       XDELETE (loop);
498     }
499 }
500 
501 #define BB_AUX_INDEX(BB) ((intptr_t) (BB)->aux)
502 
503 /* Initialize the aux fields to give ascending indices to basic blocks.  */
504 static void
505 set_bb_indices (void)
506 {
507   basic_block bb;
508   intptr_t index;
509 
510   index = 0;
511   FOR_EACH_BB_FN (bb, cfun)
512     bb->aux = (void *) index++;
513 }
514 
515 /* The taken-branch edge from the loop end can actually go forward.
516    If the target's hardware loop support requires that the loop end be
517    after the loop start, try to reorder a loop's basic blocks when we
518    find such a case.
519    This is not very aggressive; it only moves at most one block.  It
520    does not introduce new branches into loops; it may remove them, or
521    it may switch fallthru/jump edges.  */
522 static void
523 reorder_loops (hwloop_info loops)
524 {
525   basic_block bb;
526   hwloop_info loop;
527 
528   cfg_layout_initialize (0);
529 
530   set_bb_indices ();
531 
532   for (loop = loops; loop; loop = loop->next)
533     {
534       edge e;
535       edge_iterator ei;
536 
537       if (loop->bad)
538 	continue;
539 
540       if (BB_AUX_INDEX (loop->head) <= BB_AUX_INDEX (loop->tail))
541 	continue;
542 
543       FOR_EACH_EDGE (e, ei, loop->head->succs)
544 	{
545 	  if (bitmap_bit_p (loop->block_bitmap, e->dest->index)
546 	      && BB_AUX_INDEX (e->dest) < BB_AUX_INDEX (loop->tail))
547 	    {
548 	      basic_block start_bb = e->dest;
549 	      basic_block start_prev_bb = start_bb->prev_bb;
550 
551 	      if (dump_file)
552 		fprintf (dump_file, ";; Moving block %d before block %d\n",
553 			 loop->head->index, start_bb->index);
554 	      loop->head->prev_bb->next_bb = loop->head->next_bb;
555 	      loop->head->next_bb->prev_bb = loop->head->prev_bb;
556 
557 	      loop->head->prev_bb = start_prev_bb;
558 	      loop->head->next_bb = start_bb;
559 	      start_prev_bb->next_bb = start_bb->prev_bb = loop->head;
560 
561 	      set_bb_indices ();
562 	      break;
563 	    }
564 	}
565       loops = loops->next;
566     }
567 
568   FOR_EACH_BB_FN (bb, cfun)
569     {
570       if (bb->next_bb != EXIT_BLOCK_PTR_FOR_FN (cfun))
571 	bb->aux = bb->next_bb;
572       else
573 	bb->aux = NULL;
574     }
575   cfg_layout_finalize ();
576   clear_aux_for_blocks ();
577   df_analyze ();
578 }
579 
580 /* Call the OPT function for LOOP and all of its sub-loops.  This is
581    done in a depth-first search; innermost loops are visited first.
582    OPTIMIZE and FAIL are the functions passed to reorg_loops by the
583    target's reorg pass.  */
584 static void
585 optimize_loop (hwloop_info loop, struct hw_doloop_hooks *hooks)
586 {
587   int ix;
588   hwloop_info inner;
589   int inner_depth = 0;
590 
591   if (loop->visited)
592     return;
593 
594   loop->visited = 1;
595 
596   if (loop->bad)
597     {
598       if (dump_file)
599 	fprintf (dump_file, ";; loop %d bad when found\n", loop->loop_no);
600       goto bad_loop;
601     }
602 
603   /* Every loop contains in its list of inner loops every loop nested inside
604      it, even if there are intermediate loops.  This works because we're doing
605      a depth-first search here and never visit a loop more than once.
606      Recursion depth is effectively limited by the number of available
607      hardware registers.  */
608   for (ix = 0; loop->loops.iterate (ix, &inner); ix++)
609     {
610       optimize_loop (inner, hooks);
611 
612       if (!inner->bad && inner_depth < inner->depth)
613 	inner_depth = inner->depth;
614       /* The set of registers may be changed while optimizing the inner
615 	 loop.  */
616       IOR_HARD_REG_SET (loop->regs_set_in_loop, inner->regs_set_in_loop);
617     }
618 
619   loop->depth = inner_depth + 1;
620 
621   if (hooks->opt (loop))
622     return;
623 
624  bad_loop:
625   if (dump_file)
626     fprintf (dump_file, ";; loop %d is bad\n", loop->loop_no);
627 
628   loop->bad = true;
629   hooks->fail (loop);
630 }
631 
632 /* This function can be used from a port's machine_dependent_reorg to
633    find and analyze loops that end in loop_end instructions.  It uses
634    a set of function pointers in HOOKS to call back into the
635    target-specific functions to perform the actual machine-specific
636    transformations.
637 
638    Such transformations typically involve additional set-up
639    instructions before the loop, to define loop bounds or set up a
640    special loop counter register.
641 
642    DO_REORDER should be set to true if we should try to use the
643    reorder_loops function to ensure the loop end occurs after the loop
644    start.  This is for use by targets where the loop hardware requires
645    this condition.
646 
647    HOOKS is used to pass in target specific hooks; see
648    hw-doloop.h.  */
649 void
650 reorg_loops (bool do_reorder, struct hw_doloop_hooks *hooks)
651 {
652   hwloop_info loops = NULL;
653   hwloop_info loop;
654   bitmap_obstack loop_stack;
655 
656   df_live_add_problem ();
657   df_live_set_all_dirty ();
658   df_analyze ();
659 
660   bitmap_obstack_initialize (&loop_stack);
661 
662   if (dump_file)
663     fprintf (dump_file, ";; Find loops, first pass\n\n");
664 
665   loops = discover_loops (&loop_stack, hooks);
666 
667   /* We can't enter cfglayout mode anymore if basic block partitioning
668      already happened.  */
669   if (do_reorder && !flag_reorder_blocks_and_partition)
670     {
671       reorder_loops (loops);
672       free_loops (loops);
673 
674       if (dump_file)
675 	fprintf (dump_file, ";; Find loops, second pass\n\n");
676 
677       loops = discover_loops (&loop_stack, hooks);
678     }
679 
680   for (loop = loops; loop; loop = loop->next)
681     scan_loop (loop);
682 
683   /* Now apply the optimizations.  */
684   for (loop = loops; loop; loop = loop->next)
685     optimize_loop (loop, hooks);
686 
687   if (dump_file)
688     {
689       fprintf (dump_file, ";; After hardware loops optimization:\n\n");
690       dump_hwloops (loops);
691     }
692 
693   free_loops (loops);
694   bitmap_obstack_release (&loop_stack);
695 
696   if (dump_file)
697     print_rtl (dump_file, get_insns ());
698 }
699 #endif
700