xref: /openbsd-src/gnu/usr.bin/gcc/gcc/doloop.c (revision c87b03e512fc05ed6e0222f6fb0ae86264b1d05b)
1 /* Perform doloop optimizations
2    Copyright (C) 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
3    Contributed by Michael P. Hayes (m.hayes@elec.canterbury.ac.nz)
4 
5 This file is part of GCC.
6 
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
11 
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING.  If not, write to the Free
19 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
20 02111-1307, USA.  */
21 
22 #include "config.h"
23 #include "system.h"
24 #include "rtl.h"
25 #include "flags.h"
26 #include "expr.h"
27 #include "loop.h"
28 #include "hard-reg-set.h"
29 #include "basic-block.h"
30 #include "toplev.h"
31 #include "tm_p.h"
32 
33 
34 /* This module is used to modify loops with a determinable number of
35    iterations to use special low-overhead looping instructions.
36 
37    It first validates whether the loop is well behaved and has a
38    determinable number of iterations (either at compile or run-time).
39    It then modifies the loop to use a low-overhead looping pattern as
40    follows:
41 
42    1. A pseudo register is allocated as the loop iteration counter.
43 
44    2. The number of loop iterations is calculated and is stored
45       in the loop counter.
46 
47    3. At the end of the loop, the jump insn is replaced by the
48       doloop_end pattern.  The compare must remain because it might be
49       used elsewhere.  If the loop-variable or condition register are
50       used elsewhere, they will be eliminated by flow.
51 
52    4. An optional doloop_begin pattern is inserted at the top of the
53       loop.
54 */
55 
56 
57 #ifdef HAVE_doloop_end
58 
59 static rtx doloop_condition_get
60   PARAMS ((rtx));
61 static unsigned HOST_WIDE_INT doloop_iterations_max
62   PARAMS ((const struct loop_info *, enum machine_mode, int));
63 static int doloop_valid_p
64   PARAMS ((const struct loop *, rtx));
65 static int doloop_modify
66   PARAMS ((const struct loop *, rtx, rtx, rtx, rtx, rtx));
67 static int doloop_modify_runtime
68   PARAMS ((const struct loop *, rtx, rtx, rtx, enum machine_mode, rtx));
69 
70 
71 /* Return the loop termination condition for PATTERN or zero
72    if it is not a decrement and branch jump insn.  */
73 static rtx
doloop_condition_get(pattern)74 doloop_condition_get (pattern)
75      rtx pattern;
76 {
77   rtx cmp;
78   rtx inc;
79   rtx reg;
80   rtx condition;
81 
82   /* The canonical doloop pattern we expect is:
83 
84      (parallel [(set (pc) (if_then_else (condition)
85                                         (label_ref (label))
86                                         (pc)))
87                 (set (reg) (plus (reg) (const_int -1)))
88                 (additional clobbers and uses)])
89 
90      Some machines (IA-64) make the decrement conditional on
91      the condition as well, so we don't bother verifying the
92      actual decrement.  In summary, the branch must be the
93      first entry of the parallel (also required by jump.c),
94      and the second entry of the parallel must be a set of
95      the loop counter register.  */
96 
97   if (GET_CODE (pattern) != PARALLEL)
98     return 0;
99 
100   cmp = XVECEXP (pattern, 0, 0);
101   inc = XVECEXP (pattern, 0, 1);
102 
103   /* Check for (set (reg) (something)).  */
104   if (GET_CODE (inc) != SET || ! REG_P (SET_DEST (inc)))
105     return 0;
106 
107   /* Extract loop counter register.  */
108   reg = SET_DEST (inc);
109 
110   /* Check for (set (pc) (if_then_else (condition)
111                                        (label_ref (label))
112                                        (pc))).  */
113   if (GET_CODE (cmp) != SET
114       || SET_DEST (cmp) != pc_rtx
115       || GET_CODE (SET_SRC (cmp)) != IF_THEN_ELSE
116       || GET_CODE (XEXP (SET_SRC (cmp), 1)) != LABEL_REF
117       || XEXP (SET_SRC (cmp), 2) != pc_rtx)
118     return 0;
119 
120   /* Extract loop termination condition.  */
121   condition = XEXP (SET_SRC (cmp), 0);
122 
123   if ((GET_CODE (condition) != GE && GET_CODE (condition) != NE)
124       || GET_CODE (XEXP (condition, 1)) != CONST_INT)
125     return 0;
126 
127   if (XEXP (condition, 0) == reg)
128     return condition;
129 
130   if (GET_CODE (XEXP (condition, 0)) == PLUS
131       && XEXP (XEXP (condition, 0), 0) == reg)
132     return condition;
133 
134   /* ??? If a machine uses a funny comparison, we could return a
135      canonicalised form here.  */
136 
137   return 0;
138 }
139 
140 
141 /* Return an estimate of the maximum number of loop iterations for the
142    loop specified by LOOP or zero if the loop is not normal.
143    MODE is the mode of the iteration count and NONNEG is nonzero if
144    the iteration count has been proved to be non-negative.  */
145 static unsigned HOST_WIDE_INT
doloop_iterations_max(loop_info,mode,nonneg)146 doloop_iterations_max (loop_info, mode, nonneg)
147      const struct loop_info *loop_info;
148      enum machine_mode mode;
149      int nonneg;
150 {
151   unsigned HOST_WIDE_INT n_iterations_max;
152   enum rtx_code code;
153   rtx min_value;
154   rtx max_value;
155   HOST_WIDE_INT abs_inc;
156   int neg_inc;
157 
158   neg_inc = 0;
159   abs_inc = INTVAL (loop_info->increment);
160   if (abs_inc < 0)
161     {
162       abs_inc = -abs_inc;
163       neg_inc = 1;
164     }
165 
166   if (neg_inc)
167     {
168       code = swap_condition (loop_info->comparison_code);
169       min_value = loop_info->final_equiv_value;
170       max_value = loop_info->initial_equiv_value;
171     }
172   else
173     {
174       code = loop_info->comparison_code;
175       min_value = loop_info->initial_equiv_value;
176       max_value = loop_info->final_equiv_value;
177     }
178 
179   /* Since the loop has a VTOP, we know that the initial test will be
180      true and thus the value of max_value should be greater than the
181      value of min_value.  Thus the difference should always be positive
182      and the code must be LT, LE, LTU, LEU, or NE.  Otherwise the loop is
183      not normal, e.g., `for (i = 0; i < 10; i--)'.  */
184   switch (code)
185     {
186     case LTU:
187     case LEU:
188       {
189 	unsigned HOST_WIDE_INT umax;
190 	unsigned HOST_WIDE_INT umin;
191 
192 	if (GET_CODE (min_value) == CONST_INT)
193 	  umin = INTVAL (min_value);
194 	else
195 	  umin = 0;
196 
197 	if (GET_CODE (max_value) == CONST_INT)
198 	  umax = INTVAL (max_value);
199 	else
200 	  umax = ((unsigned) 2 << (GET_MODE_BITSIZE (mode) - 1)) - 1;
201 
202 	n_iterations_max = umax - umin;
203 	break;
204       }
205 
206     case LT:
207     case LE:
208       {
209 	HOST_WIDE_INT smax;
210 	HOST_WIDE_INT smin;
211 
212 	if (GET_CODE (min_value) == CONST_INT)
213 	  smin = INTVAL (min_value);
214 	else
215 	  smin = -((unsigned) 1 << (GET_MODE_BITSIZE (mode) - 1));
216 
217 	if (GET_CODE (max_value) == CONST_INT)
218 	  smax = INTVAL (max_value);
219 	else
220 	  smax = ((unsigned) 1 << (GET_MODE_BITSIZE (mode) - 1)) - 1;
221 
222 	n_iterations_max = smax - smin;
223 	break;
224       }
225 
226     case NE:
227       if (GET_CODE (min_value) == CONST_INT
228 	  && GET_CODE (max_value) == CONST_INT)
229 	n_iterations_max = INTVAL (max_value) - INTVAL (min_value);
230       else
231 	/* We need to conservatively assume that we might have the maximum
232 	   number of iterations without any additional knowledge.  */
233 	n_iterations_max = ((unsigned) 2 << (GET_MODE_BITSIZE (mode) - 1)) - 1;
234       break;
235 
236     default:
237       return 0;
238     }
239 
240   n_iterations_max /= abs_inc;
241 
242   /* If we know that the iteration count is non-negative then adjust
243      n_iterations_max if it is so large that it appears negative.  */
244   if (nonneg
245       && n_iterations_max > ((unsigned) 1 << (GET_MODE_BITSIZE (mode) - 1)))
246     n_iterations_max = ((unsigned) 1 << (GET_MODE_BITSIZE (mode) - 1)) - 1;
247 
248   return n_iterations_max;
249 }
250 
251 
252 /* Return nonzero if the loop specified by LOOP is suitable for
253    the use of special low-overhead looping instructions.  */
254 static int
doloop_valid_p(loop,jump_insn)255 doloop_valid_p (loop, jump_insn)
256      const struct loop *loop;
257      rtx jump_insn;
258 {
259   const struct loop_info *loop_info = LOOP_INFO (loop);
260 
261   /* The loop must have a conditional jump at the end.  */
262   if (! any_condjump_p (jump_insn)
263       || ! onlyjump_p (jump_insn))
264     {
265       if (loop_dump_stream)
266 	fprintf (loop_dump_stream,
267 		 "Doloop: Invalid jump at loop end.\n");
268       return 0;
269     }
270 
271   /* Give up if a loop has been completely unrolled.  */
272   if (loop_info->n_iterations == loop_info->unroll_number)
273     {
274       if (loop_dump_stream)
275 	fprintf (loop_dump_stream,
276 		 "Doloop: Loop completely unrolled.\n");
277       return 0;
278     }
279 
280   /* The loop must have a single exit target.  A break or return
281      statement within a loop will generate multiple loop exits.
282      Another example of a loop that currently generates multiple exit
283      targets is for (i = 0; i < (foo ? 8 : 4); i++) { }.  */
284   if (loop_info->has_multiple_exit_targets || loop->exit_count)
285     {
286       if (loop_dump_stream)
287 	fprintf (loop_dump_stream,
288 		 "Doloop: Loop has multiple exit targets.\n");
289       return 0;
290     }
291 
292   /* An indirect jump may jump out of the loop.  */
293   if (loop_info->has_indirect_jump)
294     {
295       if (loop_dump_stream)
296 	fprintf (loop_dump_stream,
297 		 "Doloop: Indirect jump in function.\n");
298       return 0;
299     }
300 
301   /* A called function may clobber any special registers required for
302      low-overhead looping.  */
303   if (loop_info->has_call)
304     {
305       if (loop_dump_stream)
306 	fprintf (loop_dump_stream,
307 		 "Doloop: Function call in loop.\n");
308       return 0;
309     }
310 
311   /* Some targets (eg, PPC) use the count register for branch on table
312      instructions.  ??? This should be a target specific check.  */
313   if (loop_info->has_tablejump)
314     {
315       if (loop_dump_stream)
316 	fprintf (loop_dump_stream,
317 		 "Doloop: Computed branch in the loop.\n");
318       return 0;
319     }
320 
321   if (! loop_info->increment)
322     {
323       if (loop_dump_stream)
324 	fprintf (loop_dump_stream,
325 		 "Doloop: Could not determine iteration info.\n");
326       return 0;
327     }
328 
329   if (GET_CODE (loop_info->increment) != CONST_INT)
330     {
331       if (loop_dump_stream)
332 	fprintf (loop_dump_stream,
333 		 "Doloop: Increment not an integer constant.\n");
334       return 0;
335     }
336 
337   /* There is no guarantee that a NE loop will terminate if the
338      absolute increment is not unity.  ??? We could compute this
339      condition at run-time and have an additional jump around the loop
340      to ensure an infinite loop.  */
341   if (loop_info->comparison_code == NE
342       && !loop_info->preconditioned
343       && INTVAL (loop_info->increment) != -1
344       && INTVAL (loop_info->increment) != 1)
345     {
346       if (loop_dump_stream)
347 	fprintf (loop_dump_stream,
348 		 "Doloop: NE loop with non-unity increment.\n");
349       return 0;
350     }
351 
352   /* Check for loops that may not terminate under special conditions.  */
353   if (! loop_info->n_iterations
354       && ((loop_info->comparison_code == LEU
355 	   && INTVAL (loop_info->increment) > 0)
356 	  || (loop_info->comparison_code == GEU
357 	      && INTVAL (loop_info->increment) < 0)
358 	  || (loop_info->comparison_code == LTU
359 	      && INTVAL (loop_info->increment) > 1)
360 	  || (loop_info->comparison_code == GTU
361 	      && INTVAL (loop_info->increment) < -1)))
362     {
363       /* If the comparison is LEU and the comparison value is UINT_MAX
364 	 then the loop will not terminate.  Similarly, if the
365 	 comparison code is GEU and the comparison value is 0, the
366 	 loop will not terminate.
367 
368 	 If the absolute increment is not 1, the loop can be infinite
369 	 even with LTU/GTU, e.g. for (i = 3; i > 0; i -= 2)
370 
371 	 Note that with LE and GE, the loop behavior is undefined
372 	 (C++ standard section 5 clause 5) if an overflow occurs, say
373 	 between INT_MAX and INT_MAX + 1.  We thus don't have to worry
374 	 about these two cases.
375 
376 	 ??? We could compute these conditions at run-time and have a
377 	 additional jump around the loop to ensure an infinite loop.
378 	 However, it is very unlikely that this is the intended
379 	 behavior of the loop and checking for these rare boundary
380 	 conditions would pessimize all other code.
381 
382 	 If the loop is executed only a few times an extra check to
383 	 restart the loop could use up most of the benefits of using a
384 	 count register loop.  Note however, that normally, this
385 	 restart branch would never execute, so it could be predicted
386 	 well by the CPU.  We should generate the pessimistic code by
387 	 default, and have an option, e.g. -funsafe-loops that would
388 	 enable count-register loops in this case.  */
389       if (loop_dump_stream)
390 	fprintf (loop_dump_stream,
391 		 "Doloop: Possible infinite iteration case ignored.\n");
392     }
393 
394   return 1;
395 }
396 
397 
398 /* Modify the loop to use the low-overhead looping insn where LOOP
399    describes the loop, ITERATIONS is an RTX containing the desired
400    number of loop iterations, ITERATIONS_MAX is a CONST_INT specifying
401    the maximum number of loop iterations, and DOLOOP_INSN is the
402    low-overhead looping insn to emit at the end of the loop.  This
403    returns nonzero if it was successful.  */
404 static int
doloop_modify(loop,iterations,iterations_max,doloop_seq,start_label,condition)405 doloop_modify (loop, iterations, iterations_max,
406 	       doloop_seq, start_label, condition)
407      const struct loop *loop;
408      rtx iterations;
409      rtx iterations_max;
410      rtx doloop_seq;
411      rtx start_label;
412      rtx condition;
413 {
414   rtx counter_reg;
415   rtx count;
416   rtx sequence;
417   rtx jump_insn;
418   int nonneg = 0;
419   int decrement_count;
420 
421   jump_insn = prev_nonnote_insn (loop->end);
422 
423   if (loop_dump_stream)
424     {
425       fprintf (loop_dump_stream, "Doloop: Inserting doloop pattern (");
426       if (GET_CODE (iterations) == CONST_INT)
427 	fprintf (loop_dump_stream, HOST_WIDE_INT_PRINT_DEC,
428 		 INTVAL (iterations));
429       else
430 	fputs ("runtime", loop_dump_stream);
431       fputs (" iterations).", loop_dump_stream);
432     }
433 
434   /* Emit the label that will delimit the top of the loop.
435      This has to be done before the delete_insn call below, to prevent
436      delete_insn from deleting too much.  */
437   emit_label_after (start_label, loop->top ? loop->top : loop->start);
438   LABEL_NUSES (start_label)++;
439 
440   /* Discard original jump to continue loop.  The original compare
441      result may still be live, so it cannot be discarded explicitly.  */
442   delete_related_insns (jump_insn);
443 
444   counter_reg = XEXP (condition, 0);
445   if (GET_CODE (counter_reg) == PLUS)
446     counter_reg = XEXP (counter_reg, 0);
447 
448   start_sequence ();
449 
450   count = iterations;
451   decrement_count = 0;
452   switch (GET_CODE (condition))
453     {
454     case NE:
455       /* Currently only NE tests against zero and one are supported.  */
456       if (XEXP (condition, 1) == const0_rtx)
457 	decrement_count = 1;
458       else if (XEXP (condition, 1) != const1_rtx)
459 	abort ();
460       break;
461 
462     case GE:
463       /* Currently only GE tests against zero are supported.  */
464       if (XEXP (condition, 1) != const0_rtx)
465 	abort ();
466 
467       /* The iteration count needs decrementing for a GE test.  */
468       decrement_count = 1;
469 
470       /* Determine if the iteration counter will be non-negative.
471 	 Note that the maximum value loaded is iterations_max - 1.  */
472       if ((unsigned HOST_WIDE_INT) INTVAL (iterations_max)
473 	  <= ((unsigned) 1 << (GET_MODE_BITSIZE (GET_MODE (counter_reg)) - 1)))
474 	nonneg = 1;
475       break;
476 
477       /* Abort if an invalid doloop pattern has been generated.  */
478     default:
479       abort ();
480     }
481 
482   if (decrement_count)
483     {
484       if (GET_CODE (count) == CONST_INT)
485 	count = GEN_INT (INTVAL (count) - 1);
486       else
487 	count = expand_simple_binop (GET_MODE (counter_reg), MINUS,
488 				     count, GEN_INT (1),
489 				     0, 0, OPTAB_LIB_WIDEN);
490     }
491 
492   /* Insert initialization of the count register into the loop header.  */
493   convert_move (counter_reg, count, 1);
494   sequence = get_insns ();
495   end_sequence ();
496   emit_insn_before (sequence, loop->start);
497 
498   /* Some targets (eg, C4x) need to initialize special looping
499      registers.  */
500 #ifdef HAVE_doloop_begin
501   {
502     rtx init;
503 
504     init = gen_doloop_begin (counter_reg,
505 			     GET_CODE (iterations) == CONST_INT
506 			     ? iterations : const0_rtx, iterations_max,
507 			     GEN_INT (loop->level));
508     if (init)
509       {
510 	start_sequence ();
511 	emit_insn (init);
512 	sequence = get_insns ();
513 	end_sequence ();
514 	emit_insn_after (sequence, loop->start);
515       }
516   }
517 #endif
518 
519   /* Insert the new low-overhead looping insn.  */
520   emit_jump_insn_before (doloop_seq, loop->end);
521   jump_insn = prev_nonnote_insn (loop->end);
522   JUMP_LABEL (jump_insn) = start_label;
523 
524   /* Add a REG_NONNEG note if the actual or estimated maximum number
525      of iterations is non-negative.  */
526   if (nonneg)
527     {
528       REG_NOTES (jump_insn)
529 	= gen_rtx_EXPR_LIST (REG_NONNEG, NULL_RTX, REG_NOTES (jump_insn));
530     }
531   return 1;
532 }
533 
534 
535 /* Handle the more complex case, where the bounds are not known at
536    compile time.  In this case we generate a run_time calculation of
537    the number of iterations.  We rely on the existence of a run-time
538    guard to ensure that the loop executes at least once, i.e.,
539    initial_value obeys the loop comparison condition.  If a guard is
540    not present, we emit one.  The loop to modify is described by LOOP.
541    ITERATIONS_MAX is a CONST_INT specifying the estimated maximum
542    number of loop iterations.  DOLOOP_INSN is the low-overhead looping
543    insn to insert.  Returns nonzero if loop successfully modified.  */
544 static int
doloop_modify_runtime(loop,iterations_max,doloop_seq,start_label,mode,condition)545 doloop_modify_runtime (loop, iterations_max,
546 		       doloop_seq, start_label, mode, condition)
547      const struct loop *loop;
548      rtx iterations_max;
549      rtx doloop_seq;
550      rtx start_label;
551      enum machine_mode mode;
552      rtx condition;
553 {
554   const struct loop_info *loop_info = LOOP_INFO (loop);
555   HOST_WIDE_INT abs_inc;
556   HOST_WIDE_INT abs_loop_inc;
557   int neg_inc;
558   rtx diff;
559   rtx sequence;
560   rtx iterations;
561   rtx initial_value;
562   rtx final_value;
563   rtx increment;
564   int unsigned_p;
565   enum rtx_code comparison_code;
566 
567   increment = loop_info->increment;
568   initial_value = loop_info->initial_value;
569   final_value = loop_info->final_value;
570 
571   neg_inc = 0;
572   abs_inc = INTVAL (increment);
573   if (abs_inc < 0)
574     {
575       abs_inc = -abs_inc;
576       neg_inc = 1;
577     }
578 
579   comparison_code = loop_info->comparison_code;
580   unsigned_p = (comparison_code == LTU
581 		|| comparison_code == LEU
582 		|| comparison_code == GTU
583 		|| comparison_code == GEU
584 		|| comparison_code == NE);
585 
586   /* The number of iterations (prior to any loop unrolling) is given by:
587 
588        n = (abs (final - initial) + abs_inc - 1) / abs_inc.
589 
590      However, it is possible for the summation to overflow, and a
591      safer method is:
592 
593        n = abs (final - initial) / abs_inc;
594        n += (abs (final - initial) % abs_inc) != 0;
595 
596      But when abs_inc is a power of two, the summation won't overflow
597      except in cases where the loop never terminates.  So we don't
598      need to use this more costly calculation.
599 
600      If the loop has been unrolled, the full calculation is
601 
602        t1 = abs_inc * unroll_number;		        increment per loop
603        n = (abs (final - initial) + abs_inc - 1) / t1;    full loops
604        n += (abs (final - initial) + abs_inc - 1) % t1) >= abs_inc;
605                                                           partial loop
606      which works out to be equivalent to
607 
608        n = (abs (final - initial) + t1 - 1) / t1;
609 
610      In the case where the loop was preconditioned, a few iterations
611      may have been executed earlier; but 'initial' was adjusted as they
612      were executed, so we don't need anything special for that case here.
613      As above, when t1 is a power of two we don't need to worry about
614      overflow.
615 
616      The division and modulo operations can be avoided by requiring
617      that the increment is a power of 2 (precondition_loop_p enforces
618      this requirement).  Nevertheless, the RTX_COSTS should be checked
619      to see if a fast divmod is available.  */
620 
621   start_sequence ();
622   /* abs (final - initial)  */
623   diff = expand_simple_binop (mode, MINUS,
624 			      copy_rtx (neg_inc ? initial_value : final_value),
625 			      copy_rtx (neg_inc ? final_value : initial_value),
626 			      NULL_RTX, unsigned_p, OPTAB_LIB_WIDEN);
627 
628   /* Some code transformations can result in code akin to
629 
630 	  tmp = i + 1;
631 	  ...
632 	  goto scan_start;
633 	top:
634 	  tmp = tmp + 1;
635 	scan_start:
636 	  i = tmp;
637 	  if (i < n) goto top;
638 
639      We'll have already detected this form of loop in scan_loop,
640      and set loop->top and loop->scan_start appropriately.
641 
642      In this situation, we skip the increment the first time through
643      the loop, which results in an incorrect estimate of the number
644      of iterations.  Adjust the difference to compensate.  */
645   /* ??? Logically, it would seem this belongs in loop_iterations.
646      However, this causes regressions e.g. on x86 execute/20011008-3.c,
647      so I do not believe we've properly characterized the exact nature
648      of the problem.  In the meantime, this fixes execute/20011126-2.c
649      on ia64 and some Ada front end miscompilation on ppc.  */
650 
651   if (loop->scan_start)
652     {
653       rtx iteration_var = loop_info->iteration_var;
654       struct loop_ivs *ivs = LOOP_IVS (loop);
655       struct iv_class *bl;
656 
657       if (REG_IV_TYPE (ivs, REGNO (iteration_var)) == BASIC_INDUCT)
658 	bl = REG_IV_CLASS (ivs, REGNO (iteration_var));
659       else if (REG_IV_TYPE (ivs, REGNO (iteration_var)) == GENERAL_INDUCT)
660 	{
661 	  struct induction *v = REG_IV_INFO (ivs, REGNO (iteration_var));
662 	  bl = REG_IV_CLASS (ivs, REGNO (v->src_reg));
663 	}
664       else
665 	/* Iteration var must be an induction variable to get here.  */
666 	abort ();
667 
668       if (INSN_UID (bl->biv->insn) < max_uid_for_loop
669 	  && INSN_LUID (bl->biv->insn) < INSN_LUID (loop->scan_start))
670 	{
671 	  if (loop_dump_stream)
672 	    fprintf (loop_dump_stream,
673 	         "Doloop: Basic induction var skips initial incr.\n");
674 
675 	  diff = expand_simple_binop (mode, PLUS, diff, GEN_INT (abs_inc),
676 				      diff, unsigned_p, OPTAB_LIB_WIDEN);
677 	}
678     }
679 
680   abs_loop_inc = abs_inc * loop_info->unroll_number;
681   if (abs_loop_inc != 1)
682     {
683       int shift_count;
684 
685       shift_count = exact_log2 (abs_loop_inc);
686       if (shift_count < 0)
687 	abort ();
688 
689       /* (abs (final - initial) + abs_inc * unroll_number - 1) */
690       diff = expand_simple_binop (GET_MODE (diff), PLUS,
691 				  diff, GEN_INT (abs_loop_inc - 1),
692 				  diff, 1, OPTAB_LIB_WIDEN);
693 
694       /* (abs (final - initial) + abs_inc * unroll_number - 1)
695 	 / (abs_inc * unroll_number)  */
696       diff = expand_simple_binop (GET_MODE (diff), LSHIFTRT,
697 				  diff, GEN_INT (shift_count),
698 				  diff, 1, OPTAB_LIB_WIDEN);
699     }
700   iterations = diff;
701 
702   /* If there is a NOTE_INSN_LOOP_VTOP, we have a `for' or `while'
703      style loop, with a loop exit test at the start.  Thus, we can
704      assume that the loop condition was true when the loop was
705      entered.
706 
707      `do-while' loops require special treatment since the exit test is
708      not executed before the start of the loop.  We need to determine
709      if the loop will terminate after the first pass and to limit the
710      iteration count to one if necessary.  */
711   if (! loop->vtop)
712     {
713       if (loop_dump_stream)
714 	fprintf (loop_dump_stream, "Doloop: Do-while loop.\n");
715 
716       /* A `do-while' loop must iterate at least once.  For code like
717 	 i = initial; do { ... } while (++i < final);
718 	 we will calculate a bogus iteration count if initial > final.
719 	 So detect this and set the iteration count to 1.
720 	 Note that if the loop has been unrolled, then the loop body
721 	 is guaranteed to execute at least once.  Also, when the
722 	 comparison is NE, our calculated count will be OK.  */
723       if (loop_info->unroll_number == 1 && comparison_code != NE)
724 	{
725 	  rtx label;
726 
727 	  /*  Emit insns to test if the loop will immediately
728 	      terminate and to set the iteration count to 1 if true.  */
729 	  label = gen_label_rtx();
730 	  emit_cmp_and_jump_insns (copy_rtx (initial_value),
731 				   copy_rtx (loop_info->comparison_value),
732 				   comparison_code, NULL_RTX, mode, 0,
733 				   label);
734 	  JUMP_LABEL (get_last_insn ()) = label;
735 	  LABEL_NUSES (label)++;
736 	  emit_move_insn (iterations, const1_rtx);
737 	  emit_label (label);
738 	}
739     }
740 
741   sequence = get_insns ();
742   end_sequence ();
743   emit_insn_before (sequence, loop->start);
744 
745   return doloop_modify (loop, iterations, iterations_max, doloop_seq,
746 			start_label, condition);
747 }
748 
749 
750 /* This is the main entry point.  Process loop described by LOOP
751    validating that the loop is suitable for conversion to use a low
752    overhead looping instruction, replacing the jump insn where
753    suitable.  We distinguish between loops with compile-time bounds
754    and those with run-time bounds.  Information from LOOP is used to
755    compute the number of iterations and to determine whether the loop
756    is a candidate for this optimization.  Returns nonzero if loop
757    successfully modified.  */
758 int
doloop_optimize(loop)759 doloop_optimize (loop)
760      const struct loop *loop;
761 {
762   struct loop_info *loop_info = LOOP_INFO (loop);
763   rtx initial_value;
764   rtx final_value;
765   rtx increment;
766   rtx jump_insn;
767   enum machine_mode mode;
768   unsigned HOST_WIDE_INT n_iterations;
769   unsigned HOST_WIDE_INT n_iterations_max;
770   rtx doloop_seq, doloop_pat, doloop_reg;
771   rtx iterations;
772   rtx iterations_max;
773   rtx start_label;
774   rtx condition;
775 
776   if (loop_dump_stream)
777     fprintf (loop_dump_stream,
778 	     "Doloop: Processing loop %d, enclosed levels %d.\n",
779 	     loop->num, loop->level);
780 
781   jump_insn = prev_nonnote_insn (loop->end);
782 
783   /* Check that loop is a candidate for a low-overhead looping insn.  */
784   if (! doloop_valid_p (loop, jump_insn))
785     return 0;
786 
787   /* Determine if the loop can be safely, and profitably,
788      preconditioned.  While we don't precondition the loop in a loop
789      unrolling sense, this test ensures that the loop is well behaved
790      and that the increment is a constant integer.  */
791   if (! precondition_loop_p (loop, &initial_value, &final_value,
792 			     &increment, &mode))
793     {
794       if (loop_dump_stream)
795 	fprintf (loop_dump_stream,
796 		 "Doloop: Cannot precondition loop.\n");
797       return 0;
798     }
799 
800   /* Determine or estimate the maximum number of loop iterations.  */
801   n_iterations = loop_info->n_iterations;
802   if (n_iterations)
803     {
804       /* This is the simple case where the initial and final loop
805 	 values are constants.  */
806       n_iterations_max = n_iterations;
807     }
808   else
809     {
810       int nonneg = find_reg_note (jump_insn, REG_NONNEG, 0) != 0;
811 
812       /* This is the harder case where the initial and final loop
813 	 values may not be constants.  */
814       n_iterations_max = doloop_iterations_max (loop_info, mode, nonneg);
815 
816       if (! n_iterations_max)
817 	{
818 	  /* We have something like `for (i = 0; i < 10; i--)'.  */
819 	  if (loop_dump_stream)
820 	    fprintf (loop_dump_stream,
821 		     "Doloop: Not normal loop.\n");
822 	  return 0;
823 	}
824     }
825 
826   /* Account for loop unrolling in the iteration count.  This will
827      have no effect if loop_iterations could not determine the number
828      of iterations.  */
829   n_iterations /= loop_info->unroll_number;
830   n_iterations_max /= loop_info->unroll_number;
831 
832   if (n_iterations && n_iterations < 3)
833     {
834       if (loop_dump_stream)
835 	fprintf (loop_dump_stream,
836 		 "Doloop: Too few iterations (%ld) to be profitable.\n",
837 		 (long int) n_iterations);
838       return 0;
839     }
840 
841   iterations = GEN_INT (n_iterations);
842   iterations_max = GEN_INT (n_iterations_max);
843 
844   /* Generate looping insn.  If the pattern FAILs then give up trying
845      to modify the loop since there is some aspect the back-end does
846      not like.  */
847   start_label = gen_label_rtx ();
848   doloop_reg = gen_reg_rtx (mode);
849   doloop_seq = gen_doloop_end (doloop_reg, iterations, iterations_max,
850 			       GEN_INT (loop->level), start_label);
851   if (! doloop_seq && mode != word_mode)
852     {
853       PUT_MODE (doloop_reg, word_mode);
854       doloop_seq = gen_doloop_end (doloop_reg, iterations, iterations_max,
855 				   GEN_INT (loop->level), start_label);
856     }
857   if (! doloop_seq)
858     {
859       if (loop_dump_stream)
860 	fprintf (loop_dump_stream,
861 		 "Doloop: Target unwilling to use doloop pattern!\n");
862       return 0;
863     }
864 
865   /* If multiple instructions were created, the last must be the
866      jump instruction.  Also, a raw define_insn may yield a plain
867      pattern.  */
868   doloop_pat = doloop_seq;
869   if (INSN_P (doloop_pat))
870     {
871       while (NEXT_INSN (doloop_pat) != NULL_RTX)
872 	doloop_pat = NEXT_INSN (doloop_pat);
873       if (GET_CODE (doloop_pat) == JUMP_INSN)
874 	doloop_pat = PATTERN (doloop_pat);
875       else
876 	doloop_pat = NULL_RTX;
877     }
878 
879   if (! doloop_pat
880       || ! (condition = doloop_condition_get (doloop_pat)))
881     {
882       if (loop_dump_stream)
883 	fprintf (loop_dump_stream,
884 		 "Doloop: Unrecognizable doloop pattern!\n");
885       return 0;
886     }
887 
888   if (n_iterations != 0)
889     /* Handle the simpler case, where we know the iteration count at
890        compile time.  */
891     return doloop_modify (loop, iterations, iterations_max, doloop_seq,
892 			  start_label, condition);
893   else
894     /* Handle the harder case, where we must add additional runtime tests.  */
895     return doloop_modify_runtime (loop, iterations_max, doloop_seq,
896 				  start_label, mode, condition);
897 }
898 
899 #endif /* HAVE_doloop_end */
900