/* Loop unrolling and peeling.
   Copyright (C) 2002, 2003, 2004, 2005, 2007, 2008, 2010
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "hard-reg-set.h"
#include "obstack.h"
#include "basic-block.h"
#include "cfgloop.h"
#include "cfglayout.h"
#include "params.h"
#include "output.h"
#include "expr.h"
#include "hashtab.h"
#include "recog.h"

/* This pass performs loop unrolling and peeling.  We only perform these
   optimizations on innermost loops (with a single exception) because
   the impact on performance is greatest here, and we want to avoid
   unnecessary code size growth.  The gain comes from greater sequentiality
   of code, from giving further passes better code to optimize, and in some
   cases from fewer tests of exit conditions.  The main problem is code
   growth, which impacts performance negatively due to cache effects.

   What we do:

   -- complete peeling of once-rolling loops; this is the above mentioned
      exception, as it causes the loop to be cancelled completely and
      does not cause code growth
   -- complete peeling of loops that roll a (small) constant number of times.
   -- simple peeling of first iterations of loops that do not roll much
      (according to profile feedback)
   -- unrolling of loops that roll a constant number of times; this is
      almost always a win, as we get rid of exit condition tests.
   -- unrolling of loops that roll a number of times that we can compute
      at runtime; we also get rid of exit condition tests here, but there
      is the extra expense of calculating the number of iterations
   -- simple unrolling of remaining loops; this is performed only if we
      are asked to, as the gain is questionable in this case and often
      it may even slow down the code
   For more detailed descriptions of each of those, see the comments at
   the appropriate functions below.

   There are many parameters (defined and described in params.def) that
   control how much we unroll/peel.

   ??? A great problem is that we don't have a good way to determine
   how many times we should unroll the loop; the experiments I have made
   showed that this choice may affect performance on the order of several
   percent.  */

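/* For orientation, an illustrative summary (not an exhaustive list): the
   decisions below are bounded by the following parameters, each settable
   via --param; the macro names are the ones used in this file, and the
   command-line spellings follow the usual params.def convention (check
   params.def for the exact names):

     PARAM_MAX_ONCE_PEELED_INSNS        (max-once-peeled-insns)
     PARAM_MAX_COMPLETELY_PEELED_INSNS  (max-completely-peeled-insns)
     PARAM_MAX_COMPLETELY_PEEL_TIMES    (max-completely-peel-times)
     PARAM_MAX_PEELED_INSNS             (max-peeled-insns)
     PARAM_MAX_PEEL_TIMES               (max-peel-times)
     PARAM_MAX_UNROLLED_INSNS           (max-unrolled-insns)
     PARAM_MAX_AVERAGE_UNROLLED_INSNS   (max-average-unrolled-insns)
     PARAM_MAX_UNROLL_TIMES             (max-unroll-times)  */
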
/* Information about induction variables to split.  */

struct iv_to_split
{
  rtx insn;		/* The insn in which the induction variable occurs.  */
  rtx base_var;		/* The variable on which the values in the further
			   iterations are based.  */
  rtx step;		/* Step of the induction variable.  */
  struct iv_to_split *next; /* Next entry in walking order.  */
  unsigned n_loc;
  unsigned loc[3];	/* Location where the definition of the induction
			   variable occurs in the insn.  For example if
			   N_LOC is 2, the expression is located at
			   XEXP (XEXP (single_set, loc[0]), loc[1]).  */
};

/* Information about accumulators to expand.  */

struct var_to_expand
{
  rtx insn;		           /* The insn in which the variable expansion occurs.  */
  rtx reg;                         /* The accumulator which is expanded.  */
  VEC(rtx,heap) *var_expansions;   /* The copies of the accumulator which is expanded.  */
  struct var_to_expand *next;	   /* Next entry in walking order.  */
  enum rtx_code op;                /* The type of the accumulation - addition, subtraction
                                      or multiplication.  */
  int expansion_count;             /* Count the number of expansions generated so far.  */
  int reuse_expansion;             /* The expansion we intend to reuse to expand
                                      the accumulator.  If REUSE_EXPANSION is 0 reuse
                                      the original accumulator.  Else use
                                      var_expansions[REUSE_EXPANSION - 1].  */
  unsigned accum_pos;              /* The position in which the accumulator is placed in
                                      the insn src.  For example in x = x + something
                                      accum_pos is 0 while in x = something + x accum_pos
                                      is 1.  */
};

/* Information about optimization applied in
   the unrolled loop.  */

struct opt_info
{
  htab_t insns_to_split;           /* A hashtable of insns to split.  */
  struct iv_to_split *iv_to_split_head; /* The first iv to split.  */
  struct iv_to_split **iv_to_split_tail; /* Pointer to the tail of the list.  */
  htab_t insns_with_var_to_expand; /* A hashtable of insns with accumulators
                                      to expand.  */
  struct var_to_expand *var_to_expand_head; /* The first var to expand.  */
  struct var_to_expand **var_to_expand_tail; /* Pointer to the tail of the list.  */
  unsigned first_new_block;        /* The first basic block that was
                                      duplicated.  */
  basic_block loop_exit;           /* The loop exit basic block.  */
  basic_block loop_preheader;      /* The loop preheader basic block.  */
};

static void decide_unrolling_and_peeling (int);
static void peel_loops_completely (int);
static void decide_peel_simple (struct loop *, int);
static void decide_peel_once_rolling (struct loop *, int);
static void decide_peel_completely (struct loop *, int);
static void decide_unroll_stupid (struct loop *, int);
static void decide_unroll_constant_iterations (struct loop *, int);
static void decide_unroll_runtime_iterations (struct loop *, int);
static void peel_loop_simple (struct loop *);
static void peel_loop_completely (struct loop *);
static void unroll_loop_stupid (struct loop *);
static void unroll_loop_constant_iterations (struct loop *);
static void unroll_loop_runtime_iterations (struct loop *);
static struct opt_info *analyze_insns_in_loop (struct loop *);
static void opt_info_start_duplication (struct opt_info *);
static void apply_opt_in_copies (struct opt_info *, unsigned, bool, bool);
static void free_opt_info (struct opt_info *);
static struct var_to_expand *analyze_insn_to_expand_var (struct loop*, rtx);
static bool referenced_in_one_insn_in_loop_p (struct loop *, rtx, int *);
static struct iv_to_split *analyze_iv_to_split_insn (rtx);
static void expand_var_during_unrolling (struct var_to_expand *, rtx);
static void insert_var_expansion_initialization (struct var_to_expand *,
						 basic_block);
static void combine_var_copies_in_loop_exit (struct var_to_expand *,
					     basic_block);
static rtx get_expansion (struct var_to_expand *);

/* Unroll and/or peel (depending on FLAGS) LOOPS.  */
void
unroll_and_peel_loops (int flags)
{
  struct loop *loop;
  bool check;
  loop_iterator li;

  /* First perform complete loop peeling (it is almost surely a win,
     and it strongly affects the parameters for further decisions).  */
  peel_loops_completely (flags);

  /* Now decide the rest of the unrolling and peeling.  */
  decide_unrolling_and_peeling (flags);

  /* Scan the loops, inner ones first.  */
  FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
    {
      check = true;
      /* And perform the appropriate transformations.  */
      switch (loop->lpt_decision.decision)
	{
	case LPT_PEEL_COMPLETELY:
	  /* Already done.  */
	  gcc_unreachable ();
	case LPT_PEEL_SIMPLE:
	  peel_loop_simple (loop);
	  break;
	case LPT_UNROLL_CONSTANT:
	  unroll_loop_constant_iterations (loop);
	  break;
	case LPT_UNROLL_RUNTIME:
	  unroll_loop_runtime_iterations (loop);
	  break;
	case LPT_UNROLL_STUPID:
	  unroll_loop_stupid (loop);
	  break;
	case LPT_NONE:
	  check = false;
	  break;
	default:
	  gcc_unreachable ();
	}
      if (check)
	{
#ifdef ENABLE_CHECKING
	  verify_dominators (CDI_DOMINATORS);
	  verify_loop_structure ();
#endif
	}
    }

  iv_analysis_done ();
}

/* Check whether the exit of LOOP is at the end of the loop body.  */

static bool
loop_exit_at_end_p (struct loop *loop)
{
  struct niter_desc *desc = get_simple_loop_desc (loop);
  rtx insn;

  if (desc->in_edge->dest != loop->latch)
    return false;

  /* Check that the latch is empty.  */
  FOR_BB_INSNS (loop->latch, insn)
    {
      if (INSN_P (insn))
	return false;
    }

  return true;
}
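
/* An illustrative sketch (mine, not from the original sources): the
   predicate above distinguishes the two canonical loop shapes.  In a
   do-while style loop

     loop:  body;
            if (cond) goto loop;   // exit test at the end, latch empty

   the exit edge leaves from the end of the body, so loop_exit_at_end_p
   returns true; whereas in a while style loop

     loop:  if (!cond) goto out;   // exit test at the beginning
            body;
            goto loop;
     out:

   it returns false.  The unrolling code below uses this to decide whether
   to keep the exit test in the first or in the last copy of the body.  */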

/* Depending on FLAGS, check whether to peel loops completely and do so.  */
static void
peel_loops_completely (int flags)
{
  struct loop *loop;
  loop_iterator li;

  /* Scan the loops, the inner ones first.  */
  FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
    {
      loop->lpt_decision.decision = LPT_NONE;

      if (dump_file)
	fprintf (dump_file,
		 "\n;; *** Considering loop %d for complete peeling ***\n",
		 loop->num);

      loop->ninsns = num_loop_insns (loop);

      decide_peel_once_rolling (loop, flags);
      if (loop->lpt_decision.decision == LPT_NONE)
	decide_peel_completely (loop, flags);

      if (loop->lpt_decision.decision == LPT_PEEL_COMPLETELY)
	{
	  peel_loop_completely (loop);
#ifdef ENABLE_CHECKING
	  verify_dominators (CDI_DOMINATORS);
	  verify_loop_structure ();
#endif
	}
    }
}

/* Decide whether to unroll or peel loops (depending on FLAGS) and how much.  */
static void
decide_unrolling_and_peeling (int flags)
{
  struct loop *loop;
  loop_iterator li;

  /* Scan the loops, inner ones first.  */
  FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
    {
      loop->lpt_decision.decision = LPT_NONE;

      if (dump_file)
	fprintf (dump_file, "\n;; *** Considering loop %d ***\n", loop->num);

      /* Do not peel cold areas.  */
      if (optimize_loop_for_size_p (loop))
	{
	  if (dump_file)
	    fprintf (dump_file, ";; Not considering loop, cold area\n");
	  continue;
	}

      /* Can the loop be manipulated?  */
      if (!can_duplicate_loop_p (loop))
	{
	  if (dump_file)
	    fprintf (dump_file,
		     ";; Not considering loop, cannot duplicate\n");
	  continue;
	}

      /* Skip non-innermost loops.  */
      if (loop->inner)
	{
	  if (dump_file)
	    fprintf (dump_file, ";; Not considering loop, is not innermost\n");
	  continue;
	}

      loop->ninsns = num_loop_insns (loop);
      loop->av_ninsns = average_num_loop_insns (loop);

      /* Try transformations one by one in decreasing order of
	 priority.  */

      decide_unroll_constant_iterations (loop, flags);
      if (loop->lpt_decision.decision == LPT_NONE)
	decide_unroll_runtime_iterations (loop, flags);
      if (loop->lpt_decision.decision == LPT_NONE)
	decide_unroll_stupid (loop, flags);
      if (loop->lpt_decision.decision == LPT_NONE)
	decide_peel_simple (loop, flags);
    }
}

/* Decide whether the LOOP is once rolling and suitable for complete
   peeling.  */
static void
decide_peel_once_rolling (struct loop *loop, int flags ATTRIBUTE_UNUSED)
{
  struct niter_desc *desc;

  if (dump_file)
    fprintf (dump_file, "\n;; Considering peeling once rolling loop\n");

  /* Is the loop small enough?  */
  if ((unsigned) PARAM_VALUE (PARAM_MAX_ONCE_PEELED_INSNS) < loop->ninsns)
    {
      if (dump_file)
	fprintf (dump_file, ";; Not considering loop, is too big\n");
      return;
    }

  /* Check for simple loops.  */
  desc = get_simple_loop_desc (loop);

  /* Check number of iterations.  */
  if (!desc->simple_p
      || desc->assumptions
      || desc->infinite
      || !desc->const_iter
      || desc->niter != 0)
    {
      if (dump_file)
	fprintf (dump_file,
		 ";; Unable to prove that the loop rolls exactly once\n");
      return;
    }

  /* Success.  */
  if (dump_file)
    fprintf (dump_file, ";; Decided to peel exactly once rolling loop\n");
  loop->lpt_decision.decision = LPT_PEEL_COMPLETELY;
}
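
/* An illustrative example (mine, not from the original sources): a loop like

     do
       body;
     while (0);

   "rolls" exactly once -- desc->niter == 0, since, as I read the niter
   convention here, the count excludes the first pass through the body.
   Peeling such a loop completely just replaces it with a single copy of
   the body, removing the loop with no code growth at all; this is the one
   exception to the innermost-loops-only rule mentioned at the top of the
   file.  */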

/* Decide whether the LOOP is suitable for complete peeling.  */
static void
decide_peel_completely (struct loop *loop, int flags ATTRIBUTE_UNUSED)
{
  unsigned npeel;
  struct niter_desc *desc;

  if (dump_file)
    fprintf (dump_file, "\n;; Considering peeling completely\n");

  /* Skip non-innermost loops.  */
  if (loop->inner)
    {
      if (dump_file)
	fprintf (dump_file, ";; Not considering loop, is not innermost\n");
      return;
    }

  /* Do not peel cold areas.  */
  if (optimize_loop_for_size_p (loop))
    {
      if (dump_file)
	fprintf (dump_file, ";; Not considering loop, cold area\n");
      return;
    }

  /* Can the loop be manipulated?  */
  if (!can_duplicate_loop_p (loop))
    {
      if (dump_file)
	fprintf (dump_file,
		 ";; Not considering loop, cannot duplicate\n");
      return;
    }

  /* npeel = number of iterations to peel.  */
  npeel = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS) / loop->ninsns;
  if (npeel > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES))
    npeel = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES);

  /* Is the loop small enough?  */
  if (!npeel)
    {
      if (dump_file)
	fprintf (dump_file, ";; Not considering loop, is too big\n");
      return;
    }

  /* Check for simple loops.  */
  desc = get_simple_loop_desc (loop);

  /* Check number of iterations.  */
  if (!desc->simple_p
      || desc->assumptions
      || !desc->const_iter
      || desc->infinite)
    {
      if (dump_file)
	fprintf (dump_file,
		 ";; Unable to prove that the loop iterates constant times\n");
      return;
    }

  if (desc->niter > npeel - 1)
    {
      if (dump_file)
	{
	  fprintf (dump_file,
		   ";; Not peeling loop completely, rolls too much (");
	  fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, desc->niter);
	  fprintf (dump_file, " iterations > %d [maximum peelings])\n", npeel);
	}
      return;
    }

  /* Success.  */
  if (dump_file)
    fprintf (dump_file, ";; Decided to peel loop completely\n");
  loop->lpt_decision.decision = LPT_PEEL_COMPLETELY;
}

/* Peel all iterations of LOOP, remove exit edges and cancel the loop
   completely.  The transformation does this:

   for (i = 0; i < 4; i++)
     body;

   ==>

   i = 0;
   body; i++;
   body; i++;
   body; i++;
   body; i++;
   */
static void
peel_loop_completely (struct loop *loop)
{
  sbitmap wont_exit;
  unsigned HOST_WIDE_INT npeel;
  unsigned i;
  VEC (edge, heap) *remove_edges;
  edge ein;
  struct niter_desc *desc = get_simple_loop_desc (loop);
  struct opt_info *opt_info = NULL;

  npeel = desc->niter;

  if (npeel)
    {
      bool ok;

      wont_exit = sbitmap_alloc (npeel + 1);
      sbitmap_ones (wont_exit);
      RESET_BIT (wont_exit, 0);
      if (desc->noloop_assumptions)
	RESET_BIT (wont_exit, 1);

      remove_edges = NULL;

      if (flag_split_ivs_in_unroller)
        opt_info = analyze_insns_in_loop (loop);

      opt_info_start_duplication (opt_info);
      ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
					  npeel,
					  wont_exit, desc->out_edge,
					  &remove_edges,
					  DLTHE_FLAG_UPDATE_FREQ
					  | DLTHE_FLAG_COMPLETTE_PEEL
					  | (opt_info
					     ? DLTHE_RECORD_COPY_NUMBER : 0));
      gcc_assert (ok);

      free (wont_exit);

      if (opt_info)
	{
	  apply_opt_in_copies (opt_info, npeel, false, true);
	  free_opt_info (opt_info);
	}

      /* Remove the exit edges.  */
      for (i = 0; VEC_iterate (edge, remove_edges, i, ein); i++)
	remove_path (ein);
      VEC_free (edge, heap, remove_edges);
    }

  ein = desc->in_edge;
  free_simple_loop_desc (loop);

  /* Now remove the unreachable part of the last iteration and cancel
     the loop.  */
  remove_path (ein);

  if (dump_file)
    fprintf (dump_file, ";; Peeled loop completely, %d times\n", (int) npeel);
}

/* Decide whether to unroll LOOP iterating a constant number of times
   and how much.  */

static void
decide_unroll_constant_iterations (struct loop *loop, int flags)
{
  unsigned nunroll, nunroll_by_av, best_copies, best_unroll = 0, n_copies, i;
  struct niter_desc *desc;

  if (!(flags & UAP_UNROLL))
    {
      /* We were not asked to, so just return silently.  */
      return;
    }

  if (dump_file)
    fprintf (dump_file,
	     "\n;; Considering unrolling loop with constant "
	     "number of iterations\n");

  /* nunroll = total number of copies of the original loop body in
     the unrolled loop (i.e. if it is 2, we have to duplicate the loop
     body once).  */
  nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
  nunroll_by_av
    = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
  if (nunroll > nunroll_by_av)
    nunroll = nunroll_by_av;
  if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
    nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);

  /* Skip big loops.  */
  if (nunroll <= 1)
    {
      if (dump_file)
	fprintf (dump_file, ";; Not considering loop, is too big\n");
      return;
    }

  /* Check for simple loops.  */
  desc = get_simple_loop_desc (loop);

  /* Check number of iterations.  */
  if (!desc->simple_p || !desc->const_iter || desc->assumptions)
    {
      if (dump_file)
	fprintf (dump_file,
		 ";; Unable to prove that the loop iterates constant times\n");
      return;
    }

  /* Check whether the loop rolls enough to consider.  */
  if (desc->niter < 2 * nunroll)
    {
      if (dump_file)
	fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
      return;
    }

  /* Success; now compute the number of iterations to unroll.  We alter
     nunroll so that as few copies of the loop body as possible are
     necessary, while still not decreasing the number of unrollings
     too much (at most by 1).  */
  best_copies = 2 * nunroll + 10;

  i = 2 * nunroll + 2;
  if (i - 1 >= desc->niter)
    i = desc->niter - 2;

  for (; i >= nunroll - 1; i--)
    {
      unsigned exit_mod = desc->niter % (i + 1);

      if (!loop_exit_at_end_p (loop))
	n_copies = exit_mod + i + 1;
      else if (exit_mod != (unsigned) i
	       || desc->noloop_assumptions != NULL_RTX)
	n_copies = exit_mod + i + 2;
      else
	n_copies = i + 1;

      if (n_copies < best_copies)
	{
	  best_copies = n_copies;
	  best_unroll = i;
	}
    }

  if (dump_file)
    fprintf (dump_file, ";; max_unroll %d (%d copies, initial %d).\n",
	     best_unroll + 1, best_copies, nunroll);

  loop->lpt_decision.decision = LPT_UNROLL_CONSTANT;
  loop->lpt_decision.times = best_unroll;

  if (dump_file)
    fprintf (dump_file,
	     ";; Decided to unroll the constant times rolling loop, %d times.\n",
	     loop->lpt_decision.times);
}
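
/* A worked example of the search above (my own numbers, for illustration
   only): suppose desc->niter == 100, the loop exit is not at the end of
   the body, and the parameters yield nunroll == 8.  The candidate unroll
   factors are f = i + 1 for i from 2*8 + 2 = 18 down to 8 - 1 = 7, with
   cost n_copies = 100 % f + f.  For f == 10 we get 100 % 10 + 10 == 10
   copies, which beats e.g. f == 8 (100 % 8 + 8 == 12) and f == 16
   (100 % 16 + 16 == 20), so best_unroll is set to 9 and the loop is
   unrolled 10 times with no leftover peeled copies wasted.  */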

/* Unroll LOOP with constant number of iterations LOOP->LPT_DECISION.TIMES + 1
   times.  The transformation does this:

   for (i = 0; i < 102; i++)
     body;

   ==>

   i = 0;
   body; i++;
   body; i++;
   while (i < 102)
     {
       body; i++;
       body; i++;
       body; i++;
       body; i++;
     }
  */
static void
unroll_loop_constant_iterations (struct loop *loop)
{
  unsigned HOST_WIDE_INT niter;
  unsigned exit_mod;
  sbitmap wont_exit;
  unsigned i;
  VEC (edge, heap) *remove_edges;
  edge e;
  unsigned max_unroll = loop->lpt_decision.times;
  struct niter_desc *desc = get_simple_loop_desc (loop);
  bool exit_at_end = loop_exit_at_end_p (loop);
  struct opt_info *opt_info = NULL;
  bool ok;

  niter = desc->niter;

  /* Should not get here (such a loop should be peeled instead).  */
  gcc_assert (niter > max_unroll + 1);

  exit_mod = niter % (max_unroll + 1);

  wont_exit = sbitmap_alloc (max_unroll + 1);
  sbitmap_ones (wont_exit);

  remove_edges = NULL;
  if (flag_split_ivs_in_unroller
      || flag_variable_expansion_in_unroller)
    opt_info = analyze_insns_in_loop (loop);

  if (!exit_at_end)
    {
      /* The exit is not at the end of the loop; leave the exit test
	 in the first copy, so that loops that start with a test of the
	 exit condition have a continuous body after unrolling.  */

      if (dump_file)
	fprintf (dump_file, ";; Condition on beginning of loop.\n");

      /* Peel exit_mod iterations.  */
      RESET_BIT (wont_exit, 0);
      if (desc->noloop_assumptions)
	RESET_BIT (wont_exit, 1);

      if (exit_mod)
	{
	  opt_info_start_duplication (opt_info);
	  ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
					      exit_mod,
					      wont_exit, desc->out_edge,
					      &remove_edges,
					      DLTHE_FLAG_UPDATE_FREQ
					      | (opt_info && exit_mod > 1
						 ? DLTHE_RECORD_COPY_NUMBER
						   : 0));
	  gcc_assert (ok);

	  if (opt_info && exit_mod > 1)
	    apply_opt_in_copies (opt_info, exit_mod, false, false);

	  desc->noloop_assumptions = NULL_RTX;
	  desc->niter -= exit_mod;
	  desc->niter_max -= exit_mod;
	}

      SET_BIT (wont_exit, 1);
    }
  else
    {
      /* Leave the exit test in the last copy, for the same reason as
	 above, if the loop tests the condition at the end of the loop
	 body.  */

      if (dump_file)
	fprintf (dump_file, ";; Condition on end of loop.\n");

      /* We know that niter >= max_unroll + 2; so we do not need to care
	 about the case when we would exit before reaching the loop.  So
	 just peel exit_mod + 1 iterations.  */
      if (exit_mod != max_unroll
	  || desc->noloop_assumptions)
	{
	  RESET_BIT (wont_exit, 0);
	  if (desc->noloop_assumptions)
	    RESET_BIT (wont_exit, 1);

	  opt_info_start_duplication (opt_info);
	  ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
					      exit_mod + 1,
					      wont_exit, desc->out_edge,
					      &remove_edges,
					      DLTHE_FLAG_UPDATE_FREQ
					      | (opt_info && exit_mod > 0
						 ? DLTHE_RECORD_COPY_NUMBER
						   : 0));
	  gcc_assert (ok);

	  if (opt_info && exit_mod > 0)
	    apply_opt_in_copies (opt_info, exit_mod + 1, false, false);

	  desc->niter -= exit_mod + 1;
	  desc->niter_max -= exit_mod + 1;
	  desc->noloop_assumptions = NULL_RTX;

	  SET_BIT (wont_exit, 0);
	  SET_BIT (wont_exit, 1);
	}

      RESET_BIT (wont_exit, max_unroll);
    }

  /* Now unroll the loop.  */

  opt_info_start_duplication (opt_info);
  ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
				      max_unroll,
				      wont_exit, desc->out_edge,
				      &remove_edges,
				      DLTHE_FLAG_UPDATE_FREQ
				      | (opt_info
					 ? DLTHE_RECORD_COPY_NUMBER
					   : 0));
  gcc_assert (ok);

  if (opt_info)
    {
      apply_opt_in_copies (opt_info, max_unroll, true, true);
      free_opt_info (opt_info);
    }

  free (wont_exit);

  if (exit_at_end)
    {
      basic_block exit_block = get_bb_copy (desc->in_edge->src);
      /* Find a new in and out edge; they are in the last copy we have made.  */

      if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
	{
	  desc->out_edge = EDGE_SUCC (exit_block, 0);
	  desc->in_edge = EDGE_SUCC (exit_block, 1);
	}
      else
	{
	  desc->out_edge = EDGE_SUCC (exit_block, 1);
	  desc->in_edge = EDGE_SUCC (exit_block, 0);
	}
    }

  desc->niter /= max_unroll + 1;
  desc->niter_max /= max_unroll + 1;
  desc->niter_expr = GEN_INT (desc->niter);

  /* Remove the edges.  */
  for (i = 0; VEC_iterate (edge, remove_edges, i, e); i++)
    remove_path (e);
  VEC_free (edge, heap, remove_edges);

  if (dump_file)
    fprintf (dump_file,
	     ";; Unrolled loop %d times, constant # of iterations %i insns\n",
	     max_unroll, num_loop_insns (loop));
}

/* Decide whether to unroll LOOP iterating a runtime-computable number of
   times and how much.  */
static void
decide_unroll_runtime_iterations (struct loop *loop, int flags)
{
  unsigned nunroll, nunroll_by_av, i;
  struct niter_desc *desc;

  if (!(flags & UAP_UNROLL))
    {
      /* We were not asked to, so just return silently.  */
      return;
    }

  if (dump_file)
    fprintf (dump_file,
	     "\n;; Considering unrolling loop with runtime "
	     "computable number of iterations\n");

  /* nunroll = total number of copies of the original loop body in
     the unrolled loop (i.e. if it is 2, we have to duplicate the loop
     body once).  */
  nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
  nunroll_by_av = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
  if (nunroll > nunroll_by_av)
    nunroll = nunroll_by_av;
  if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
    nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);

  /* Skip big loops.  */
  if (nunroll <= 1)
    {
      if (dump_file)
	fprintf (dump_file, ";; Not considering loop, is too big\n");
      return;
    }

  /* Check for simple loops.  */
  desc = get_simple_loop_desc (loop);

  /* Check simpleness.  */
  if (!desc->simple_p || desc->assumptions)
    {
      if (dump_file)
	fprintf (dump_file,
		 ";; Unable to prove that the number of iterations "
		 "can be counted in runtime\n");
      return;
    }

  if (desc->const_iter)
    {
      if (dump_file)
	fprintf (dump_file, ";; Loop iterates constant times\n");
      return;
    }

  /* If we have profile feedback, check whether the loop rolls.  */
  if (loop->header->count && expected_loop_iterations (loop) < 2 * nunroll)
    {
      if (dump_file)
	fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
      return;
    }

  /* Success; now force nunroll to be a power of 2, as we are unable to
     cope with overflows in the computation of the number of iterations.  */
  for (i = 1; 2 * i <= nunroll; i *= 2)
    continue;

  loop->lpt_decision.decision = LPT_UNROLL_RUNTIME;
  loop->lpt_decision.times = i - 1;

  if (dump_file)
    fprintf (dump_file,
	     ";; Decided to unroll the runtime computable "
	     "times rolling loop, %d times.\n",
	     loop->lpt_decision.times);
}
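
/* For illustration (my own example): if the size limits yield nunroll == 7,
   the loop above steps i through 1, 2, 4 and stops (2*4 == 8 > 7), so
   lpt_decision.times is set to 3 and the loop is unrolled 4 times.  The
   power-of-2 restriction is what later lets the modulo be computed with a
   simple AND in unroll_loop_runtime_iterations.  */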

/* Splits edge E and inserts the sequence of instructions INSNS on it, and
   returns the newly created block.  If INSNS is NULL_RTX, nothing is changed
   and NULL is returned instead.  */

basic_block
split_edge_and_insert (edge e, rtx insns)
{
  basic_block bb;

  if (!insns)
    return NULL;
  bb = split_edge (e);
  emit_insn_after (insns, BB_END (bb));

  /* ??? We used to assume that INSNS can contain control flow insns, and
     that we had to try to find sub basic blocks in BB to maintain a valid
     CFG.  For this purpose we used to set the BB_SUPERBLOCK flag on BB
     and call break_superblocks when going out of cfglayout mode.  But it
     turns out that this never happens; and that if it does ever happen,
     the verify_flow_info call in loop_optimizer_finalize would fail.

     There are two reasons why we expected we could have control flow insns
     in INSNS.  The first is when a comparison has to be done in parts, and
     the second is when the number of iterations is computed for loops with
     the number of iterations known at runtime.  In both cases, test cases
     to get control flow in INSNS appear to be impossible to construct:

      * If do_compare_rtx_and_jump needs several branches to do comparison
	in a mode that needs comparison by parts, we cannot analyze the
	number of iterations of the loop, and we never get to unrolling it.

      * The code in expand_divmod that was suspected to cause creation of
	branching code seems to be only accessed for signed division.  The
	divisions used by # of iterations analysis are always unsigned.
	Problems might arise on architectures that emit branching code
	for some operations that may appear in the unroller (especially
	for division), but we have no such architectures.

     Considering all this, it was decided that we should for now assume
     that INSNS can in theory contain control flow insns, but in practice
     it never does.  So we don't handle the theoretical case, and should
     a real failure ever show up, we have a pretty good clue for how to
     fix it.  */

  return bb;
}

/* Unroll LOOP, for which we are able to count the number of iterations
   at runtime, LOOP->LPT_DECISION.TIMES + 1 times.  The transformation
   does this (with some extra care for the case n < 0):

   for (i = 0; i < n; i++)
     body;

   ==>

   i = 0;
   mod = n % 4;

   switch (mod)
     {
       case 3:
         body; i++;
       case 2:
         body; i++;
       case 1:
         body; i++;
       case 0: ;
     }

   while (i < n)
     {
       body; i++;
       body; i++;
       body; i++;
       body; i++;
     }
   */
static void
unroll_loop_runtime_iterations (struct loop *loop)
{
  rtx old_niter, niter, init_code, branch_code, tmp;
  unsigned i, j, p;
  basic_block preheader, *body, swtch, ezc_swtch;
  VEC (basic_block, heap) *dom_bbs;
  sbitmap wont_exit;
  int may_exit_copy;
  unsigned n_peel;
  VEC (edge, heap) *remove_edges;
  edge e;
  bool extra_zero_check, last_may_exit;
  unsigned max_unroll = loop->lpt_decision.times;
  struct niter_desc *desc = get_simple_loop_desc (loop);
  bool exit_at_end = loop_exit_at_end_p (loop);
  struct opt_info *opt_info = NULL;
  bool ok;

  if (flag_split_ivs_in_unroller
      || flag_variable_expansion_in_unroller)
    opt_info = analyze_insns_in_loop (loop);

  /* Remember blocks whose dominators will have to be updated.  */
  dom_bbs = NULL;

  body = get_loop_body (loop);
  for (i = 0; i < loop->num_nodes; i++)
    {
      VEC (basic_block, heap) *ldom;
      basic_block bb;

      ldom = get_dominated_by (CDI_DOMINATORS, body[i]);
      for (j = 0; VEC_iterate (basic_block, ldom, j, bb); j++)
	if (!flow_bb_inside_loop_p (loop, bb))
	  VEC_safe_push (basic_block, heap, dom_bbs, bb);

      VEC_free (basic_block, heap, ldom);
    }
  free (body);

  if (!exit_at_end)
    {
      /* Leave exit in first copy (for explanation why see comment in
	 unroll_loop_constant_iterations).  */
      may_exit_copy = 0;
      n_peel = max_unroll - 1;
      extra_zero_check = true;
      last_may_exit = false;
    }
  else
    {
      /* Leave exit in last copy (for explanation why see comment in
	 unroll_loop_constant_iterations).  */
      may_exit_copy = max_unroll;
      n_peel = max_unroll;
      extra_zero_check = false;
      last_may_exit = true;
    }

  /* Get expression for number of iterations.  */
  start_sequence ();
  old_niter = niter = gen_reg_rtx (desc->mode);
  tmp = force_operand (copy_rtx (desc->niter_expr), niter);
  if (tmp != niter)
    emit_move_insn (niter, tmp);

  /* Count modulo by ANDing it with max_unroll; we use the fact that
     the number of unrollings is a power of two, and thus this is correct
     even if there is overflow in the computation.  */
  niter = expand_simple_binop (desc->mode, AND,
			       niter,
			       GEN_INT (max_unroll),
			       NULL_RTX, 0, OPTAB_LIB_WIDEN);

  init_code = get_insns ();
  end_sequence ();
  unshare_all_rtl_in_chain (init_code);

  /* Precondition the loop.  */
  split_edge_and_insert (loop_preheader_edge (loop), init_code);

  remove_edges = NULL;

  wont_exit = sbitmap_alloc (max_unroll + 2);

  /* Peel the first copy of the loop body (almost always we must leave
     the exit test here; the only exception is when we have an extra zero
     check and the number of iterations is reliable).  Also record the
     place of the (possible) extra zero check.  */
  sbitmap_zero (wont_exit);
  if (extra_zero_check
      && !desc->noloop_assumptions)
    SET_BIT (wont_exit, 1);
  ezc_swtch = loop_preheader_edge (loop)->src;
  ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
				      1, wont_exit, desc->out_edge,
				      &remove_edges,
				      DLTHE_FLAG_UPDATE_FREQ);
  gcc_assert (ok);

  /* Record the place where switch will be built for preconditioning.  */
  swtch = split_edge (loop_preheader_edge (loop));

  for (i = 0; i < n_peel; i++)
    {
      /* Peel the copy.  */
      sbitmap_zero (wont_exit);
      if (i != n_peel - 1 || !last_may_exit)
	SET_BIT (wont_exit, 1);
      ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
					  1, wont_exit, desc->out_edge,
					  &remove_edges,
					  DLTHE_FLAG_UPDATE_FREQ);
      gcc_assert (ok);

      /* Create item for switch.  */
      j = n_peel - i - (extra_zero_check ? 0 : 1);
      p = REG_BR_PROB_BASE / (i + 2);

      preheader = split_edge (loop_preheader_edge (loop));
      branch_code = compare_and_jump_seq (copy_rtx (niter), GEN_INT (j), EQ,
					  block_label (preheader), p,
					  NULL_RTX);

      /* We rely on the fact that the compare and jump cannot be optimized out,
	 and hence the cfg we create is correct.  */
      gcc_assert (branch_code != NULL_RTX);

      swtch = split_edge_and_insert (single_pred_edge (swtch), branch_code);
      set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
      single_pred_edge (swtch)->probability = REG_BR_PROB_BASE - p;
      e = make_edge (swtch, preheader,
		     single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
      e->probability = p;
    }

  if (extra_zero_check)
    {
      /* Add branch for zero iterations.  */
      p = REG_BR_PROB_BASE / (max_unroll + 1);
      swtch = ezc_swtch;
      preheader = split_edge (loop_preheader_edge (loop));
      branch_code = compare_and_jump_seq (copy_rtx (niter), const0_rtx, EQ,
					  block_label (preheader), p,
					  NULL_RTX);
      gcc_assert (branch_code != NULL_RTX);

      swtch = split_edge_and_insert (single_succ_edge (swtch), branch_code);
      set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
      single_succ_edge (swtch)->probability = REG_BR_PROB_BASE - p;
      e = make_edge (swtch, preheader,
		     single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
      e->probability = p;
    }

  /* Recount dominators for outer blocks.  */
  iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, false);

  /* And unroll loop.  */

  sbitmap_ones (wont_exit);
  RESET_BIT (wont_exit, may_exit_copy);
  opt_info_start_duplication (opt_info);

  ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
				      max_unroll,
				      wont_exit, desc->out_edge,
				      &remove_edges,
				      DLTHE_FLAG_UPDATE_FREQ
				      | (opt_info
					 ? DLTHE_RECORD_COPY_NUMBER
					   : 0));
  gcc_assert (ok);

  if (opt_info)
    {
      apply_opt_in_copies (opt_info, max_unroll, true, true);
      free_opt_info (opt_info);
    }

  free (wont_exit);

  if (exit_at_end)
    {
      basic_block exit_block = get_bb_copy (desc->in_edge->src);
      /* Find a new in and out edge; they are in the last copy we have
	 made.  */

      if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
	{
	  desc->out_edge = EDGE_SUCC (exit_block, 0);
	  desc->in_edge = EDGE_SUCC (exit_block, 1);
	}
      else
	{
	  desc->out_edge = EDGE_SUCC (exit_block, 1);
	  desc->in_edge = EDGE_SUCC (exit_block, 0);
	}
    }

  /* Remove the edges.  */
  for (i = 0; VEC_iterate (edge, remove_edges, i, e); i++)
    remove_path (e);
  VEC_free (edge, heap, remove_edges);

  /* We must be careful when updating the number of iterations due to
     preconditioning and the fact that the value must be valid at entry
     of the loop.  After passing through the above code, we see that
     the correct new number of iterations is this:  */
  gcc_assert (!desc->const_iter);
  desc->niter_expr =
    simplify_gen_binary (UDIV, desc->mode, old_niter,
			 GEN_INT (max_unroll + 1));
  desc->niter_max /= max_unroll + 1;
  if (exit_at_end)
    {
      desc->niter_expr =
	simplify_gen_binary (MINUS, desc->mode, desc->niter_expr, const1_rtx);
      desc->noloop_assumptions = NULL_RTX;
      desc->niter_max--;
    }
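
  /* A sanity-check example (mine, not from the original sources): with
     n == 10 and an unroll factor of 4, the preconditioning switch executes
     10 & 3 == 2 peeled copies and the unrolled body covers the remaining
     8 iterations, so the new count is 10 / 4 == 2, as computed above; the
     exit-at-end case additionally subtracts one, since one more copy is
     peeled in that case (n_peel == max_unroll rather than
     max_unroll - 1).  */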

  if (dump_file)
    fprintf (dump_file,
	     ";; Unrolled loop %d times, counting # of iterations "
	     "in runtime, %i insns\n",
	     max_unroll, num_loop_insns (loop));

  VEC_free (basic_block, heap, dom_bbs);
}

/* Decide whether to simply peel LOOP and how much.  */
static void
decide_peel_simple (struct loop *loop, int flags)
{
  unsigned npeel;
  struct niter_desc *desc;

  if (!(flags & UAP_PEEL))
    {
      /* We were not asked to, so just return silently.  */
      return;
    }

  if (dump_file)
    fprintf (dump_file, "\n;; Considering simply peeling loop\n");

  /* npeel = number of iterations to peel.  */
  npeel = PARAM_VALUE (PARAM_MAX_PEELED_INSNS) / loop->ninsns;
  if (npeel > (unsigned) PARAM_VALUE (PARAM_MAX_PEEL_TIMES))
    npeel = PARAM_VALUE (PARAM_MAX_PEEL_TIMES);

  /* Skip big loops.  */
  if (!npeel)
    {
      if (dump_file)
	fprintf (dump_file, ";; Not considering loop, is too big\n");
      return;
    }

  /* Check for simple loops.  */
  desc = get_simple_loop_desc (loop);

  /* Check number of iterations.  */
  if (desc->simple_p && !desc->assumptions && desc->const_iter)
    {
      if (dump_file)
	fprintf (dump_file, ";; Loop iterates constant times\n");
      return;
    }

  /* Do not simply peel loops with branches inside -- it increases the
     number of mispredicts.  */
  if (num_loop_branches (loop) > 1)
    {
      if (dump_file)
	fprintf (dump_file, ";; Not peeling, contains branches\n");
      return;
    }

  if (loop->header->count)
    {
      unsigned niter = expected_loop_iterations (loop);
      if (niter + 1 > npeel)
	{
	  if (dump_file)
	    {
	      fprintf (dump_file, ";; Not peeling loop, rolls too much (");
	      fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
		       (HOST_WIDEST_INT) (niter + 1));
	      fprintf (dump_file, " iterations > %d [maximum peelings])\n",
		       npeel);
	    }
	  return;
	}
      npeel = niter + 1;
    }
  else
    {
      /* For now we have no good heuristics to decide whether loop peeling
         will be effective, so disable it.  */
      if (dump_file)
	fprintf (dump_file,
		 ";; Not peeling loop, no evidence it will be profitable\n");
      return;
    }

  /* Success.  */
  loop->lpt_decision.decision = LPT_PEEL_SIMPLE;
  loop->lpt_decision.times = npeel;

  if (dump_file)
    fprintf (dump_file, ";; Decided to simply peel the loop, %d times.\n",
	     loop->lpt_decision.times);
}
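
/* A small example of the profile-driven branch above (my own numbers): if
   profile feedback says the loop is expected to roll 3 times
   (expected_loop_iterations == 3) and the size limits allow npeel == 16,
   the decision is to peel niter + 1 == 4 copies, so that in the common
   case execution exits within the peeled copies and never branches back
   through the loop at all.  */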

/* Peel a LOOP LOOP->LPT_DECISION.TIMES times.  The transformation:
   while (cond)
     body;

   ==>

   if (!cond) goto end;
   body;
   if (!cond) goto end;
   body;
   while (cond)
     body;
   end: ;
   */
static void
peel_loop_simple (struct loop *loop)
{
  sbitmap wont_exit;
  unsigned npeel = loop->lpt_decision.times;
  struct niter_desc *desc = get_simple_loop_desc (loop);
  struct opt_info *opt_info = NULL;
  bool ok;

  if (flag_split_ivs_in_unroller && npeel > 1)
    opt_info = analyze_insns_in_loop (loop);

  wont_exit = sbitmap_alloc (npeel + 1);
  sbitmap_zero (wont_exit);

  opt_info_start_duplication (opt_info);

  ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
				      npeel, wont_exit, NULL,
				      NULL, DLTHE_FLAG_UPDATE_FREQ
				      | (opt_info
					 ? DLTHE_RECORD_COPY_NUMBER
					   : 0));
  gcc_assert (ok);

  free (wont_exit);

  if (opt_info)
    {
      apply_opt_in_copies (opt_info, npeel, false, false);
      free_opt_info (opt_info);
    }

  if (desc->simple_p)
    {
      if (desc->const_iter)
	{
	  desc->niter -= npeel;
	  desc->niter_expr = GEN_INT (desc->niter);
	  desc->noloop_assumptions = NULL_RTX;
	}
      else
	{
	  /* We cannot just update niter_expr, as its value might be clobbered
	     inside the loop.  We could handle this by counting the number into
	     a temporary just like we do in runtime unrolling, but it does not
	     seem worthwhile.  */
	  free_simple_loop_desc (loop);
	}
    }
  if (dump_file)
    fprintf (dump_file, ";; Peeling loop %d times\n", npeel);
}

/* Decide whether to unroll LOOP stupidly and how much.  */
static void
decide_unroll_stupid (struct loop *loop, int flags)
{
  unsigned nunroll, nunroll_by_av, i;
  struct niter_desc *desc;

  if (!(flags & UAP_UNROLL_ALL))
    {
      /* We were not asked to, so just return silently.  */
      return;
    }

  if (dump_file)
    fprintf (dump_file, "\n;; Considering unrolling loop stupidly\n");

  /* nunroll = total number of copies of the original loop body in
     the unrolled loop (i.e. if it is 2, we have to duplicate the loop
     body once).  */
  nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
  nunroll_by_av
    = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
  if (nunroll > nunroll_by_av)
    nunroll = nunroll_by_av;
  if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
    nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);

  /* Skip big loops.  */
  if (nunroll <= 1)
    {
      if (dump_file)
	fprintf (dump_file, ";; Not considering loop, is too big\n");
      return;
    }

  /* Check for simple loops.  */
  desc = get_simple_loop_desc (loop);

  /* Check simpleness.  */
  if (desc->simple_p && !desc->assumptions)
    {
      if (dump_file)
	fprintf (dump_file, ";; The loop is simple\n");
      return;
    }

  /* Do not unroll loops with branches inside -- it increases the
     number of mispredicts.  */
  if (num_loop_branches (loop) > 1)
    {
      if (dump_file)
	fprintf (dump_file, ";; Not unrolling, contains branches\n");
      return;
    }

  /* If we have profile feedback, check whether the loop rolls.  */
  if (loop->header->count
      && expected_loop_iterations (loop) < 2 * nunroll)
    {
      if (dump_file)
	fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
      return;
    }

  /* Success.  Now force nunroll to be a power of 2, as it seems that this
     improves results (partially because of better alignments, partially
     because of some dark magic).  */
  for (i = 1; 2 * i <= nunroll; i *= 2)
    continue;

  loop->lpt_decision.decision = LPT_UNROLL_STUPID;
  loop->lpt_decision.times = i - 1;

  if (dump_file)
    fprintf (dump_file,
	     ";; Decided to unroll the loop stupidly, %d times.\n",
	     loop->lpt_decision.times);
}

/* Unroll a LOOP LOOP->LPT_DECISION.TIMES times.  The transformation:
   while (cond)
     body;

   ==>

   while (cond)
     {
       body;
       if (!cond) break;
       body;
       if (!cond) break;
       body;
       if (!cond) break;
       body;
     }
   */
static void
unroll_loop_stupid (struct loop *loop)
{
  sbitmap wont_exit;
  unsigned nunroll = loop->lpt_decision.times;
  struct niter_desc *desc = get_simple_loop_desc (loop);
  struct opt_info *opt_info = NULL;
  bool ok;

  if (flag_split_ivs_in_unroller
      || flag_variable_expansion_in_unroller)
    opt_info = analyze_insns_in_loop (loop);

  wont_exit = sbitmap_alloc (nunroll + 1);
  sbitmap_zero (wont_exit);
  opt_info_start_duplication (opt_info);

  ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
				      nunroll, wont_exit,
				      NULL, NULL,
				      DLTHE_FLAG_UPDATE_FREQ
				      | (opt_info
					 ? DLTHE_RECORD_COPY_NUMBER
					   : 0));
  gcc_assert (ok);

  if (opt_info)
    {
      apply_opt_in_copies (opt_info, nunroll, true, true);
      free_opt_info (opt_info);
    }

  free (wont_exit);

  if (desc->simple_p)
    {
      /* We may indeed get here, provided that there are nontrivial
	 assumptions for the loop to be really simple.  We could update
	 the counts, but the problem is that we are unable to decide which
	 exit will be taken (not really true in case the number of
	 iterations is constant, but no one will do anything with this
	 information, so we do not worry about it).  */
      desc->simple_p = false;
    }

  if (dump_file)
    fprintf (dump_file, ";; Unrolled loop %d times, %i insns\n",
	     nunroll, num_loop_insns (loop));
}

/* A hash function for information about insns to split.  */

static hashval_t
si_info_hash (const void *ivts)
{
  return (hashval_t) INSN_UID (((const struct iv_to_split *) ivts)->insn);
}

/* An equality function for information about insns to split.  */

static int
si_info_eq (const void *ivts1, const void *ivts2)
{
  const struct iv_to_split *const i1 = (const struct iv_to_split *) ivts1;
  const struct iv_to_split *const i2 = (const struct iv_to_split *) ivts2;

  return i1->insn == i2->insn;
}

/* Return a hash for VES, which is really a "var_to_expand *".  */

static hashval_t
ve_info_hash (const void *ves)
{
  return (hashval_t) INSN_UID (((const struct var_to_expand *) ves)->insn);
}

/* Return true if IVTS1 and IVTS2 (which are really both of type
   "var_to_expand *") refer to the same instruction.  */

static int
ve_info_eq (const void *ivts1, const void *ivts2)
{
  const struct var_to_expand *const i1 = (const struct var_to_expand *) ivts1;
  const struct var_to_expand *const i2 = (const struct var_to_expand *) ivts2;

  return i1->insn == i2->insn;
}

/* Returns true if REG is referenced in one nondebug insn in LOOP.
   Set *DEBUG_USES to the number of debug insns that reference the
   variable.  */

bool
referenced_in_one_insn_in_loop_p (struct loop *loop, rtx reg,
				  int *debug_uses)
{
  basic_block *body, bb;
  unsigned i;
  int count_ref = 0;
  rtx insn;

  body = get_loop_body (loop);
  for (i = 0; i < loop->num_nodes; i++)
    {
      bb = body[i];

      FOR_BB_INSNS (bb, insn)
	if (!rtx_referenced_p (reg, insn))
	  continue;
	else if (DEBUG_INSN_P (insn))
	  ++*debug_uses;
	else if (++count_ref > 1)
	  break;
    }
  free (body);
  return (count_ref == 1);
}

/* Reset the DEBUG_USES debug insns in LOOP that reference REG.  */

static void
reset_debug_uses_in_loop (struct loop *loop, rtx reg, int debug_uses)
{
  basic_block *body, bb;
  unsigned i;
  rtx insn;

  body = get_loop_body (loop);
  for (i = 0; debug_uses && i < loop->num_nodes; i++)
    {
      bb = body[i];

      FOR_BB_INSNS (bb, insn)
	if (!DEBUG_INSN_P (insn) || !rtx_referenced_p (reg, insn))
	  continue;
	else
	  {
	    validate_change (insn, &INSN_VAR_LOCATION_LOC (insn),
			     gen_rtx_UNKNOWN_VAR_LOC (), 0);
	    if (!--debug_uses)
	      break;
	  }
    }
  free (body);
}

/* Determine whether INSN contains an accumulator
   which can be expanded into separate copies,
   one for each copy of the LOOP body.

   for (i = 0 ; i < n; i++)
     sum += a[i];

   ==>

   sum += a[i]
   ....
   i = i+1;
   sum1 += a[i]
   ....
   i = i+1
   sum2 += a[i];
   ....

   Return NULL if INSN contains no opportunity for expansion of an
   accumulator.  Otherwise, allocate a VAR_TO_EXPAND structure, fill it
   with the relevant information and return a pointer to it.  */

static struct var_to_expand *
analyze_insn_to_expand_var (struct loop *loop, rtx insn)
{
  rtx set, dest, src, op1, op2, something;
  struct var_to_expand *ves;
  enum machine_mode mode1, mode2;
  unsigned accum_pos;
  int debug_uses = 0;

  set = single_set (insn);
  if (!set)
    return NULL;

  dest = SET_DEST (set);
  src = SET_SRC (set);

  if (GET_CODE (src) != PLUS
      && GET_CODE (src) != MINUS
      && GET_CODE (src) != MULT)
    return NULL;

  /* Hmm, this is a bit paradoxical.  We know that INSN is a valid insn
     in MD.  But if there is no optab to generate the insn, we cannot
     perform the variable expansion.  This can happen if an MD provides
     an insn but not a named pattern to generate it, for example to avoid
     producing code that needs additional mode switches like for x87/mmx.

     So we check have_insn_for, which looks for an optab for the operation
     in SRC.  If it doesn't exist, we can't perform the expansion even
     though INSN is valid.  */
  if (!have_insn_for (GET_CODE (src), GET_MODE (src)))
    return NULL;

  op1 = XEXP (src, 0);
  op2 = XEXP (src, 1);

  if (!REG_P (dest)
      && !(GET_CODE (dest) == SUBREG
           && REG_P (SUBREG_REG (dest))))
    return NULL;

  if (rtx_equal_p (dest, op1))
    accum_pos = 0;
  else if (rtx_equal_p (dest, op2))
    accum_pos = 1;
  else
    return NULL;

  /* The method of expansion that we are using -- initializing the
     expansions with zero and summing them at the end of the
     computation -- will yield wrong results for (x = something - x);
     thus avoid using it in that case.  */
  if (accum_pos == 1
      && GET_CODE (src) == MINUS)
   return NULL;
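
  /* A concrete instance of the problem (my own example): for a body doing
     x = a[i] - x, two iterations starting from x == 0 compute
     x == a[1] - a[0].  Splitting into x and a second accumulator x1
     initialized to 0 would compute x == a[0] and x1 == a[1], and summing
     them at the loop exit yields a[0] + a[1], which is wrong.  */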

  something = (accum_pos == 0) ? op2 : op1;

  if (rtx_referenced_p (dest, something))
    return NULL;

  if (!referenced_in_one_insn_in_loop_p (loop, dest, &debug_uses))
    return NULL;

  mode1 = GET_MODE (dest);
  mode2 = GET_MODE (something);
  if ((FLOAT_MODE_P (mode1)
       || FLOAT_MODE_P (mode2))
      && !flag_associative_math)
    return NULL;

  if (dump_file)
    {
      fprintf (dump_file, "\n;; Expanding Accumulator ");
      print_rtl (dump_file, dest);
      fprintf (dump_file, "\n");
    }

  if (debug_uses)
    /* Instead of resetting the debug insns, we could replace each
       debug use in the loop with the sum or product of all expanded
       accumulators.  Since we'll only know of all expansions at the
       end, we'd have to keep track of which vars_to_expand a debug
       insn in the loop references, take note of each copy of the
       debug insn during unrolling, and when it's all done, compute
       the sum or product of each variable and adjust the original
       debug insn and each copy thereof.  What a pain!  */
    reset_debug_uses_in_loop (loop, dest, debug_uses);

  /* Record the accumulator to expand.  */
  ves = XNEW (struct var_to_expand);
  ves->insn = insn;
  ves->reg = copy_rtx (dest);
  ves->var_expansions = VEC_alloc (rtx, heap, 1);
  ves->next = NULL;
  ves->op = GET_CODE (src);
  ves->expansion_count = 0;
  ves->reuse_expansion = 0;
  ves->accum_pos = accum_pos;
  return ves;
}
1711 
1712 /* Determine whether there is an induction variable in INSN that
1713    we would like to split during unrolling.
1714 
1715    I.e., replace chains of the form
1716 
1717    i = i + 1;
1718    ...
1719    i = i + 1;
1720    ...
1721    i = i + 1;
1722    ...
1723 
1724    by
1725 
1726    i0 = i + 1
1727    ...
1728    i = i0 + 1
1729    ...
1730    i = i0 + 2
1731    ...
1732 
1733    Return NULL if INSN contains no interesting IVs.  Otherwise, allocate
1734    an IV_TO_SPLIT structure, fill it with the relevant information and return a
1735    pointer to it.  */
1736 
1737 static struct iv_to_split *
1738 analyze_iv_to_split_insn (rtx insn)
1739 {
1740   rtx set, dest;
1741   struct rtx_iv iv;
1742   struct iv_to_split *ivts;
1743   bool ok;
1744 
1745   /* For now we just split the basic induction variables.  Later this may
1746      be extended, for example, to also handle addresses of memory references.  */
1747   set = single_set (insn);
1748   if (!set)
1749     return NULL;
1750 
1751   dest = SET_DEST (set);
1752   if (!REG_P (dest))
1753     return NULL;
1754 
1755   if (!biv_p (insn, dest))
1756     return NULL;
1757 
1758   ok = iv_analyze_result (insn, dest, &iv);
1759 
1760   /* This used to be an assert under the assumption that if biv_p returns
1761      true, iv_analyze_result must also return true.  However, that
1762      assumption is not strictly correct, as evidenced by pr25569.
1763 
1764      Returning NULL when iv_analyze_result returns false is safe and
1765      avoids the problems in pr25569 until the iv_analyze_* routines
1766      can be fixed, which is apparently hard and time consuming
1767      according to their author.  */
1768   if (! ok)
1769     return NULL;
1770 
1771   if (iv.step == const0_rtx
1772       || iv.mode != iv.extend_mode)
1773     return NULL;
1774 
1775   /* Record the insn to split.  */
1776   ivts = XNEW (struct iv_to_split);
1777   ivts->insn = insn;
1778   ivts->base_var = NULL_RTX;
1779   ivts->step = iv.step;
1780   ivts->next = NULL;
1781   ivts->n_loc = 1;
1782   ivts->loc[0] = 1;
1783 
1784   return ivts;
1785 }
1786 
1787 /* Determine which insns in LOOP can be optimized.
1788    Return an OPT_INFO struct with the relevant hash tables filled
1789    with all insns to be optimized.  The FIRST_NEW_BLOCK field
1790    is undefined for the return value.  */
1791 
1792 static struct opt_info *
1793 analyze_insns_in_loop (struct loop *loop)
1794 {
1795   basic_block *body, bb;
1796   unsigned i;
1797   struct opt_info *opt_info = XCNEW (struct opt_info);
1798   rtx insn;
1799   struct iv_to_split *ivts = NULL;
1800   struct var_to_expand *ves = NULL;
1801   PTR *slot1;
1802   PTR *slot2;
1803   VEC (edge, heap) *edges = get_loop_exit_edges (loop);
1804   edge exit;
1805   bool can_apply = false;
1806 
1807   iv_analysis_loop_init (loop);
1808 
1809   body = get_loop_body (loop);
1810 
1811   if (flag_split_ivs_in_unroller)
1812     {
1813       opt_info->insns_to_split = htab_create (5 * loop->num_nodes,
1814 					      si_info_hash, si_info_eq, free);
1815       opt_info->iv_to_split_head = NULL;
1816       opt_info->iv_to_split_tail = &opt_info->iv_to_split_head;
1817     }
1818 
1819   /* Record the loop preheader, and the single loop exit bb (if any), before the unrolling.  */
1820   opt_info->loop_preheader = loop_preheader_edge (loop)->src;
1821 
1822   if (VEC_length (edge, edges) == 1)
1823     {
1824       exit = VEC_index (edge, edges, 0);
1825       if (!(exit->flags & EDGE_COMPLEX))
1826 	{
1827 	  opt_info->loop_exit = split_edge (exit);
1828 	  can_apply = true;
1829 	}
1830     }
1831 
1832   if (flag_variable_expansion_in_unroller
1833       && can_apply)
1834     {
1835       opt_info->insns_with_var_to_expand = htab_create (5 * loop->num_nodes,
1836 							ve_info_hash,
1837 							ve_info_eq, free);
1838       opt_info->var_to_expand_head = NULL;
1839       opt_info->var_to_expand_tail = &opt_info->var_to_expand_head;
1840     }
1841 
1842   for (i = 0; i < loop->num_nodes; i++)
1843     {
1844       bb = body[i];
1845       if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
1846 	continue;
1847 
1848       FOR_BB_INSNS (bb, insn)
1849       {
1850         if (!INSN_P (insn))
1851           continue;
1852 
1853         if (opt_info->insns_to_split)
1854           ivts = analyze_iv_to_split_insn (insn);
1855 
1856         if (ivts)
1857           {
1858             slot1 = htab_find_slot (opt_info->insns_to_split, ivts, INSERT);
1859 	    gcc_assert (*slot1 == NULL);
1860             *slot1 = ivts;
1861 	    *opt_info->iv_to_split_tail = ivts;
1862 	    opt_info->iv_to_split_tail = &ivts->next;
1863             continue;
1864           }
1865 
1866         if (opt_info->insns_with_var_to_expand)
1867           ves = analyze_insn_to_expand_var (loop, insn);
1868 
1869         if (ves)
1870           {
1871             slot2 = htab_find_slot (opt_info->insns_with_var_to_expand, ves, INSERT);
1872 	    gcc_assert (*slot2 == NULL);
1873             *slot2 = ves;
1874 	    *opt_info->var_to_expand_tail = ves;
1875 	    opt_info->var_to_expand_tail = &ves->next;
1876           }
1877       }
1878     }
1879 
1880   VEC_free (edge, heap, edges);
1881   free (body);
1882   return opt_info;
1883 }
1884 
1885 /* Called just before loop duplication.  Records start of duplicated area
1886    to OPT_INFO.  */
1887 
1888 static void
1889 opt_info_start_duplication (struct opt_info *opt_info)
1890 {
1891   if (opt_info)
1892     opt_info->first_new_block = last_basic_block;
1893 }
1894 
1895 /* Determine the number of iterations between initialization of the base
1896    variable and the current copy (N_COPY).  N_COPIES is the total number
1897    of newly created copies.  UNROLLING is true if we are unrolling
1898    (not peeling) the loop.  */
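
/* For example (illustrative): with N_COPIES == 2, when unrolling, copies
   0 (the original body), 1 and 2 receive deltas 0, 1 and 2; when peeling,
   copies 1 and 2 receive deltas 0 and 1, and the original body, which
   runs last, receives delta 2.  */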
1899 
1900 static unsigned
1901 determine_split_iv_delta (unsigned n_copy, unsigned n_copies, bool unrolling)
1902 {
1903   if (unrolling)
1904     {
1905       /* If we are unrolling, initialization is done in the original loop
1906 	 body (number 0).  */
1907       return n_copy;
1908     }
1909   else
1910     {
1911       /* If we are peeling, the copy in which the initialization occurs has
1912 	 number 1.  The original loop (number 0) is the last.  */
1913       if (n_copy)
1914 	return n_copy - 1;
1915       else
1916 	return n_copies;
1917     }
1918 }
1919 
1920 /* Locate in EXPR the expression corresponding to the location recorded
1921    in IVTS, and return a pointer to the RTX for this location.  */
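
/* For example (illustrative): for an insn (set (reg i) (plus (reg i)
   (const_int 1))), analyze_iv_to_split_insn records n_loc == 1 and
   loc[0] == 1, so this returns the address of the SET_SRC, i.e. of the
   (plus ...) expression.  */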
1922 
1923 static rtx *
1924 get_ivts_expr (rtx expr, struct iv_to_split *ivts)
1925 {
1926   unsigned i;
1927   rtx *ret = &expr;
1928 
1929   for (i = 0; i < ivts->n_loc; i++)
1930     ret = &XEXP (*ret, ivts->loc[i]);
1931 
1932   return ret;
1933 }
1934 
1935 /* Allocate basic variable for the induction variable chain.  */
1936 
1937 static void
1938 allocate_basic_variable (struct iv_to_split *ivts)
1939 {
1940   rtx expr = *get_ivts_expr (single_set (ivts->insn), ivts);
1941 
1942   ivts->base_var = gen_reg_rtx (GET_MODE (expr));
1943 }
1944 
1945 /* Insert initialization of basic variable of IVTS before INSN, taking
1946    the initial value from INSN.  */
1947 
1948 static void
1949 insert_base_initialization (struct iv_to_split *ivts, rtx insn)
1950 {
1951   rtx expr = copy_rtx (*get_ivts_expr (single_set (insn), ivts));
1952   rtx seq;
1953 
1954   start_sequence ();
1955   expr = force_operand (expr, ivts->base_var);
1956   if (expr != ivts->base_var)
1957     emit_move_insn (ivts->base_var, expr);
1958   seq = get_insns ();
1959   end_sequence ();
1960 
1961   emit_insn_before (seq, insn);
1962 }
1963 
1964 /* Replace the use of induction variable described in IVTS in INSN
1965    by base variable + DELTA * step.  */
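
/* For example (illustrative): with base variable i0 and step 1,
   DELTA == 2 rewrites the insn i = i + 1 into i = i0 + 2; compare the
   example before analyze_iv_to_split_insn.  */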
1966 
1967 static void
1968 split_iv (struct iv_to_split *ivts, rtx insn, unsigned delta)
1969 {
1970   rtx expr, *loc, seq, incr, var;
1971   enum machine_mode mode = GET_MODE (ivts->base_var);
1972   rtx src, dest, set;
1973 
1974   /* Construct base + DELTA * step.  */
1975   if (!delta)
1976     expr = ivts->base_var;
1977   else
1978     {
1979       incr = simplify_gen_binary (MULT, mode,
1980 				  ivts->step, gen_int_mode (delta, mode));
1981       expr = simplify_gen_binary (PLUS, GET_MODE (ivts->base_var),
1982 				  ivts->base_var, incr);
1983     }
1984 
1985   /* Figure out where to do the replacement.  */
1986   loc = get_ivts_expr (single_set (insn), ivts);
1987 
1988   /* If we can make the replacement right away, we're done.  */
1989   if (validate_change (insn, loc, expr, 0))
1990     return;
1991 
1992   /* Otherwise, force EXPR into a register and try again.  */
1993   start_sequence ();
1994   var = gen_reg_rtx (mode);
1995   expr = force_operand (expr, var);
1996   if (expr != var)
1997     emit_move_insn (var, expr);
1998   seq = get_insns ();
1999   end_sequence ();
2000   emit_insn_before (seq, insn);
2001 
2002   if (validate_change (insn, loc, var, 0))
2003     return;
2004 
2005   /* The last chance.  Try recreating the assignment in insn
2006      completely from scratch.  */
2007   set = single_set (insn);
2008   gcc_assert (set);
2009 
2010   start_sequence ();
2011   *loc = var;
2012   src = copy_rtx (SET_SRC (set));
2013   dest = copy_rtx (SET_DEST (set));
2014   src = force_operand (src, dest);
2015   if (src != dest)
2016     emit_move_insn (dest, src);
2017   seq = get_insns ();
2018   end_sequence ();
2019 
2020   emit_insn_before (seq, insn);
2021   delete_insn (insn);
2022 }
2023 
2024 
2025 /* Return one expansion of the accumulator recorded in struct VE.  */
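
/* Illustrative note: the expansions are handed out round-robin.  With
   two recorded expansion registers e1 and e2, successive calls return
   VE->reg, e1, e2, VE->reg, e1, e2, ...  */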
2026 
2027 static rtx
2028 get_expansion (struct var_to_expand *ve)
2029 {
2030   rtx reg;
2031 
2032   if (ve->reuse_expansion == 0)
2033     reg = ve->reg;
2034   else
2035     reg = VEC_index (rtx, ve->var_expansions, ve->reuse_expansion - 1);
2036 
2037   if (VEC_length (rtx, ve->var_expansions) == (unsigned) ve->reuse_expansion)
2038     ve->reuse_expansion = 0;
2039   else
2040     ve->reuse_expansion++;
2041 
2042   return reg;
2043 }
2044 
2045 
2046 /* Given INSN replace the uses of the accumulator recorded in VE
2047    with a new register.  */
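
/* For example (illustrative): an accumulation insn sum = sum + x in a
   loop copy is rewritten to sum1 = sum1 + x, where sum1 is either a
   freshly allocated register or one of the already recorded
   expansions.  */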
2048 
2049 static void
2050 expand_var_during_unrolling (struct var_to_expand *ve, rtx insn)
2051 {
2052   rtx new_reg, set;
2053   bool really_new_expansion = false;
2054 
2055   set = single_set (insn);
2056   gcc_assert (set);
2057 
2058   /* Generate a new register only if the expansion limit has not been
2059      reached.  Else reuse an already existing expansion.  */
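  /* (The limit is given by the max-variable-expansions-in-unroller
     parameter from params.def.)  */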
2060   if (PARAM_VALUE (PARAM_MAX_VARIABLE_EXPANSIONS) > ve->expansion_count)
2061     {
2062       really_new_expansion = true;
2063       new_reg = gen_reg_rtx (GET_MODE (ve->reg));
2064     }
2065   else
2066     new_reg = get_expansion (ve);
2067 
2068   validate_change (insn, &SET_DEST (set), new_reg, 1);
2069   validate_change (insn, &XEXP (SET_SRC (set), ve->accum_pos), new_reg, 1);
2070 
2071   if (apply_change_group ())
2072     if (really_new_expansion)
2073       {
2074         VEC_safe_push (rtx, heap, ve->var_expansions, new_reg);
2075         ve->expansion_count++;
2076       }
2077 }
2078 
2079 /* Initialize the variable expansions in loop preheader.  PLACE is the
2080    loop-preheader basic block where the initialization of the
2081    expansions should take place.  The expansions are initialized with
2082    (-0) when the operation is plus or minus, to honor signed zero.  This
2083    way we prevent cases where the sign of the final result is
2084    affected by the sign of the expansion.  Here is an example to
2085    demonstrate this:
2086 
2087    for (i = 0 ; i < n; i++)
2088      sum += something;
2089 
2090    ==>
2091 
2092    sum += something
2093    ....
2094    i = i+1;
2095    sum1 += something
2096    ....
2097    i = i+1
2098    sum2 += something;
2099    ....
2100 
2101    When SUM is initialized with -zero and SOMETHING is also -zero, the
2102    final result of sum should be -zero, thus the expansions sum1 and sum2
2103    should be initialized with -zero as well (otherwise we would get +zero
2104    as the final result).  */
2105 
2106 static void
2107 insert_var_expansion_initialization (struct var_to_expand *ve,
2108 				     basic_block place)
2109 {
2110   rtx seq, var, zero_init, insn;
2111   unsigned i;
2112   enum machine_mode mode = GET_MODE (ve->reg);
2113   bool honor_signed_zero_p = HONOR_SIGNED_ZEROS (mode);
2114 
2115   if (VEC_length (rtx, ve->var_expansions) == 0)
2116     return;
2117 
2118   start_sequence ();
2119   if (ve->op == PLUS || ve->op == MINUS)
2120     for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++)
2121       {
2122 	if (honor_signed_zero_p)
2123 	  zero_init = simplify_gen_unary (NEG, mode, CONST0_RTX (mode), mode);
2124 	else
2125 	  zero_init = CONST0_RTX (mode);
2126 
2127         emit_move_insn (var, zero_init);
2128       }
2129   else if (ve->op == MULT)
2130     for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++)
2131       {
2132         zero_init = CONST1_RTX (GET_MODE (var));  /* The identity for MULT.  */
2133         emit_move_insn (var, zero_init);
2134       }
2135 
2136   seq = get_insns ();
2137   end_sequence ();
2138 
2139   insn = BB_HEAD (place);
2140   while (!NOTE_INSN_BASIC_BLOCK_P (insn))
2141     insn = NEXT_INSN (insn);
2142 
2143   emit_insn_after (seq, insn);
2144 }
2145 
2146 /* Combine the variable expansions at the loop exit.  PLACE is the
2147    loop exit basic block where the summation of the expansions should
2148    take place.  */
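
/* For example (illustrative): with expansion registers sum1 and sum2 of
   an addition accumulator sum, the emitted sequence computes
   sum = sum2 + (sum1 + sum), placed right after the basic block note of
   PLACE.  */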
2149 
2150 static void
2151 combine_var_copies_in_loop_exit (struct var_to_expand *ve, basic_block place)
2152 {
2153   rtx sum = ve->reg;
2154   rtx expr, seq, var, insn;
2155   unsigned i;
2156 
2157   if (VEC_length (rtx, ve->var_expansions) == 0)
2158     return;
2159 
2160   start_sequence ();
2161   if (ve->op == PLUS || ve->op == MINUS)
2162     for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++)
2163       {
2164         sum = simplify_gen_binary (PLUS, GET_MODE (ve->reg),
2165                                    var, sum);
2166       }
2167   else if (ve->op == MULT)
2168     for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++)
2169       {
2170         sum = simplify_gen_binary (MULT, GET_MODE (ve->reg),
2171                                    var, sum);
2172       }
2173 
2174   expr = force_operand (sum, ve->reg);
2175   if (expr != ve->reg)
2176     emit_move_insn (ve->reg, expr);
2177   seq = get_insns ();
2178   end_sequence ();
2179 
2180   insn = BB_HEAD (place);
2181   while (!NOTE_INSN_BASIC_BLOCK_P (insn))
2182     insn = NEXT_INSN (insn);
2183 
2184   emit_insn_after (seq, insn);
2185 }
2186 
2187 /* Apply loop optimizations in loop copies using the
2188    data gathered during the unrolling.  Structure
2189    OPT_INFO records that data.
2190 
2191    UNROLLING is true if we unrolled (not peeled) the loop.
2192    REWRITE_ORIGINAL_LOOP is true if we should also rewrite the original body
2193    of the loop (as happens in complete unrolling, but not in ordinary
2194    peeling of the loop).  */
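
/* Putting the pieces together (illustrative summary): in copy number C,
   each use of a split IV i is rewritten to i0 + delta * step, with delta
   derived from C by determine_split_iv_delta, and each accumulator
   definition is redirected to an expansion register by
   expand_var_during_unrolling.  */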
2195 
2196 static void
2197 apply_opt_in_copies (struct opt_info *opt_info,
2198                      unsigned n_copies, bool unrolling,
2199                      bool rewrite_original_loop)
2200 {
2201   unsigned i, delta;
2202   basic_block bb, orig_bb;
2203   rtx insn, orig_insn, next;
2204   struct iv_to_split ivts_templ, *ivts;
2205   struct var_to_expand ve_templ, *ves;
2206 
2207   /* Sanity check -- we need to put initialization in the original loop
2208      body.  */
2209   gcc_assert (!unrolling || rewrite_original_loop);
2210 
2211   /* Allocate the basic variables (i0).  */
2212   if (opt_info->insns_to_split)
2213     for (ivts = opt_info->iv_to_split_head; ivts; ivts = ivts->next)
2214       allocate_basic_variable (ivts);
2215 
2216   for (i = opt_info->first_new_block; i < (unsigned) last_basic_block; i++)
2217     {
2218       bb = BASIC_BLOCK (i);
2219       orig_bb = get_bb_original (bb);
2220 
2221       /* bb->aux holds position in copy sequence initialized by
2222 	 duplicate_loop_to_header_edge.  */
2223       delta = determine_split_iv_delta ((size_t)bb->aux, n_copies,
2224 					unrolling);
2225       bb->aux = 0;
2226       orig_insn = BB_HEAD (orig_bb);
2227       for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb)); insn = next)
2228         {
2229           next = NEXT_INSN (insn);
2230           if (!INSN_P (insn))
2231             continue;
2232 
2233           while (!INSN_P (orig_insn))
2234             orig_insn = NEXT_INSN (orig_insn);
2235 
2236           ivts_templ.insn = orig_insn;
2237           ve_templ.insn = orig_insn;
2238 
2239           /* Apply splitting iv optimization.  */
2240           if (opt_info->insns_to_split)
2241             {
2242               ivts = (struct iv_to_split *)
2243 		htab_find (opt_info->insns_to_split, &ivts_templ);
2244 
2245               if (ivts)
2246                 {
2247 		  gcc_assert (GET_CODE (PATTERN (insn))
2248 			      == GET_CODE (PATTERN (orig_insn)));
2249 
2250                   if (!delta)
2251                     insert_base_initialization (ivts, insn);
2252                   split_iv (ivts, insn, delta);
2253                 }
2254             }
2255           /* Apply variable expansion optimization.  */
2256           if (unrolling && opt_info->insns_with_var_to_expand)
2257             {
2258               ves = (struct var_to_expand *)
2259 		htab_find (opt_info->insns_with_var_to_expand, &ve_templ);
2260               if (ves)
2261                 {
2262 		  gcc_assert (GET_CODE (PATTERN (insn))
2263 			      == GET_CODE (PATTERN (orig_insn)));
2264                   expand_var_during_unrolling (ves, insn);
2265                 }
2266             }
2267           orig_insn = NEXT_INSN (orig_insn);
2268         }
2269     }
2270 
2271   if (!rewrite_original_loop)
2272     return;
2273 
2274   /* Initialize the variable expansions in the loop preheader
2275      and take care of combining them at the loop exit.  */
2276   if (opt_info->insns_with_var_to_expand)
2277     {
2278       for (ves = opt_info->var_to_expand_head; ves; ves = ves->next)
2279 	insert_var_expansion_initialization (ves, opt_info->loop_preheader);
2280       for (ves = opt_info->var_to_expand_head; ves; ves = ves->next)
2281 	combine_var_copies_in_loop_exit (ves, opt_info->loop_exit);
2282     }
2283 
2284   /* Rewrite the original loop body as well.  Its blocks are found as the
2285      originals of the blocks in the last copied iteration, i.e. those that have
2286      get_bb_copy (get_bb_original (bb)) == bb.  */
2287   for (i = opt_info->first_new_block; i < (unsigned) last_basic_block; i++)
2288     {
2289       bb = BASIC_BLOCK (i);
2290       orig_bb = get_bb_original (bb);
2291       if (get_bb_copy (orig_bb) != bb)
2292 	continue;
2293 
2294       delta = determine_split_iv_delta (0, n_copies, unrolling);
2295       for (orig_insn = BB_HEAD (orig_bb);
2296            orig_insn != NEXT_INSN (BB_END (bb));
2297            orig_insn = next)
2298         {
2299           next = NEXT_INSN (orig_insn);
2300 
2301           if (!INSN_P (orig_insn))
2302             continue;
2303 
2304           ivts_templ.insn = orig_insn;
2305           if (opt_info->insns_to_split)
2306             {
2307               ivts = (struct iv_to_split *)
2308 		htab_find (opt_info->insns_to_split, &ivts_templ);
2309               if (ivts)
2310                 {
2311                   if (!delta)
2312                     insert_base_initialization (ivts, orig_insn);
2313                   split_iv (ivts, orig_insn, delta);
2314                   continue;
2315                 }
2316             }
2317 
2318         }
2319     }
2320 }
2321 
2322 /* Release OPT_INFO.  */
2323 
2324 static void
2325 free_opt_info (struct opt_info *opt_info)
2326 {
2327   if (opt_info->insns_to_split)
2328     htab_delete (opt_info->insns_to_split);
2329   if (opt_info->insns_with_var_to_expand)
2330     {
2331       struct var_to_expand *ves;
2332 
2333       for (ves = opt_info->var_to_expand_head; ves; ves = ves->next)
2334 	VEC_free (rtx, heap, ves->var_expansions);
2335       htab_delete (opt_info->insns_with_var_to_expand);
2336     }
2337   free (opt_info);
2338 }
2339