xref: /netbsd-src/external/gpl3/gcc.old/dist/gcc/loop-unroll.c (revision e6c7e151de239c49d2e38720a061ed9d1fa99309)
1 /* Loop unrolling.
2    Copyright (C) 2002-2017 Free Software Foundation, Inc.
3 
4 This file is part of GCC.
5 
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
9 version.
10 
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3.  If not see
18 <http://www.gnu.org/licenses/>.  */
19 
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "backend.h"
24 #include "target.h"
25 #include "rtl.h"
26 #include "tree.h"
27 #include "cfghooks.h"
28 #include "memmodel.h"
29 #include "optabs.h"
30 #include "emit-rtl.h"
31 #include "recog.h"
32 #include "profile.h"
33 #include "cfgrtl.h"
34 #include "cfgloop.h"
35 #include "params.h"
36 #include "dojump.h"
37 #include "expr.h"
38 #include "dumpfile.h"
39 
40 /* This pass performs loop unrolling.  We only perform this
41    optimization on innermost loops (with a single exception) because
42    the impact on performance is greatest there, and we want to avoid
43    unnecessary code size growth.  The gain comes from greater sequentiality
44    of the code, better opportunities for further passes and in some cases
45    fewer tests of the exit condition.  The main cost is code growth,
46    which hurts performance through its effect on caches.
47 
48    What we do:
49 
50    -- unrolling of loops that roll a constant number of times; this is
51       almost always a win, as we get rid of the exit condition tests.
52    -- unrolling of loops that roll a number of times that we can compute
53       at runtime; we also get rid of exit condition tests here, but there
54       is the extra expense of calculating the number of iterations.
55    -- simple unrolling of remaining loops; this is performed only if we
56       are asked to, as the gain is questionable in this case and often
57       it may even slow down the code.
58    For a more detailed description of each of these, see the comments at
59    the appropriate functions below.
60 
61    There are many parameters (defined and described in params.def) that
62    control how much we unroll.
63 
64    ??? A major problem is that we do not have a good way to determine
65    how many times we should unroll the loop; the experiments made so
66    far showed that this choice may affect performance on the order of
67    several percent.  */
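
/* For illustration (a sketch, not taken from the sources): given

     for (i = 0; i < n; i++)
       body;

   the constant-iteration variant removes the exit test from the unrolled
   copies entirely, the runtime variant preconditions the loop with a
   switch on n % (times + 1), and the "stupid" variant keeps the exit
   test in every copy; see the comments before
   unroll_loop_constant_iterations, unroll_loop_runtime_iterations and
   unroll_loop_stupid below.  The UAP_UNROLL and UAP_UNROLL_ALL bits in
   FLAGS select which variants may be used (the callers are expected to
   derive them from -funroll-loops and -funroll-all-loops).  */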
68 
69 /* Information about induction variables to split.  */
70 
71 struct iv_to_split
72 {
73   rtx_insn *insn;	/* The insn in which the induction variable occurs.  */
74   rtx orig_var;		/* The variable (register) for the IV before split.  */
75   rtx base_var;		/* The variable on which the values in the further
76 			   iterations are based.  */
77   rtx step;		/* Step of the induction variable.  */
78   struct iv_to_split *next; /* Next entry in walking order.  */
79 };
80 
81 /* Information about accumulators to expand.  */
82 
83 struct var_to_expand
84 {
85   rtx_insn *insn;	           /* The insn in which the variable expansion occurs.  */
86   rtx reg;                         /* The accumulator which is expanded.  */
87   vec<rtx> var_expansions;   /* The copies of the accumulator being expanded.  */
88   struct var_to_expand *next;	   /* Next entry in walking order.  */
89   enum rtx_code op;                /* The type of the accumulation - addition, subtraction
90                                       or multiplication.  */
91   int expansion_count;             /* Count the number of expansions generated so far.  */
92   int reuse_expansion;             /* The expansion we intend to reuse to expand
93                                       the accumulator.  If REUSE_EXPANSION is 0 reuse
94                                       the original accumulator.  Else use
95                                       var_expansions[REUSE_EXPANSION - 1].  */
96 };
97 
98 /* Hashtable helper for iv_to_split.  */
99 
100 struct iv_split_hasher : free_ptr_hash <iv_to_split>
101 {
102   static inline hashval_t hash (const iv_to_split *);
103   static inline bool equal (const iv_to_split *, const iv_to_split *);
104 };
105 
106 
107 /* A hash function for information about insns to split.  */
108 
109 inline hashval_t
110 iv_split_hasher::hash (const iv_to_split *ivts)
111 {
112   return (hashval_t) INSN_UID (ivts->insn);
113 }
114 
115 /* An equality function for information about insns to split.  */
116 
117 inline bool
118 iv_split_hasher::equal (const iv_to_split *i1, const iv_to_split *i2)
119 {
120   return i1->insn == i2->insn;
121 }
122 
123 /* Hashtable helper for var_to_expand.  */
124 
125 struct var_expand_hasher : free_ptr_hash <var_to_expand>
126 {
127   static inline hashval_t hash (const var_to_expand *);
128   static inline bool equal (const var_to_expand *, const var_to_expand *);
129 };
130 
131 /* Return a hash for VES.  */
132 
133 inline hashval_t
134 var_expand_hasher::hash (const var_to_expand *ves)
135 {
136   return (hashval_t) INSN_UID (ves->insn);
137 }
138 
139 /* Return true if I1 and I2 refer to the same instruction.  */
140 
141 inline bool
142 var_expand_hasher::equal (const var_to_expand *i1, const var_to_expand *i2)
143 {
144   return i1->insn == i2->insn;
145 }
146 
147 /* Information about the optimizations applied in
148    the unrolled loop.  */
149 
150 struct opt_info
151 {
152   hash_table<iv_split_hasher> *insns_to_split; /* A hashtable of insns to
153 						  split.  */
154   struct iv_to_split *iv_to_split_head; /* The first iv to split.  */
155   struct iv_to_split **iv_to_split_tail; /* Pointer to the tail of the list.  */
156   hash_table<var_expand_hasher> *insns_with_var_to_expand; /* A hashtable of
157 					insns with accumulators to expand.  */
158   struct var_to_expand *var_to_expand_head; /* The first var to expand.  */
159   struct var_to_expand **var_to_expand_tail; /* Pointer to the tail of the list.  */
160   unsigned first_new_block;        /* The first basic block that was
161                                       duplicated.  */
162   basic_block loop_exit;           /* The loop exit basic block.  */
163   basic_block loop_preheader;      /* The loop preheader basic block.  */
164 };
165 
166 static void decide_unroll_stupid (struct loop *, int);
167 static void decide_unroll_constant_iterations (struct loop *, int);
168 static void decide_unroll_runtime_iterations (struct loop *, int);
169 static void unroll_loop_stupid (struct loop *);
170 static void decide_unrolling (int);
171 static void unroll_loop_constant_iterations (struct loop *);
172 static void unroll_loop_runtime_iterations (struct loop *);
173 static struct opt_info *analyze_insns_in_loop (struct loop *);
174 static void opt_info_start_duplication (struct opt_info *);
175 static void apply_opt_in_copies (struct opt_info *, unsigned, bool, bool);
176 static void free_opt_info (struct opt_info *);
177 static struct var_to_expand *analyze_insn_to_expand_var (struct loop*, rtx_insn *);
178 static bool referenced_in_one_insn_in_loop_p (struct loop *, rtx, int *);
179 static struct iv_to_split *analyze_iv_to_split_insn (rtx_insn *);
180 static void expand_var_during_unrolling (struct var_to_expand *, rtx_insn *);
181 static void insert_var_expansion_initialization (struct var_to_expand *,
182 						 basic_block);
183 static void combine_var_copies_in_loop_exit (struct var_to_expand *,
184 					     basic_block);
185 static rtx get_expansion (struct var_to_expand *);
186 
187 /* Emit a message summarizing the unroll that will be
188    performed for LOOP, along with the loop's location LOCUS, if
189    appropriate given the dump or -fopt-info settings.  */
190 
191 static void
192 report_unroll (struct loop *loop, location_t locus)
193 {
194   int report_flags = MSG_OPTIMIZED_LOCATIONS | TDF_RTL | TDF_DETAILS;
195 
196   if (loop->lpt_decision.decision == LPT_NONE)
197     return;
198 
199   if (!dump_enabled_p ())
200     return;
201 
202   dump_printf_loc (report_flags, locus,
203                    "loop unrolled %d times",
204                    loop->lpt_decision.times);
205   if (profile_info)
206     dump_printf (report_flags,
207                  " (header execution count %d)",
208                  (int)loop->header->count);
209 
210   dump_printf (report_flags, "\n");
211 }
212 
213 /* Decide whether to unroll loops and how much.  */
214 static void
215 decide_unrolling (int flags)
216 {
217   struct loop *loop;
218 
219   /* Scan the loops, inner ones first.  */
220   FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
221     {
222       loop->lpt_decision.decision = LPT_NONE;
223       location_t locus = get_loop_location (loop);
224 
225       if (dump_enabled_p ())
226 	dump_printf_loc (TDF_RTL, locus,
227                          ";; *** Considering loop %d at BB %d for "
228                          "unrolling ***\n",
229                          loop->num, loop->header->index);
230 
231       /* Do not unroll cold areas.  */
232       if (optimize_loop_for_size_p (loop))
233 	{
234 	  if (dump_file)
235 	    fprintf (dump_file, ";; Not considering loop, cold area\n");
236 	  continue;
237 	}
238 
239       /* Can the loop be manipulated?  */
240       if (!can_duplicate_loop_p (loop))
241 	{
242 	  if (dump_file)
243 	    fprintf (dump_file,
244 		     ";; Not considering loop, cannot duplicate\n");
245 	  continue;
246 	}
247 
248       /* Skip non-innermost loops.  */
249       if (loop->inner)
250 	{
251 	  if (dump_file)
252 	    fprintf (dump_file, ";; Not considering loop, it is not innermost\n");
253 	  continue;
254 	}
255 
256       loop->ninsns = num_loop_insns (loop);
257       loop->av_ninsns = average_num_loop_insns (loop);
258 
259       /* Try transformations one by one in decreasing order of
260 	 priority.  */
261 
262       decide_unroll_constant_iterations (loop, flags);
263       if (loop->lpt_decision.decision == LPT_NONE)
264 	decide_unroll_runtime_iterations (loop, flags);
265       if (loop->lpt_decision.decision == LPT_NONE)
266 	decide_unroll_stupid (loop, flags);
267 
268       report_unroll (loop, locus);
269     }
270 }
271 
272 /* Unroll loops in the current function.  */
273 void
274 unroll_loops (int flags)
275 {
276   struct loop *loop;
277   bool changed = false;
278 
279   /* Decide what to unroll and how much.  */
280   decide_unrolling (flags);
281 
282   /* Scan the loops, inner ones first.  */
283   FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
284     {
285       /* And perform the appropriate transformations.  */
286       switch (loop->lpt_decision.decision)
287 	{
288 	case LPT_UNROLL_CONSTANT:
289 	  unroll_loop_constant_iterations (loop);
290 	  changed = true;
291 	  break;
292 	case LPT_UNROLL_RUNTIME:
293 	  unroll_loop_runtime_iterations (loop);
294 	  changed = true;
295 	  break;
296 	case LPT_UNROLL_STUPID:
297 	  unroll_loop_stupid (loop);
298 	  changed = true;
299 	  break;
300 	case LPT_NONE:
301 	  break;
302 	default:
303 	  gcc_unreachable ();
304 	}
305     }
306 
307   if (changed)
308     {
309       calculate_dominance_info (CDI_DOMINATORS);
310       fix_loop_structure (NULL);
311     }
312 
313   iv_analysis_done ();
314 }
315 
316 /* Check whether the exit of LOOP is at the end of the loop body.  */
317 
318 static bool
319 loop_exit_at_end_p (struct loop *loop)
320 {
321   struct niter_desc *desc = get_simple_loop_desc (loop);
322   rtx_insn *insn;
323 
324   /* We should never have a conditional jump in the latch block.  */
325   gcc_assert (desc->in_edge->dest != loop->header);
326 
327   if (desc->in_edge->dest != loop->latch)
328     return false;
329 
330   /* Check that the latch is empty.  */
331   FOR_BB_INSNS (loop->latch, insn)
332     {
333       if (INSN_P (insn) && active_insn_p (insn))
334 	return false;
335     }
336 
337   return true;
338 }
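
/* For example (a sketch, not from the sources): a do-while style loop

     do
       body;
     while (--n);

   tests its exit condition at the end of the body, so loop_exit_at_end_p
   would be expected to return true for it, whereas a loop tested at the
   top yields false.  The result decides whether the exit test is left in
   the first or in the last copy when unrolling (see
   unroll_loop_constant_iterations below).  */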
339 
340 /* Decide whether to unroll LOOP, which iterates a constant number of
341    times, and how much.  */
342 
343 static void
344 decide_unroll_constant_iterations (struct loop *loop, int flags)
345 {
346   unsigned nunroll, nunroll_by_av, best_copies, best_unroll = 0, n_copies, i;
347   struct niter_desc *desc;
348   widest_int iterations;
349 
350   if (!(flags & UAP_UNROLL))
351     {
352       /* We were not asked to, so just return silently.  */
353       return;
354     }
355 
356   if (dump_file)
357     fprintf (dump_file,
358 	     "\n;; Considering unrolling loop with constant "
359 	     "number of iterations\n");
360 
361   /* nunroll = total number of copies of the original loop body in the
362      unrolled loop (i.e. if it is 2, we have to duplicate the body once).  */
363   nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
364   nunroll_by_av
365     = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
366   if (nunroll > nunroll_by_av)
367     nunroll = nunroll_by_av;
368   if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
369     nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
370 
371   if (targetm.loop_unroll_adjust)
372     nunroll = targetm.loop_unroll_adjust (nunroll, loop);
373 
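  /* As a worked example (a sketch assuming the usual default parameter
     values max-unrolled-insns == 200, max-average-unrolled-insns == 80
     and max-unroll-times == 8): a loop with ninsns == 20 and
     av_ninsns == 16 gives nunroll == MIN (200 / 20, MIN (80 / 16, 8)),
     i.e. 5, before any target adjustment.  */
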
374   /* Skip big loops.  */
375   if (nunroll <= 1)
376     {
377       if (dump_file)
378 	fprintf (dump_file, ";; Not considering loop, it is too big\n");
379       return;
380     }
381 
382   /* Check for simple loops.  */
383   desc = get_simple_loop_desc (loop);
384 
385   /* Check number of iterations.  */
386   if (!desc->simple_p || !desc->const_iter || desc->assumptions)
387     {
388       if (dump_file)
389 	fprintf (dump_file,
390 		 ";; Unable to prove that the loop iterates a constant number of times\n");
391       return;
392     }
393 
394   /* Check whether the loop rolls enough to consider.
395      Also consult the loop bounds and profile; if the loop has more
396      than one exit it may well loop fewer times than the determined
397      maximal number of iterations.  */
398   if (desc->niter < 2 * nunroll
399       || ((get_estimated_loop_iterations (loop, &iterations)
400 	   || get_likely_max_loop_iterations (loop, &iterations))
401 	  && wi::ltu_p (iterations, 2 * nunroll)))
402     {
403       if (dump_file)
404 	fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
405       return;
406     }
407 
408   /* Success; now compute the number of iterations to unroll.  We alter
409      nunroll so that as few copies of the loop body as possible are
410      necessary, while still not decreasing the number of unrollings
411      too much (at most by 1).  */
412   best_copies = 2 * nunroll + 10;
413 
414   i = 2 * nunroll + 2;
415   if (i - 1 >= desc->niter)
416     i = desc->niter - 2;
417 
418   for (; i >= nunroll - 1; i--)
419     {
420       unsigned exit_mod = desc->niter % (i + 1);
421 
422       if (!loop_exit_at_end_p (loop))
423 	n_copies = exit_mod + i + 1;
424       else if (exit_mod != (unsigned) i
425 	       || desc->noloop_assumptions != NULL_RTX)
426 	n_copies = exit_mod + i + 2;
427       else
428 	n_copies = i + 1;
429 
430       if (n_copies < best_copies)
431 	{
432 	  best_copies = n_copies;
433 	  best_unroll = i;
434 	}
435     }
436 
437   loop->lpt_decision.decision = LPT_UNROLL_CONSTANT;
438   loop->lpt_decision.times = best_unroll;
439 }
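
/* For instance (a sketch with made-up numbers): with desc->niter == 102
   and best_unroll == 3 the unrolled loop contains i + 1 == 4 copies of
   the body, and exit_mod == 102 % 4 == 2 iterations are peeled off in
   front of it; this is exactly the shape shown in the comment before
   unroll_loop_constant_iterations below.  */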
440 
441 /* Unroll LOOP, which iterates a constant number of times,
442    LOOP->LPT_DECISION.TIMES times.  The transformation does this:
443 
444    for (i = 0; i < 102; i++)
445      body;
446 
447    ==>  (LOOP->LPT_DECISION.TIMES == 3)
448 
449    i = 0;
450    body; i++;
451    body; i++;
452    while (i < 102)
453      {
454        body; i++;
455        body; i++;
456        body; i++;
457        body; i++;
458      }
459   */
460 static void
461 unroll_loop_constant_iterations (struct loop *loop)
462 {
463   unsigned HOST_WIDE_INT niter;
464   unsigned exit_mod;
465   unsigned i;
466   edge e;
467   unsigned max_unroll = loop->lpt_decision.times;
468   struct niter_desc *desc = get_simple_loop_desc (loop);
469   bool exit_at_end = loop_exit_at_end_p (loop);
470   struct opt_info *opt_info = NULL;
471   bool ok;
472 
473   niter = desc->niter;
474 
475   /* Should not get here (such a loop should be peeled instead).  */
476   gcc_assert (niter > max_unroll + 1);
477 
478   exit_mod = niter % (max_unroll + 1);
479 
480   auto_sbitmap wont_exit (max_unroll + 2);
481   bitmap_ones (wont_exit);
482 
483   auto_vec<edge> remove_edges;
484   if (flag_split_ivs_in_unroller
485       || flag_variable_expansion_in_unroller)
486     opt_info = analyze_insns_in_loop (loop);
487 
488   if (!exit_at_end)
489     {
490       /* The exit is not at the end of the loop; leave the exit test
491 	 in the first copy, so that loops that start with a test of the
492 	 exit condition have a continuous body after unrolling.  */
493 
494       if (dump_file)
495 	fprintf (dump_file, ";; Condition at beginning of loop.\n");
496 
497       /* Peel exit_mod iterations.  */
498       bitmap_clear_bit (wont_exit, 0);
499       if (desc->noloop_assumptions)
500 	bitmap_clear_bit (wont_exit, 1);
501 
502       if (exit_mod)
503 	{
504 	  opt_info_start_duplication (opt_info);
505           ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
506 					      exit_mod,
507 					      wont_exit, desc->out_edge,
508 					      &remove_edges,
509 					      DLTHE_FLAG_UPDATE_FREQ
510 					      | (opt_info && exit_mod > 1
511 						 ? DLTHE_RECORD_COPY_NUMBER
512 						   : 0));
513 	  gcc_assert (ok);
514 
515           if (opt_info && exit_mod > 1)
516  	    apply_opt_in_copies (opt_info, exit_mod, false, false);
517 
518 	  desc->noloop_assumptions = NULL_RTX;
519 	  desc->niter -= exit_mod;
520 	  loop->nb_iterations_upper_bound -= exit_mod;
521 	  if (loop->any_estimate
522 	      && wi::leu_p (exit_mod, loop->nb_iterations_estimate))
523 	    loop->nb_iterations_estimate -= exit_mod;
524 	  else
525 	    loop->any_estimate = false;
526 	  if (loop->any_likely_upper_bound
527 	      && wi::leu_p (exit_mod, loop->nb_iterations_likely_upper_bound))
528 	    loop->nb_iterations_likely_upper_bound -= exit_mod;
529 	  else
530 	    loop->any_likely_upper_bound = false;
531 	}
532 
533       bitmap_set_bit (wont_exit, 1);
534     }
535   else
536     {
537       /* Leave the exit test in the last copy, for the same reason as
538 	 above, if the loop tests the condition at the end of its body.  */
539 
540       if (dump_file)
541 	fprintf (dump_file, ";; Condition at end of loop.\n");
542 
543       /* We know that niter >= max_unroll + 2, so we do not need to care
544 	 about the case where we would exit before reaching the loop.  So
545 	 just peel exit_mod + 1 iterations.  */
546       if (exit_mod != max_unroll
547 	  || desc->noloop_assumptions)
548 	{
549 	  bitmap_clear_bit (wont_exit, 0);
550 	  if (desc->noloop_assumptions)
551 	    bitmap_clear_bit (wont_exit, 1);
552 
553           opt_info_start_duplication (opt_info);
554 	  ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
555 					      exit_mod + 1,
556 					      wont_exit, desc->out_edge,
557 					      &remove_edges,
558 					      DLTHE_FLAG_UPDATE_FREQ
559 					      | (opt_info && exit_mod > 0
560 						 ? DLTHE_RECORD_COPY_NUMBER
561 						   : 0));
562 	  gcc_assert (ok);
563 
564           if (opt_info && exit_mod > 0)
565   	    apply_opt_in_copies (opt_info, exit_mod + 1, false, false);
566 
567 	  desc->niter -= exit_mod + 1;
568 	  loop->nb_iterations_upper_bound -= exit_mod + 1;
569 	  if (loop->any_estimate
570 	      && wi::leu_p (exit_mod + 1, loop->nb_iterations_estimate))
571 	    loop->nb_iterations_estimate -= exit_mod + 1;
572 	  else
573 	    loop->any_estimate = false;
574 	  if (loop->any_likely_upper_bound
575 	      && wi::leu_p (exit_mod + 1, loop->nb_iterations_likely_upper_bound))
576 	    loop->nb_iterations_likely_upper_bound -= exit_mod + 1;
577 	  else
578 	    loop->any_likely_upper_bound = false;
579 	  desc->noloop_assumptions = NULL_RTX;
580 
581 	  bitmap_set_bit (wont_exit, 0);
582 	  bitmap_set_bit (wont_exit, 1);
583 	}
584 
585       bitmap_clear_bit (wont_exit, max_unroll);
586     }
587 
588   /* Now unroll the loop.  */
589 
590   opt_info_start_duplication (opt_info);
591   ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
592 				      max_unroll,
593 				      wont_exit, desc->out_edge,
594 				      &remove_edges,
595 				      DLTHE_FLAG_UPDATE_FREQ
596 				      | (opt_info
597 					 ? DLTHE_RECORD_COPY_NUMBER
598 					   : 0));
599   gcc_assert (ok);
600 
601   if (opt_info)
602     {
603       apply_opt_in_copies (opt_info, max_unroll, true, true);
604       free_opt_info (opt_info);
605     }
606 
607   if (exit_at_end)
608     {
609       basic_block exit_block = get_bb_copy (desc->in_edge->src);
610       /* Find the new in and out edges; they are in the last copy we made.  */
611 
612       if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
613 	{
614 	  desc->out_edge = EDGE_SUCC (exit_block, 0);
615 	  desc->in_edge = EDGE_SUCC (exit_block, 1);
616 	}
617       else
618 	{
619 	  desc->out_edge = EDGE_SUCC (exit_block, 1);
620 	  desc->in_edge = EDGE_SUCC (exit_block, 0);
621 	}
622     }
623 
624   desc->niter /= max_unroll + 1;
625   loop->nb_iterations_upper_bound
626     = wi::udiv_trunc (loop->nb_iterations_upper_bound, max_unroll + 1);
627   if (loop->any_estimate)
628     loop->nb_iterations_estimate
629       = wi::udiv_trunc (loop->nb_iterations_estimate, max_unroll + 1);
630   if (loop->any_likely_upper_bound)
631     loop->nb_iterations_likely_upper_bound
632       = wi::udiv_trunc (loop->nb_iterations_likely_upper_bound, max_unroll + 1);
633   desc->niter_expr = GEN_INT (desc->niter);
634 
635   /* Remove the edges.  */
636   FOR_EACH_VEC_ELT (remove_edges, i, e)
637     remove_path (e);
638 
639   if (dump_file)
640     fprintf (dump_file,
641 	     ";; Unrolled loop %d times, constant # of iterations %i insns\n",
642 	     max_unroll, num_loop_insns (loop));
643 }
644 
645 /* Decide whether to unroll LOOP, which iterates a number of times
646    computable at runtime, and how much.  */
647 static void
648 decide_unroll_runtime_iterations (struct loop *loop, int flags)
649 {
650   unsigned nunroll, nunroll_by_av, i;
651   struct niter_desc *desc;
652   widest_int iterations;
653 
654   if (!(flags & UAP_UNROLL))
655     {
656       /* We were not asked to, so just return silently.  */
657       return;
658     }
659 
660   if (dump_file)
661     fprintf (dump_file,
662 	     "\n;; Considering unrolling loop with runtime "
663 	     "computable number of iterations\n");
664 
665   /* nunroll = total number of copies of the original loop body in the
666      unrolled loop (i.e. if it is 2, we have to duplicate the body once).  */
667   nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
668   nunroll_by_av = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
669   if (nunroll > nunroll_by_av)
670     nunroll = nunroll_by_av;
671   if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
672     nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
673 
674   if (targetm.loop_unroll_adjust)
675     nunroll = targetm.loop_unroll_adjust (nunroll, loop);
676 
677   /* Skip big loops.  */
678   if (nunroll <= 1)
679     {
680       if (dump_file)
681 	fprintf (dump_file, ";; Not considering loop, it is too big\n");
682       return;
683     }
684 
685   /* Check for simple loops.  */
686   desc = get_simple_loop_desc (loop);
687 
688   /* Check simpleness.  */
689   if (!desc->simple_p || desc->assumptions)
690     {
691       if (dump_file)
692 	fprintf (dump_file,
693 		 ";; Unable to prove that the number of iterations "
694 		 "can be counted at runtime\n");
695       return;
696     }
697 
698   if (desc->const_iter)
699     {
700       if (dump_file)
701 	fprintf (dump_file, ";; Loop iterates a constant number of times\n");
702       return;
703     }
704 
705   /* Check whether the loop rolls.  */
706   if ((get_estimated_loop_iterations (loop, &iterations)
707        || get_likely_max_loop_iterations (loop, &iterations))
708       && wi::ltu_p (iterations, 2 * nunroll))
709     {
710       if (dump_file)
711 	fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
712       return;
713     }
714 
715   /* Success; now force nunroll to be a power of 2, as we are unable to
716      cope with overflows in the computation of the number of iterations.  */
717   for (i = 1; 2 * i <= nunroll; i *= 2)
718     continue;
719 
720   loop->lpt_decision.decision = LPT_UNROLL_RUNTIME;
721   loop->lpt_decision.times = i - 1;
722 }
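
/* For example, if the size limits above yield nunroll == 6, the loop
   just before the decision stops with i == 4, the largest power of 2
   not exceeding 6; lpt_decision.times == 3 and the unrolled loop will
   contain 4 copies of the body, so niter % 4 can later be computed as
   niter & 3 (see unroll_loop_runtime_iterations below).  */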
723 
724 /* Splits edge E and inserts the sequence of instructions INSNS on it, and
725    returns the newly created block.  If INSNS is NULL_RTX, nothing is changed
726    and NULL is returned instead.  */
727 
728 basic_block
729 split_edge_and_insert (edge e, rtx_insn *insns)
730 {
731   basic_block bb;
732 
733   if (!insns)
734     return NULL;
735   bb = split_edge (e);
736   emit_insn_after (insns, BB_END (bb));
737 
738   /* ??? We used to assume that INSNS can contain control flow insns, and
739      that we had to try to find sub basic blocks in BB to maintain a valid
740      CFG.  For this purpose we used to set the BB_SUPERBLOCK flag on BB
741      and call break_superblocks when going out of cfglayout mode.  But it
742      turns out that this never happens; and that if it does ever happen,
743      the verify_flow_info at the end of the RTL loop passes would fail.
744 
745      There are two reasons why we expected we could have control flow insns
746      in INSNS.  The first is when a comparison has to be done in parts, and
747      the second is when the number of iterations is computed for loops with
748      the number of iterations known at runtime.  In both cases, test cases
749      to get control flow in INSNS appear to be impossible to construct:
750 
751       * If do_compare_rtx_and_jump needs several branches to do comparison
752 	in a mode that needs comparison by parts, we cannot analyze the
753 	number of iterations of the loop, and we never get to unrolling it.
754 
755       * The code in expand_divmod that was suspected to cause creation of
756 	branching code seems to be only accessed for signed division.  The
757 	divisions used by # of iterations analysis are always unsigned.
758 	Problems might arise on architectures that emit branching code
759 	for some operations that may appear in the unroller (especially
760 	for division), but we have no such architectures.
761 
762      Considering all this, it was decided that we should for now assume
763      that INSNS can in theory contain control flow insns, but in practice
764      it never does.  So we don't handle the theoretical case, and should
765      a real failure ever show up, we have a pretty good clue for how to
766      fix it.  */
767 
768   return bb;
769 }
770 
771 /* Prepare a sequence comparing OP0 with OP1 using COMP and jumping to LABEL if
772    true, with probability PROB.  If CINSN is not NULL, it is the insn to copy
773    in order to create a jump.  */
774 
775 static rtx_insn *
776 compare_and_jump_seq (rtx op0, rtx op1, enum rtx_code comp,
777 		      rtx_code_label *label, int prob, rtx_insn *cinsn)
778 {
779   rtx_insn *seq;
780   rtx_jump_insn *jump;
781   rtx cond;
782   machine_mode mode;
783 
784   mode = GET_MODE (op0);
785   if (mode == VOIDmode)
786     mode = GET_MODE (op1);
787 
788   start_sequence ();
789   if (GET_MODE_CLASS (mode) == MODE_CC)
790     {
791       /* A hack -- there seems to be no easy generic way to make a
792 	 conditional jump from a CCmode comparison.  */
793       gcc_assert (cinsn);
794       cond = XEXP (SET_SRC (pc_set (cinsn)), 0);
795       gcc_assert (GET_CODE (cond) == comp);
796       gcc_assert (rtx_equal_p (op0, XEXP (cond, 0)));
797       gcc_assert (rtx_equal_p (op1, XEXP (cond, 1)));
798       emit_jump_insn (copy_insn (PATTERN (cinsn)));
799       jump = as_a <rtx_jump_insn *> (get_last_insn ());
800       JUMP_LABEL (jump) = JUMP_LABEL (cinsn);
801       LABEL_NUSES (JUMP_LABEL (jump))++;
802       redirect_jump (jump, label, 0);
803     }
804   else
805     {
806       gcc_assert (!cinsn);
807 
808       op0 = force_operand (op0, NULL_RTX);
809       op1 = force_operand (op1, NULL_RTX);
810       do_compare_rtx_and_jump (op0, op1, comp, 0,
811 			       mode, NULL_RTX, NULL, label, -1);
812       jump = as_a <rtx_jump_insn *> (get_last_insn ());
813       jump->set_jump_target (label);
814       LABEL_NUSES (label)++;
815     }
816   add_int_reg_note (jump, REG_BR_PROB, prob);
817 
818   seq = get_insns ();
819   end_sequence ();
820 
821   return seq;
822 }
823 
824 /* Unroll LOOP, for which we are able to count the number of iterations
825    at runtime, LOOP->LPT_DECISION.TIMES times.  The transformation does
826    this (with some extra care for the case n < 0):
827 
828    for (i = 0; i < n; i++)
829      body;
830 
831    ==>  (LOOP->LPT_DECISION.TIMES == 3)
832 
833    i = 0;
834    mod = n % 4;
835 
836    switch (mod)
837      {
838        case 3:
839          body; i++;
840        case 2:
841          body; i++;
842        case 1:
843          body; i++;
844        case 0: ;
845      }
846 
847    while (i < n)
848      {
849        body; i++;
850        body; i++;
851        body; i++;
852        body; i++;
853      }
854    */
855 static void
856 unroll_loop_runtime_iterations (struct loop *loop)
857 {
858   rtx old_niter, niter, tmp;
859   rtx_insn *init_code, *branch_code;
860   unsigned i, j, p;
861   basic_block preheader, *body, swtch, ezc_swtch = NULL;
862   int may_exit_copy, iter_freq, new_freq;
863   gcov_type iter_count, new_count;
864   unsigned n_peel;
865   edge e;
866   bool extra_zero_check, last_may_exit;
867   unsigned max_unroll = loop->lpt_decision.times;
868   struct niter_desc *desc = get_simple_loop_desc (loop);
869   bool exit_at_end = loop_exit_at_end_p (loop);
870   struct opt_info *opt_info = NULL;
871   bool ok;
872 
873   if (flag_split_ivs_in_unroller
874       || flag_variable_expansion_in_unroller)
875     opt_info = analyze_insns_in_loop (loop);
876 
877   /* Remember blocks whose dominators will have to be updated.  */
878   auto_vec<basic_block> dom_bbs;
879 
880   body = get_loop_body (loop);
881   for (i = 0; i < loop->num_nodes; i++)
882     {
883       vec<basic_block> ldom;
884       basic_block bb;
885 
886       ldom = get_dominated_by (CDI_DOMINATORS, body[i]);
887       FOR_EACH_VEC_ELT (ldom, j, bb)
888 	if (!flow_bb_inside_loop_p (loop, bb))
889 	  dom_bbs.safe_push (bb);
890 
891       ldom.release ();
892     }
893   free (body);
894 
895   if (!exit_at_end)
896     {
897       /* Leave the exit in the first copy (for an explanation, see the
898 	 comment in unroll_loop_constant_iterations).  */
899       may_exit_copy = 0;
900       n_peel = max_unroll - 1;
901       extra_zero_check = true;
902       last_may_exit = false;
903     }
904   else
905     {
906       /* Leave the exit in the last copy (for an explanation, see the
907 	 comment in unroll_loop_constant_iterations).  */
908       may_exit_copy = max_unroll;
909       n_peel = max_unroll;
910       extra_zero_check = false;
911       last_may_exit = true;
912     }
913 
914   /* Get expression for number of iterations.  */
915   start_sequence ();
916   old_niter = niter = gen_reg_rtx (desc->mode);
917   tmp = force_operand (copy_rtx (desc->niter_expr), niter);
918   if (tmp != niter)
919     emit_move_insn (niter, tmp);
920 
921   /* For loops that exit at the end and whose number of iterations is
922      reliable, add one to niter to account for the first pass through the
923      loop body before reaching the exit test.  */
924   if (exit_at_end && !desc->noloop_assumptions)
925     {
926       niter = expand_simple_binop (desc->mode, PLUS,
927 				   niter, const1_rtx,
928 				   NULL_RTX, 0, OPTAB_LIB_WIDEN);
929       old_niter = niter;
930     }
931 
932   /* Compute the modulo by ANDing NITER with max_unroll; we use the fact
933      that the number of unrollings is a power of two, and thus this is
934      correct even if there is overflow in the computation.  */
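  /* E.g. with max_unroll == 3 this computes niter & 3 == niter % 4,
     which remains true even if the runtime computation of NITER above
     wrapped around in DESC->mode.  */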
935   niter = expand_simple_binop (desc->mode, AND,
936 			       niter, gen_int_mode (max_unroll, desc->mode),
937 			       NULL_RTX, 0, OPTAB_LIB_WIDEN);
938 
939   init_code = get_insns ();
940   end_sequence ();
941   unshare_all_rtl_in_chain (init_code);
942 
943   /* Precondition the loop.  */
944   split_edge_and_insert (loop_preheader_edge (loop), init_code);
945 
946   auto_vec<edge> remove_edges;
947 
948   auto_sbitmap wont_exit (max_unroll + 2);
949 
950   if (extra_zero_check || desc->noloop_assumptions)
951     {
952       /* Peel the first copy of the loop body.  Leave the exit test if the number
953 	 of iterations is not reliable.  Also record the place of the extra zero
954 	 check.  */
955       bitmap_clear (wont_exit);
956       if (!desc->noloop_assumptions)
957 	bitmap_set_bit (wont_exit, 1);
958       ezc_swtch = loop_preheader_edge (loop)->src;
959       ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
960 					  1, wont_exit, desc->out_edge,
961 					  &remove_edges,
962 					  DLTHE_FLAG_UPDATE_FREQ);
963       gcc_assert (ok);
964     }
965 
966   /* Record the place where switch will be built for preconditioning.  */
967   swtch = split_edge (loop_preheader_edge (loop));
968 
969   /* Compute frequency/count increments for each switch block and initialize
970      innermost switch block.  Switch blocks and peeled loop copies are built
971      from innermost outward.  */
972   iter_freq = new_freq = swtch->frequency / (max_unroll + 1);
973   iter_count = new_count = swtch->count / (max_unroll + 1);
974   swtch->frequency = new_freq;
975   swtch->count = new_count;
976   single_succ_edge (swtch)->count = new_count;
977 
978   for (i = 0; i < n_peel; i++)
979     {
980       /* Peel the copy.  */
981       bitmap_clear (wont_exit);
982       if (i != n_peel - 1 || !last_may_exit)
983 	bitmap_set_bit (wont_exit, 1);
984       ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
985 					  1, wont_exit, desc->out_edge,
986 					  &remove_edges,
987 					  DLTHE_FLAG_UPDATE_FREQ);
988       gcc_assert (ok);
989 
990       /* Create item for switch.  */
991       j = n_peel - i - (extra_zero_check ? 0 : 1);
992       p = REG_BR_PROB_BASE / (i + 2);
993 
994       preheader = split_edge (loop_preheader_edge (loop));
995       /* Add in frequency/count of edge from switch block.  */
996       preheader->frequency += iter_freq;
997       preheader->count += iter_count;
998       single_succ_edge (preheader)->count = preheader->count;
999       branch_code = compare_and_jump_seq (copy_rtx (niter), GEN_INT (j), EQ,
1000 					  block_label (preheader), p,
1001 					  NULL);
1002 
1003       /* We rely on the fact that the compare and jump cannot be optimized out,
1004 	 and hence the cfg we create is correct.  */
1005       gcc_assert (branch_code != NULL_RTX);
1006 
1007       swtch = split_edge_and_insert (single_pred_edge (swtch), branch_code);
1008       set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
1009       single_succ_edge (swtch)->probability = REG_BR_PROB_BASE - p;
1010       single_succ_edge (swtch)->count = new_count;
1011       new_freq += iter_freq;
1012       new_count += iter_count;
1013       swtch->frequency = new_freq;
1014       swtch->count = new_count;
1015       e = make_edge (swtch, preheader,
1016 		     single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
1017       e->count = iter_count;
1018       e->probability = p;
1019     }
1020 
1021   if (extra_zero_check)
1022     {
1023       /* Add branch for zero iterations.  */
1024       p = REG_BR_PROB_BASE / (max_unroll + 1);
1025       swtch = ezc_swtch;
1026       preheader = split_edge (loop_preheader_edge (loop));
1027       /* Recompute frequency/count adjustments since initial peel copy may
1028 	 have exited and reduced those values that were computed above.  */
1029       iter_freq = swtch->frequency / (max_unroll + 1);
1030       iter_count = swtch->count / (max_unroll + 1);
1031       /* Add in frequency/count of edge from switch block.  */
1032       preheader->frequency += iter_freq;
1033       preheader->count += iter_count;
1034       single_succ_edge (preheader)->count = preheader->count;
1035       branch_code = compare_and_jump_seq (copy_rtx (niter), const0_rtx, EQ,
1036 					  block_label (preheader), p,
1037 					  NULL);
1038       gcc_assert (branch_code != NULL_RTX);
1039 
1040       swtch = split_edge_and_insert (single_succ_edge (swtch), branch_code);
1041       set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
1042       single_succ_edge (swtch)->probability = REG_BR_PROB_BASE - p;
1043       single_succ_edge (swtch)->count -= iter_count;
1044       e = make_edge (swtch, preheader,
1045 		     single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
1046       e->count = iter_count;
1047       e->probability = p;
1048     }
1049 
1050   /* Recount dominators for outer blocks.  */
1051   iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, false);
1052 
1053   /* And unroll loop.  */
1054 
1055   bitmap_ones (wont_exit);
1056   bitmap_clear_bit (wont_exit, may_exit_copy);
1057   opt_info_start_duplication (opt_info);
1058 
1059   ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
1060 				      max_unroll,
1061 				      wont_exit, desc->out_edge,
1062 				      &remove_edges,
1063 				      DLTHE_FLAG_UPDATE_FREQ
1064 				      | (opt_info
1065 					 ? DLTHE_RECORD_COPY_NUMBER
1066 					   : 0));
1067   gcc_assert (ok);
1068 
1069   if (opt_info)
1070     {
1071       apply_opt_in_copies (opt_info, max_unroll, true, true);
1072       free_opt_info (opt_info);
1073     }
1074 
1075   if (exit_at_end)
1076     {
1077       basic_block exit_block = get_bb_copy (desc->in_edge->src);
1078       /* Find the new in and out edges; they are in the last copy we
1079 	 made.  */
1080 
1081       if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
1082 	{
1083 	  desc->out_edge = EDGE_SUCC (exit_block, 0);
1084 	  desc->in_edge = EDGE_SUCC (exit_block, 1);
1085 	}
1086       else
1087 	{
1088 	  desc->out_edge = EDGE_SUCC (exit_block, 1);
1089 	  desc->in_edge = EDGE_SUCC (exit_block, 0);
1090 	}
1091     }
1092 
1093   /* Remove the edges.  */
1094   FOR_EACH_VEC_ELT (remove_edges, i, e)
1095     remove_path (e);
1096 
1097   /* We must be careful when updating the number of iterations due to
1098      preconditioning and the fact that the value must be valid at the
1099      entry of the loop.  After passing through the above code, we see
1100      that the correct new number of iterations is this:  */
1101   gcc_assert (!desc->const_iter);
1102   desc->niter_expr =
1103     simplify_gen_binary (UDIV, desc->mode, old_niter,
1104 			 gen_int_mode (max_unroll + 1, desc->mode));
1105   loop->nb_iterations_upper_bound
1106     = wi::udiv_trunc (loop->nb_iterations_upper_bound, max_unroll + 1);
1107   if (loop->any_estimate)
1108     loop->nb_iterations_estimate
1109       = wi::udiv_trunc (loop->nb_iterations_estimate, max_unroll + 1);
1110   if (loop->any_likely_upper_bound)
1111     loop->nb_iterations_likely_upper_bound
1112       = wi::udiv_trunc (loop->nb_iterations_likely_upper_bound, max_unroll + 1);
1113   if (exit_at_end)
1114     {
1115       desc->niter_expr =
1116 	simplify_gen_binary (MINUS, desc->mode, desc->niter_expr, const1_rtx);
1117       desc->noloop_assumptions = NULL_RTX;
1118       --loop->nb_iterations_upper_bound;
1119       if (loop->any_estimate
1120 	  && loop->nb_iterations_estimate != 0)
1121 	--loop->nb_iterations_estimate;
1122       else
1123 	loop->any_estimate = false;
1124       if (loop->any_likely_upper_bound
1125 	  && loop->nb_iterations_likely_upper_bound != 0)
1126 	--loop->nb_iterations_likely_upper_bound;
1127       else
1128 	loop->any_likely_upper_bound = false;
1129     }
1130 
1131   if (dump_file)
1132     fprintf (dump_file,
1133 	     ";; Unrolled loop %d times, counting # of iterations "
1134 	     "in runtime, %i insns\n",
1135 	     max_unroll, num_loop_insns (loop));
1136 }
1137 
1138 /* Decide whether to unroll LOOP stupidly and how much.  */
1139 static void
1140 decide_unroll_stupid (struct loop *loop, int flags)
1141 {
1142   unsigned nunroll, nunroll_by_av, i;
1143   struct niter_desc *desc;
1144   widest_int iterations;
1145 
1146   if (!(flags & UAP_UNROLL_ALL))
1147     {
1148       /* We were not asked to, so just return silently.  */
1149       return;
1150     }
1151 
1152   if (dump_file)
1153     fprintf (dump_file, "\n;; Considering unrolling loop stupidly\n");
1154 
1155   /* nunroll = total number of copies of the original loop body in the
1156      unrolled loop (i.e. if it is 2, we have to duplicate the body once).  */
1157   nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
1158   nunroll_by_av
1159     = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
1160   if (nunroll > nunroll_by_av)
1161     nunroll = nunroll_by_av;
1162   if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
1163     nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
1164 
1165   if (targetm.loop_unroll_adjust)
1166     nunroll = targetm.loop_unroll_adjust (nunroll, loop);
1167 
1168   /* Skip big loops.  */
1169   if (nunroll <= 1)
1170     {
1171       if (dump_file)
1172 	fprintf (dump_file, ";; Not considering loop, it is too big\n");
1173       return;
1174     }
1175 
1176   /* Check for simple loops.  */
1177   desc = get_simple_loop_desc (loop);
1178 
1179   /* Check simpleness.  */
1180   if (desc->simple_p && !desc->assumptions)
1181     {
1182       if (dump_file)
1183 	fprintf (dump_file, ";; The loop is simple\n");
1184       return;
1185     }
1186 
1187   /* Do not unroll loops with branches inside -- it increases the
1188      number of mispredicts.
1189      TODO: this heuristic needs tuning; a call inside the loop body
1190      is also a relatively good reason not to unroll.  */
1191   if (num_loop_branches (loop) > 1)
1192     {
1193       if (dump_file)
1194 	fprintf (dump_file, ";; Not unrolling, contains branches\n");
1195       return;
1196     }
1197 
1198   /* Check whether the loop rolls.  */
1199   if ((get_estimated_loop_iterations (loop, &iterations)
1200        || get_likely_max_loop_iterations (loop, &iterations))
1201       && wi::ltu_p (iterations, 2 * nunroll))
1202     {
1203       if (dump_file)
1204 	fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
1205       return;
1206     }
1207 
1208   /* Success.  Now force nunroll to be a power of 2, as this seems to
1209      improve results (partly because of better alignment, partly because
1210      of some dark magic).  */
1211   for (i = 1; 2 * i <= nunroll; i *= 2)
1212     continue;
1213 
1214   loop->lpt_decision.decision = LPT_UNROLL_STUPID;
1215   loop->lpt_decision.times = i - 1;
1216 }
1217 
1218 /* Unroll a LOOP LOOP->LPT_DECISION.TIMES times.  The transformation does this:
1219 
1220    while (cond)
1221      body;
1222 
1223    ==>  (LOOP->LPT_DECISION.TIMES == 3)
1224 
1225    while (cond)
1226      {
1227        body;
1228        if (!cond) break;
1229        body;
1230        if (!cond) break;
1231        body;
1232        if (!cond) break;
1233        body;
1234      }
1235    */
1236 static void
1237 unroll_loop_stupid (struct loop *loop)
1238 {
1239   unsigned nunroll = loop->lpt_decision.times;
1240   struct niter_desc *desc = get_simple_loop_desc (loop);
1241   struct opt_info *opt_info = NULL;
1242   bool ok;
1243 
1244   if (flag_split_ivs_in_unroller
1245       || flag_variable_expansion_in_unroller)
1246     opt_info = analyze_insns_in_loop (loop);
1247 
1248   auto_sbitmap wont_exit (nunroll + 1);
1249   bitmap_clear (wont_exit);
1250   opt_info_start_duplication (opt_info);
1251 
1252   ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
1253 				      nunroll, wont_exit,
1254 				      NULL, NULL,
1255 				      DLTHE_FLAG_UPDATE_FREQ
1256 				      | (opt_info
1257 					 ? DLTHE_RECORD_COPY_NUMBER
1258 					   : 0));
1259   gcc_assert (ok);
1260 
1261   if (opt_info)
1262     {
1263       apply_opt_in_copies (opt_info, nunroll, true, true);
1264       free_opt_info (opt_info);
1265     }
1266 
1267   if (desc->simple_p)
1268     {
1269       /* We may indeed get here, provided that there are nontrivial
1270 	 assumptions for a loop to be really simple.  We could update the
1271 	 counts, but the problem is that we are unable to decide which exit
1272 	 will be taken (not really true when the number of iterations is
1273 	 constant, but no one would do anything with this information
1274 	 anyway, so we do not worry about it).  */
1275       desc->simple_p = false;
1276     }
1277 
1278   if (dump_file)
1279     fprintf (dump_file, ";; Unrolled loop %d times, %i insns\n",
1280 	     nunroll, num_loop_insns (loop));
1281 }
1282 
1283 /* Return true if REG is referenced in exactly one nondebug insn in
1284    LOOP.  Set *DEBUG_USES to the number of debug insns that reference
1285    the variable.  */
1286 
1287 static bool
1288 referenced_in_one_insn_in_loop_p (struct loop *loop, rtx reg,
1289 				  int *debug_uses)
1290 {
1291   basic_block *body, bb;
1292   unsigned i;
1293   int count_ref = 0;
1294   rtx_insn *insn;
1295 
1296   body = get_loop_body (loop);
1297   for (i = 0; i < loop->num_nodes; i++)
1298     {
1299       bb = body[i];
1300 
1301       FOR_BB_INSNS (bb, insn)
1302 	if (!rtx_referenced_p (reg, insn))
1303 	  continue;
1304 	else if (DEBUG_INSN_P (insn))
1305 	  ++*debug_uses;
1306 	else if (++count_ref > 1)
1307 	  break;
1308     }
1309   free (body);
1310   return (count_ref == 1);
1311 }
1312 
1313 /* Reset the DEBUG_USES debug insns in LOOP that reference REG.  */
1314 
1315 static void
1316 reset_debug_uses_in_loop (struct loop *loop, rtx reg, int debug_uses)
1317 {
1318   basic_block *body, bb;
1319   unsigned i;
1320   rtx_insn *insn;
1321 
1322   body = get_loop_body (loop);
1323   for (i = 0; debug_uses && i < loop->num_nodes; i++)
1324     {
1325       bb = body[i];
1326 
1327       FOR_BB_INSNS (bb, insn)
1328 	if (!DEBUG_INSN_P (insn) || !rtx_referenced_p (reg, insn))
1329 	  continue;
1330 	else
1331 	  {
1332 	    validate_change (insn, &INSN_VAR_LOCATION_LOC (insn),
1333 			     gen_rtx_UNKNOWN_VAR_LOC (), 0);
1334 	    if (!--debug_uses)
1335 	      break;
1336 	  }
1337     }
1338   free (body);
1339 }
1340 
1341 /* Determine whether INSN contains an accumulator
1342    which can be expanded into separate copies,
1343    one for each copy of the LOOP body.
1344 
1345    for (i = 0 ; i < n; i++)
1346      sum += a[i];
1347 
1348    ==>
1349 
1350    sum += a[i]
1351    ....
1352    i = i+1;
1353    sum1 += a[i]
1354    ....
1355    i = i+1
1356    sum2 += a[i];
1357    ....
1358 
1359    Return NULL if INSN contains no opportunity for expansion of an
1360    accumulator.  Otherwise, allocate a VAR_TO_EXPAND structure, fill it
1361    with the relevant information and return a pointer to it.
1362 */
1363 
1364 static struct var_to_expand *
1365 analyze_insn_to_expand_var (struct loop *loop, rtx_insn *insn)
1366 {
1367   rtx set, dest, src;
1368   struct var_to_expand *ves;
1369   unsigned accum_pos;
1370   enum rtx_code code;
1371   int debug_uses = 0;
1372 
1373   set = single_set (insn);
1374   if (!set)
1375     return NULL;
1376 
1377   dest = SET_DEST (set);
1378   src = SET_SRC (set);
1379   code = GET_CODE (src);
1380 
1381   if (code != PLUS && code != MINUS && code != MULT && code != FMA)
1382     return NULL;
1383 
1384   if (FLOAT_MODE_P (GET_MODE (dest)))
1385     {
1386       if (!flag_associative_math)
1387         return NULL;
1388       /* In the case of FMA, we're also changing the rounding.  */
1389       if (code == FMA && !flag_unsafe_math_optimizations)
1390 	return NULL;
1391     }
1392 
1393   /* Hmm, this is a bit paradoxical.  We know that INSN is a valid insn
1394      in MD.  But if there is no optab to generate the insn, we cannot
1395      perform the variable expansion.  This can happen if an MD provides
1396      an insn but not a named pattern to generate it, for example to avoid
1397      producing code that needs additional mode switches like for x87/mmx.
1398 
1399      So we check have_insn_for, which looks for an optab for the operation
1400      in SRC.  If it doesn't exist, we cannot perform the expansion even
1401      though INSN is valid.  */
1402   if (!have_insn_for (code, GET_MODE (src)))
1403     return NULL;
1404 
1405   if (!REG_P (dest)
1406       && !(GET_CODE (dest) == SUBREG
1407            && REG_P (SUBREG_REG (dest))))
1408     return NULL;
1409 
1410   /* Find the accumulator use within the operation.  */
1411   if (code == FMA)
1412     {
1413       /* We only support accumulation via FMA in the ADD position.  */
1414       if (!rtx_equal_p  (dest, XEXP (src, 2)))
1415 	return NULL;
1416       accum_pos = 2;
1417     }
1418   else if (rtx_equal_p (dest, XEXP (src, 0)))
1419     accum_pos = 0;
1420   else if (rtx_equal_p (dest, XEXP (src, 1)))
1421     {
1422       /* The method of expansion that we are using; which includes the
1423 	 initialization of the expansions with zero and the summation of
1424          the expansions at the end of the computation will yield wrong
1425 	 results for (x = something - x) thus avoid using it in that case.  */
1426       if (code == MINUS)
1427 	return NULL;
1428       accum_pos = 1;
1429     }
1430   else
1431     return NULL;
1432 
1433   /* It must not otherwise be used.  */
1434   if (code == FMA)
1435     {
1436       if (rtx_referenced_p (dest, XEXP (src, 0))
1437 	  || rtx_referenced_p (dest, XEXP (src, 1)))
1438 	return NULL;
1439     }
1440   else if (rtx_referenced_p (dest, XEXP (src, 1 - accum_pos)))
1441     return NULL;
1442 
1443   /* It must be used in exactly one insn.  */
1444   if (!referenced_in_one_insn_in_loop_p (loop, dest, &debug_uses))
1445     return NULL;
1446 
1447   if (dump_file)
1448     {
1449       fprintf (dump_file, "\n;; Expanding Accumulator ");
1450       print_rtl (dump_file, dest);
1451       fprintf (dump_file, "\n");
1452     }
1453 
1454   if (debug_uses)
1455     /* Instead of resetting the debug insns, we could replace each
1456        debug use in the loop with the sum or product of all expanded
1457        accumulators.  Since we'll only know of all expansions at the
1458        end, we'd have to keep track of which vars_to_expand a debug
1459        insn in the loop references, take note of each copy of the
1460        debug insn during unrolling, and when it's all done, compute
1461        the sum or product of each variable and adjust the original
1462        debug insn and each copy thereof.  What a pain!  */
1463     reset_debug_uses_in_loop (loop, dest, debug_uses);
1464 
1465   /* Record the accumulator to expand.  */
1466   ves = XNEW (struct var_to_expand);
1467   ves->insn = insn;
1468   ves->reg = copy_rtx (dest);
1469   ves->var_expansions.create (1);
1470   ves->next = NULL;
1471   ves->op = GET_CODE (src);
1472   ves->expansion_count = 0;
1473   ves->reuse_expansion = 0;
1474   return ves;
1475 }
1476 
1477 /* Determine whether there is an induction variable in INSN that
1478    we would like to split during unrolling.
1479 
1480    I.e. replace
1481 
1482    i = i + 1;
1483    ...
1484    i = i + 1;
1485    ...
1486    i = i + 1;
1487    ...
1488 
1489    type chains by
1490 
1491    i0 = i + 1
1492    ...
1493    i = i0 + 1
1494    ...
1495    i = i0 + 2
1496    ...
1497 
1498    Return NULL if INSN contains no interesting IVs.  Otherwise, allocate
1499    an IV_TO_SPLIT structure, fill it with the relevant information and return a
1500    pointer to it.  */
1501 
1502 static struct iv_to_split *
1503 analyze_iv_to_split_insn (rtx_insn *insn)
1504 {
1505   rtx set, dest;
1506   struct rtx_iv iv;
1507   struct iv_to_split *ivts;
1508   bool ok;
1509 
1510   /* For now we just split the basic induction variables.  Later this may
1511      be extended, for example, to also select addresses of memory references.  */
1512   set = single_set (insn);
1513   if (!set)
1514     return NULL;
1515 
1516   dest = SET_DEST (set);
1517   if (!REG_P (dest))
1518     return NULL;
1519 
1520   if (!biv_p (insn, dest))
1521     return NULL;
1522 
1523   ok = iv_analyze_result (insn, dest, &iv);
1524 
1525   /* This used to be an assert under the assumption that if biv_p returns
1526      true then iv_analyze_result must also return true.  However, that
1527      assumption is not strictly correct as evidenced by pr25569.
1528 
1529      Returning NULL when iv_analyze_result returns false is safe and
1530      avoids the problems in pr25569 until the iv_analyze_* routines
1531      can be fixed, which is apparently hard and time consuming
1532      according to their author.  */
1533   if (! ok)
1534     return NULL;
1535 
1536   if (iv.step == const0_rtx
1537       || iv.mode != iv.extend_mode)
1538     return NULL;
1539 
1540   /* Record the insn to split.  */
1541   ivts = XNEW (struct iv_to_split);
1542   ivts->insn = insn;
1543   ivts->orig_var = dest;
1544   ivts->base_var = NULL_RTX;
1545   ivts->step = iv.step;
1546   ivts->next = NULL;
1547 
1548   return ivts;
1549 }
1550 
1551 /* Determine which insns in LOOP can be optimized.  Return an
1552    OPT_INFO struct with the relevant hash tables filled
1553    with all insns to be optimized.  The FIRST_NEW_BLOCK field
1554    is undefined for the return value.  */
1555 
1556 static struct opt_info *
1557 analyze_insns_in_loop (struct loop *loop)
1558 {
1559   basic_block *body, bb;
1560   unsigned i;
1561   struct opt_info *opt_info = XCNEW (struct opt_info);
1562   rtx_insn *insn;
1563   struct iv_to_split *ivts = NULL;
1564   struct var_to_expand *ves = NULL;
1565   iv_to_split **slot1;
1566   var_to_expand **slot2;
1567   vec<edge> edges = get_loop_exit_edges (loop);
1568   edge exit;
1569   bool can_apply = false;
1570 
1571   iv_analysis_loop_init (loop);
1572 
1573   body = get_loop_body (loop);
1574 
1575   if (flag_split_ivs_in_unroller)
1576     {
1577       opt_info->insns_to_split
1578        	= new hash_table<iv_split_hasher> (5 * loop->num_nodes);
1579       opt_info->iv_to_split_head = NULL;
1580       opt_info->iv_to_split_tail = &opt_info->iv_to_split_head;
1581     }
1582 
1583   /* Record the loop exit bb and loop preheader before the unrolling.  */
1584   opt_info->loop_preheader = loop_preheader_edge (loop)->src;
1585 
1586   if (edges.length () == 1)
1587     {
1588       exit = edges[0];
1589       if (!(exit->flags & EDGE_COMPLEX))
1590 	{
1591 	  opt_info->loop_exit = split_edge (exit);
1592 	  can_apply = true;
1593 	}
1594     }
1595 
1596   if (flag_variable_expansion_in_unroller
1597       && can_apply)
1598     {
1599       opt_info->insns_with_var_to_expand
1600        	= new hash_table<var_expand_hasher> (5 * loop->num_nodes);
1601       opt_info->var_to_expand_head = NULL;
1602       opt_info->var_to_expand_tail = &opt_info->var_to_expand_head;
1603     }
1604 
1605   for (i = 0; i < loop->num_nodes; i++)
1606     {
1607       bb = body[i];
1608       if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
1609 	continue;
1610 
1611       FOR_BB_INSNS (bb, insn)
1612       {
1613         if (!INSN_P (insn))
1614           continue;
1615 
1616         if (opt_info->insns_to_split)
1617           ivts = analyze_iv_to_split_insn (insn);
1618 
1619         if (ivts)
1620           {
1621             slot1 = opt_info->insns_to_split->find_slot (ivts, INSERT);
1622 	    gcc_assert (*slot1 == NULL);
1623             *slot1 = ivts;
1624 	    *opt_info->iv_to_split_tail = ivts;
1625 	    opt_info->iv_to_split_tail = &ivts->next;
1626             continue;
1627           }
1628 
1629         if (opt_info->insns_with_var_to_expand)
1630           ves = analyze_insn_to_expand_var (loop, insn);
1631 
1632         if (ves)
1633           {
1634             slot2 = opt_info->insns_with_var_to_expand->find_slot (ves, INSERT);
1635 	    gcc_assert (*slot2 == NULL);
1636             *slot2 = ves;
1637 	    *opt_info->var_to_expand_tail = ves;
1638 	    opt_info->var_to_expand_tail = &ves->next;
1639           }
1640       }
1641     }
1642 
1643   edges.release ();
1644   free (body);
1645   return opt_info;
1646 }
1647 
1648 /* Called just before loop duplication.  Record the start of the
1649    duplicated area in OPT_INFO.  */
1650 
1651 static void
1652 opt_info_start_duplication (struct opt_info *opt_info)
1653 {
1654   if (opt_info)
1655     opt_info->first_new_block = last_basic_block_for_fn (cfun);
1656 }
1657 
1658 /* Determine the number of iterations between the initialization of the
1659    base variable and the current copy (N_COPY).  N_COPIES is the total
1660    number of newly created copies.  UNROLLING is true if we are unrolling
1661    (not peeling) the loop.  */
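
/* A worked example: with N_COPIES == 3 and unrolling, the bodies are
   numbered 0 (the original, which performs the initialization), 1, 2
   and 3, so the delta for body N_COPY is simply N_COPY.  With peeling,
   body 1 performs the initialization and the original body (number 0)
   runs last, so bodies 1, 2 and 3 get deltas 0, 1 and 2, while the
   original body gets delta N_COPIES == 3.  */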
1662 
1663 static unsigned
1664 determine_split_iv_delta (unsigned n_copy, unsigned n_copies, bool unrolling)
1665 {
1666   if (unrolling)
1667     {
1668       /* If we are unrolling, initialization is done in the original loop
1669 	 body (number 0).  */
1670       return n_copy;
1671     }
1672   else
1673     {
1674       /* If we are peeling, the copy in which the initialization occurs has
1675 	 number 1.  The original loop (number 0) comes last.  */
1676       if (n_copy)
1677 	return n_copy - 1;
1678       else
1679 	return n_copies;
1680     }
1681 }
1682 
1683 /* Allocate the base variable for the induction variable chain.  */
1684 
1685 static void
1686 allocate_basic_variable (struct iv_to_split *ivts)
1687 {
1688   rtx expr = SET_SRC (single_set (ivts->insn));
1689 
1690   ivts->base_var = gen_reg_rtx (GET_MODE (expr));
1691 }
1692 
1693 /* Insert the initialization of the base variable of IVTS before INSN,
1694    taking the initial value from INSN.  */
1695 
1696 static void
1697 insert_base_initialization (struct iv_to_split *ivts, rtx_insn *insn)
1698 {
1699   rtx expr = copy_rtx (SET_SRC (single_set (insn)));
1700   rtx_insn *seq;
1701 
1702   start_sequence ();
1703   expr = force_operand (expr, ivts->base_var);
1704   if (expr != ivts->base_var)
1705     emit_move_insn (ivts->base_var, expr);
1706   seq = get_insns ();
1707   end_sequence ();
1708 
1709   emit_insn_before (seq, insn);
1710 }
1711 
1712 /* Replace the use of the induction variable described in IVTS in INSN
1713    with the base variable plus DELTA * step.  */
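
/* For instance, for an induction variable with step 4, the use in copy
   number 2 becomes base + 8; each copy thus computes its value directly
   from the base variable instead of through the serial chain of IV
   increments.  */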
1714 
1715 static void
1716 split_iv (struct iv_to_split *ivts, rtx_insn *insn, unsigned delta)
1717 {
1718   rtx expr, *loc, incr, var;
1719   rtx_insn *seq;
1720   machine_mode mode = GET_MODE (ivts->base_var);
1721   rtx src, dest, set;
1722 
1723   /* Construct base + DELTA * step.  */
1724   if (!delta)
1725     expr = ivts->base_var;
1726   else
1727     {
1728       incr = simplify_gen_binary (MULT, mode,
1729 				  ivts->step, gen_int_mode (delta, mode));
1730       expr = simplify_gen_binary (PLUS, GET_MODE (ivts->base_var),
1731 				  ivts->base_var, incr);
1732     }
1733 
1734   /* Figure out where to do the replacement.  */
1735   loc = &SET_SRC (single_set (insn));
1736 
1737   /* If we can make the replacement right away, we're done.  */
1738   if (validate_change (insn, loc, expr, 0))
1739     return;
1740 
1741   /* Otherwise, force EXPR into a register and try again.  */
1742   start_sequence ();
1743   var = gen_reg_rtx (mode);
1744   expr = force_operand (expr, var);
1745   if (expr != var)
1746     emit_move_insn (var, expr);
1747   seq = get_insns ();
1748   end_sequence ();
1749   emit_insn_before (seq, insn);
1750 
1751   if (validate_change (insn, loc, var, 0))
1752     return;
1753 
1754   /* As a last resort, try recreating the assignment in INSN
1755      completely from scratch.  */
1756   set = single_set (insn);
1757   gcc_assert (set);
1758 
1759   start_sequence ();
1760   *loc = var;
1761   src = copy_rtx (SET_SRC (set));
1762   dest = copy_rtx (SET_DEST (set));
1763   src = force_operand (src, dest);
1764   if (src != dest)
1765     emit_move_insn (dest, src);
1766   seq = get_insns ();
1767   end_sequence ();
1768 
1769   emit_insn_before (seq, insn);
1770   delete_insn (insn);
1771 }
1772 
1773 
1774 /* Return one expansion of the accumulator recorded in struct VE.  */
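
/* For example, with two recorded expansions, successive calls yield
   VE->reg, var_expansions[0], var_expansions[1] and then VE->reg again,
   i.e. the accumulator and its expansions are handed out round-robin.  */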
1775 
1776 static rtx
1777 get_expansion (struct var_to_expand *ve)
1778 {
1779   rtx reg;
1780 
1781   if (ve->reuse_expansion == 0)
1782     reg = ve->reg;
1783   else
1784     reg = ve->var_expansions[ve->reuse_expansion - 1];
1785 
1786   if (ve->var_expansions.length () == (unsigned) ve->reuse_expansion)
1787     ve->reuse_expansion = 0;
1788   else
1789     ve->reuse_expansion++;
1790 
1791   return reg;
1792 }
1793 
1794 
1795 /* Given INSN, replace the uses of the accumulator recorded in VE
1796    with a new register.  */
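
/* For instance, an accumulation "acc = acc + x" in one unrolled copy is
   rewritten to "acc1 = acc1 + x" with a fresh register acc1 (the names
   are illustrative); the copies then accumulate independently until
   they are summed at the loop exit.  */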
1797 
1798 static void
1799 expand_var_during_unrolling (struct var_to_expand *ve, rtx_insn *insn)
1800 {
1801   rtx new_reg, set;
1802   bool really_new_expansion = false;
1803 
1804   set = single_set (insn);
1805   gcc_assert (set);
1806 
1807   /* Generate a new register only if the expansion limit has not been
1808      reached.  Otherwise reuse an already existing expansion.  */
1809   if (PARAM_VALUE (PARAM_MAX_VARIABLE_EXPANSIONS) > ve->expansion_count)
1810     {
1811       really_new_expansion = true;
1812       new_reg = gen_reg_rtx (GET_MODE (ve->reg));
1813     }
1814   else
1815     new_reg = get_expansion (ve);
1816 
1817   validate_replace_rtx_group (SET_DEST (set), new_reg, insn);
1818   if (apply_change_group ())
1819     if (really_new_expansion)
1820       {
1821         ve->var_expansions.safe_push (new_reg);
1822         ve->expansion_count++;
1823       }
1824 }
1825 
1826 /* Initialize the variable expansions in the loop preheader.  PLACE is
1827    the loop-preheader basic block where the initialization of the
1828    expansions should take place.  The expansions are initialized with
1829    (-0) when the operation is plus or minus, to honor signed zero.  This
1830    way we prevent cases where the sign of the final result is
1831    affected by the sign of the expansion.  Here is an example to
1832    demonstrate this:
1833 
1834    for (i = 0 ; i < n; i++)
1835      sum += something;
1836 
1837    ==>
1838 
1839    sum += something
1840    ....
1841    i = i+1;
1842    sum1 += something
1843    ....
1844    i = i+1
1845    sum2 += something;
1846    ....
1847 
1848    When SUM is initialized with -zero and SOMETHING is also -zero, the
1849    final result of SUM should be -zero, thus the expansions sum1 and sum2
1850    should be initialized with -zero as well (otherwise we would get +zero
1851    as the final result).  */
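
/* A minimal C analogy of the above (illustrative only; a, n and the
   accumulators are hypothetical):

     float sum = -0.0f, sum1 = -0.0f, sum2 = -0.0f;
     for (i = 0; i < n; i += 3)
       {
	 sum += a[i];
	 sum1 += a[i + 1];
	 sum2 += a[i + 2];
       }
     sum += sum1 + sum2;

   Had sum1 or sum2 been initialized with +0.0f, a -0.0f result could be
   turned into +0.0f, because -0.0 + +0.0 rounds to +0.0.  */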
1852 
1853 static void
1854 insert_var_expansion_initialization (struct var_to_expand *ve,
1855 				     basic_block place)
1856 {
1857   rtx_insn *seq;
1858   rtx var, zero_init;
1859   unsigned i;
1860   machine_mode mode = GET_MODE (ve->reg);
1861   bool honor_signed_zero_p = HONOR_SIGNED_ZEROS (mode);
1862 
1863   if (ve->var_expansions.length () == 0)
1864     return;
1865 
1866   start_sequence ();
1867   switch (ve->op)
1868     {
1869     case FMA:
1870       /* Note that we only accumulate FMA via the ADD operand.  */
1871     case PLUS:
1872     case MINUS:
1873       FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
1874         {
1875 	  if (honor_signed_zero_p)
1876 	    zero_init = simplify_gen_unary (NEG, mode, CONST0_RTX (mode), mode);
1877 	  else
1878 	    zero_init = CONST0_RTX (mode);
1879           emit_move_insn (var, zero_init);
1880         }
1881       break;
1882 
1883     case MULT:
1884       FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
1885         {
1886           zero_init = CONST1_RTX (GET_MODE (var));
1887           emit_move_insn (var, zero_init);
1888         }
1889       break;
1890 
1891     default:
1892       gcc_unreachable ();
1893     }
1894 
1895   seq = get_insns ();
1896   end_sequence ();
1897 
1898   emit_insn_after (seq, BB_END (place));
1899 }
1900 
1901 /* Combine the variable expansions at the loop exit.  PLACE is the
1902    loop exit basic block where the summation of the expansions should
1903    take place.  */
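
/* For example, with two expansions of a PLUS accumulator the sequence
   emitted at the exit computes

     reg = exp1 + (exp0 + reg);

   folding each partial sum back into the original accumulator (exp0 and
   exp1 stand for the expansion registers).  */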
1904 
1905 static void
1906 combine_var_copies_in_loop_exit (struct var_to_expand *ve, basic_block place)
1907 {
1908   rtx sum = ve->reg;
1909   rtx expr, var;
1910   rtx_insn *seq, *insn;
1911   unsigned i;
1912 
1913   if (ve->var_expansions.length () == 0)
1914     return;
1915 
1916   /* ve->reg might be a SUBREG or some other non-shareable RTL, and we use
1917      it both here and as the destination of the assignment.  */
1918   sum = copy_rtx (sum);
1919   start_sequence ();
1920   switch (ve->op)
1921     {
1922     case FMA:
1923       /* Note that we only accumulate FMA via the ADD operand.  */
1924     case PLUS:
1925     case MINUS:
1926       FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
1927 	sum = simplify_gen_binary (PLUS, GET_MODE (ve->reg), var, sum);
1928       break;
1929 
1930     case MULT:
1931       FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
1932 	sum = simplify_gen_binary (MULT, GET_MODE (ve->reg), var, sum);
1933       break;
1934 
1935     default:
1936       gcc_unreachable ();
1937     }
1938 
1939   expr = force_operand (sum, ve->reg);
1940   if (expr != ve->reg)
1941     emit_move_insn (ve->reg, expr);
1942   seq = get_insns ();
1943   end_sequence ();
1944 
1945   insn = BB_HEAD (place);
1946   while (!NOTE_INSN_BASIC_BLOCK_P (insn))
1947     insn = NEXT_INSN (insn);
1948 
1949   emit_insn_after (seq, insn);
1950 }
1951 
1952 /* Strip away REG_EQUAL notes for IVs we're splitting.
1953 
1954    Updating REG_EQUAL notes for IVs we split is tricky: we
1955    cannot tell until after unrolling, DF-rescanning, and liveness
1956    updating whether an EQ_USE is reached by the split IV while
1957    the IV reg is still live.  See PR55006.
1958 
1959    ??? We cannot use remove_reg_equal_equiv_notes_for_regno,
1960    because RTL loop-iv requires us to defer rescanning insns and
1961    any notes attached to them.  So resort to old techniques...  */
1962 
1963 static void
1964 maybe_strip_eq_note_for_split_iv (struct opt_info *opt_info, rtx_insn *insn)
1965 {
1966   struct iv_to_split *ivts;
1967   rtx note = find_reg_equal_equiv_note (insn);
1968   if (! note)
1969     return;
1970   for (ivts = opt_info->iv_to_split_head; ivts; ivts = ivts->next)
1971     if (reg_mentioned_p (ivts->orig_var, note))
1972       {
1973 	remove_note (insn, note);
1974 	return;
1975       }
1976 }
1977 
1978 /* Apply loop optimizations in loop copies using the
1979    data gathered during the unrolling.  Structure
1980    OPT_INFO records that data.
1981 
1982    UNROLLING is true if we unrolled (not peeled) the loop.
1983    REWRITE_ORIGINAL_LOOP is true if we should also rewrite the original body
1984    of the loop (as happens in complete unrolling, but not in ordinary
1985    peeling of the loop).  */
1986 
1987 static void
1988 apply_opt_in_copies (struct opt_info *opt_info,
1989                      unsigned n_copies, bool unrolling,
1990                      bool rewrite_original_loop)
1991 {
1992   unsigned i, delta;
1993   basic_block bb, orig_bb;
1994   rtx_insn *insn, *orig_insn, *next;
1995   struct iv_to_split ivts_templ, *ivts;
1996   struct var_to_expand ve_templ, *ves;
1997 
1998   /* Sanity check -- we need to put the initialization in the original
1999      loop body.  */
2000   gcc_assert (!unrolling || rewrite_original_loop);
2001 
2002   /* Allocate the base variables (i0).  */
2003   if (opt_info->insns_to_split)
2004     for (ivts = opt_info->iv_to_split_head; ivts; ivts = ivts->next)
2005       allocate_basic_variable (ivts);
2006 
2007   for (i = opt_info->first_new_block;
2008        i < (unsigned) last_basic_block_for_fn (cfun);
2009        i++)
2010     {
2011       bb = BASIC_BLOCK_FOR_FN (cfun, i);
2012       orig_bb = get_bb_original (bb);
2013 
2014       /* bb->aux holds the position in the copy sequence, as initialized
2015 	 by duplicate_loop_to_header_edge.  */
2016       delta = determine_split_iv_delta ((size_t)bb->aux, n_copies,
2017 					unrolling);
2018       bb->aux = 0;
2019       orig_insn = BB_HEAD (orig_bb);
2020       FOR_BB_INSNS_SAFE (bb, insn, next)
2021         {
2022 	  if (!INSN_P (insn)
2023 	      || (DEBUG_INSN_P (insn)
2024 		  && TREE_CODE (INSN_VAR_LOCATION_DECL (insn)) == LABEL_DECL))
2025             continue;
2026 
2027 	  while (!INSN_P (orig_insn)
2028 		 || (DEBUG_INSN_P (orig_insn)
2029 		     && (TREE_CODE (INSN_VAR_LOCATION_DECL (orig_insn))
2030 			 == LABEL_DECL)))
2031             orig_insn = NEXT_INSN (orig_insn);
2032 
2033           ivts_templ.insn = orig_insn;
2034           ve_templ.insn = orig_insn;
2035 
2036           /* Apply splitting iv optimization.  */
2037           if (opt_info->insns_to_split)
2038             {
2039 	      maybe_strip_eq_note_for_split_iv (opt_info, insn);
2040 
2041               ivts = opt_info->insns_to_split->find (&ivts_templ);
2042 
2043               if (ivts)
2044                 {
2045 		  gcc_assert (GET_CODE (PATTERN (insn))
2046 			      == GET_CODE (PATTERN (orig_insn)));
2047 
2048                   if (!delta)
2049                     insert_base_initialization (ivts, insn);
2050                   split_iv (ivts, insn, delta);
2051                 }
2052             }
2053           /* Apply variable expansion optimization.  */
2054           if (unrolling && opt_info->insns_with_var_to_expand)
2055             {
2056               ves = (struct var_to_expand *)
2057 		opt_info->insns_with_var_to_expand->find (&ve_templ);
2058               if (ves)
2059                 {
2060 		  gcc_assert (GET_CODE (PATTERN (insn))
2061 			      == GET_CODE (PATTERN (orig_insn)));
2062                   expand_var_during_unrolling (ves, insn);
2063                 }
2064             }
2065           orig_insn = NEXT_INSN (orig_insn);
2066         }
2067     }
2068 
2069   if (!rewrite_original_loop)
2070     return;
2071 
2072   /* Initialize the variable expansions in the loop preheader
2073      and take care of combining them at the loop exit.  */
2074   if (opt_info->insns_with_var_to_expand)
2075     {
2076       for (ves = opt_info->var_to_expand_head; ves; ves = ves->next)
2077 	insert_var_expansion_initialization (ves, opt_info->loop_preheader);
2078       for (ves = opt_info->var_to_expand_head; ves; ves = ves->next)
2079 	combine_var_copies_in_loop_exit (ves, opt_info->loop_exit);
2080     }
2081 
2082   /* Also rewrite the original loop body.  Find its blocks as the
2083      originals of the blocks in the last copied iteration, i.e. those that have
2084      get_bb_copy (get_bb_original (bb)) == bb.  */
2085   for (i = opt_info->first_new_block;
2086        i < (unsigned) last_basic_block_for_fn (cfun);
2087        i++)
2088     {
2089       bb = BASIC_BLOCK_FOR_FN (cfun, i);
2090       orig_bb = get_bb_original (bb);
2091       if (get_bb_copy (orig_bb) != bb)
2092 	continue;
2093 
2094       delta = determine_split_iv_delta (0, n_copies, unrolling);
2095       for (orig_insn = BB_HEAD (orig_bb);
2096            orig_insn != NEXT_INSN (BB_END (bb));
2097            orig_insn = next)
2098         {
2099           next = NEXT_INSN (orig_insn);
2100 
2101           if (!INSN_P (orig_insn))
2102 	    continue;
2103 
2104           ivts_templ.insn = orig_insn;
2105           if (opt_info->insns_to_split)
2106             {
2107 	      maybe_strip_eq_note_for_split_iv (opt_info, orig_insn);
2108 
2109               ivts = (struct iv_to_split *)
2110 		opt_info->insns_to_split->find (&ivts_templ);
2111               if (ivts)
2112                 {
2113                   if (!delta)
2114                     insert_base_initialization (ivts, orig_insn);
2115                   split_iv (ivts, orig_insn, delta);
2116                   continue;
2117                 }
2118             }
2119 
2120         }
2121     }
2122 }
2123 
2124 /* Release OPT_INFO.  */
2125 
2126 static void
2127 free_opt_info (struct opt_info *opt_info)
2128 {
2129   delete opt_info->insns_to_split;
2130   opt_info->insns_to_split = NULL;
2131   if (opt_info->insns_with_var_to_expand)
2132     {
2133       struct var_to_expand *ves;
2134 
2135       for (ves = opt_info->var_to_expand_head; ves; ves = ves->next)
2136 	ves->var_expansions.release ();
2137       delete opt_info->insns_with_var_to_expand;
2138       opt_info->insns_with_var_to_expand = NULL;
2139     }
2140   free (opt_info);
2141 }
2142