/* Loop unrolling.
   Copyright (C) 2002-2017 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "cfghooks.h"
#include "memmodel.h"
#include "optabs.h"
#include "emit-rtl.h"
#include "recog.h"
#include "profile.h"
#include "cfgrtl.h"
#include "cfgloop.h"
#include "params.h"
#include "dojump.h"
#include "expr.h"
#include "dumpfile.h"

/* This pass performs loop unrolling.  We only perform this
   optimization on innermost loops (with a single exception) because
   the impact on performance is greatest here, and we want to avoid
   unnecessary code size growth.  The gain comes from the greater
   sequentiality of the code, better opportunities for subsequent
   optimization passes, and in some cases from fewer tests of the
   exit condition.  The main problem is code growth, which impacts
   performance negatively due to cache effects.

   What we do:

   -- unrolling of loops that roll a constant number of times; this is
      almost always a win, as we get rid of the exit condition tests.
   -- unrolling of loops that roll a number of times that we can compute
      at runtime; we also get rid of exit condition tests here, but there
      is the extra expense of calculating the number of iterations
   -- simple unrolling of remaining loops; this is performed only if we
      are asked to, as the gain is questionable in this case and often
      it may even slow down the code
   For more detailed descriptions of each of those, see the comments at
   the appropriate functions below.

   There are a lot of parameters (defined and described in params.def)
   that control how much we unroll.

   ??? A big problem is that we don't have a good way to determine
   how many times we should unroll the loop; the experiments I have made
   showed that this choice may affect performance on the order of
   several %.  */

/* Information about induction variables to split.  */

struct iv_to_split
{
  rtx_insn *insn;	/* The insn in which the induction variable occurs.  */
  rtx orig_var;		/* The variable (register) for the IV before split.  */
  rtx base_var;		/* The variable on which the values in the further
			   iterations are based.  */
  rtx step;		/* Step of the induction variable.  */
  struct iv_to_split *next; /* Next entry in walking order.  */
};

/* Information about accumulators to expand.  */

struct var_to_expand
{
  rtx_insn *insn;	           /* The insn in which the variable expansion occurs.  */
  rtx reg;                         /* The accumulator which is expanded.  */
  vec<rtx> var_expansions;   /* The copies of the accumulator which is expanded.  */
  struct var_to_expand *next;	   /* Next entry in walking order.  */
  enum rtx_code op;                /* The type of the accumulation - addition, subtraction
                                      or multiplication.  */
  int expansion_count;             /* Count the number of expansions generated so far.  */
  int reuse_expansion;             /* The expansion we intend to reuse to expand
                                      the accumulator.  If REUSE_EXPANSION is 0 reuse
                                      the original accumulator.  Else use
                                      var_expansions[REUSE_EXPANSION - 1].  */
};

/* Hashtable helper for iv_to_split.  */

struct iv_split_hasher : free_ptr_hash <iv_to_split>
{
  static inline hashval_t hash (const iv_to_split *);
  static inline bool equal (const iv_to_split *, const iv_to_split *);
};


/* A hash function for information about insns to split.  */

inline hashval_t
iv_split_hasher::hash (const iv_to_split *ivts)
{
  return (hashval_t) INSN_UID (ivts->insn);
}

/* An equality function for information about insns to split.  */

inline bool
iv_split_hasher::equal (const iv_to_split *i1, const iv_to_split *i2)
{
  return i1->insn == i2->insn;
}

/* Hashtable helper for var_to_expand.  */

struct var_expand_hasher : free_ptr_hash <var_to_expand>
{
  static inline hashval_t hash (const var_to_expand *);
  static inline bool equal (const var_to_expand *, const var_to_expand *);
};

/* Return a hash for VES.  */

inline hashval_t
var_expand_hasher::hash (const var_to_expand *ves)
{
  return (hashval_t) INSN_UID (ves->insn);
}

/* Return true if I1 and I2 refer to the same instruction.  */

inline bool
var_expand_hasher::equal (const var_to_expand *i1, const var_to_expand *i2)
{
  return i1->insn == i2->insn;
}

/* Information about optimization applied in
   the unrolled loop.  */

struct opt_info
{
  hash_table<iv_split_hasher> *insns_to_split; /* A hashtable of insns to
						  split.  */
  struct iv_to_split *iv_to_split_head; /* The first iv to split.  */
  struct iv_to_split **iv_to_split_tail; /* Pointer to the tail of the list.  */
  hash_table<var_expand_hasher> *insns_with_var_to_expand; /* A hashtable of
					insns with accumulators to expand.  */
  struct var_to_expand *var_to_expand_head; /* The first var to expand.  */
  struct var_to_expand **var_to_expand_tail; /* Pointer to the tail of the list.  */
  unsigned first_new_block;        /* The first basic block that was
                                      duplicated.  */
  basic_block loop_exit;           /* The loop exit basic block.  */
  basic_block loop_preheader;      /* The loop preheader basic block.  */
};

static void decide_unroll_stupid (struct loop *, int);
static void decide_unroll_constant_iterations (struct loop *, int);
static void decide_unroll_runtime_iterations (struct loop *, int);
static void unroll_loop_stupid (struct loop *);
static void decide_unrolling (int);
static void unroll_loop_constant_iterations (struct loop *);
static void unroll_loop_runtime_iterations (struct loop *);
static struct opt_info *analyze_insns_in_loop (struct loop *);
static void opt_info_start_duplication (struct opt_info *);
static void apply_opt_in_copies (struct opt_info *, unsigned, bool, bool);
static void free_opt_info (struct opt_info *);
static struct var_to_expand *analyze_insn_to_expand_var (struct loop*, rtx_insn *);
static bool referenced_in_one_insn_in_loop_p (struct loop *, rtx, int *);
static struct iv_to_split *analyze_iv_to_split_insn (rtx_insn *);
static void expand_var_during_unrolling (struct var_to_expand *, rtx_insn *);
static void insert_var_expansion_initialization (struct var_to_expand *,
						 basic_block);
static void combine_var_copies_in_loop_exit (struct var_to_expand *,
					     basic_block);
static rtx get_expansion (struct var_to_expand *);

/* Emit a message summarizing the unroll that will be
   performed for LOOP, along with the loop's location LOCUS, if
   appropriate given the dump or -fopt-info settings.  */

static void
report_unroll (struct loop *loop, location_t locus)
{
  int report_flags = MSG_OPTIMIZED_LOCATIONS | TDF_RTL | TDF_DETAILS;

  if (loop->lpt_decision.decision == LPT_NONE)
    return;

  if (!dump_enabled_p ())
    return;

  dump_printf_loc (report_flags, locus,
                   "loop unrolled %d times",
                   loop->lpt_decision.times);
  if (profile_info)
    dump_printf (report_flags,
                 " (header execution count %d)",
                 (int)loop->header->count);

  dump_printf (report_flags, "\n");
}

/* Decide whether to unroll loops and how much.  */
static void
decide_unrolling (int flags)
{
  struct loop *loop;

  /* Scan the loops, inner ones first.  */
  FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
    {
      loop->lpt_decision.decision = LPT_NONE;
      location_t locus = get_loop_location (loop);

      if (dump_enabled_p ())
	dump_printf_loc (TDF_RTL, locus,
                         ";; *** Considering loop %d at BB %d for "
                         "unrolling ***\n",
                         loop->num, loop->header->index);

      /* Do not unroll cold areas.  */
      if (optimize_loop_for_size_p (loop))
	{
	  if (dump_file)
	    fprintf (dump_file, ";; Not considering loop, cold area\n");
	  continue;
	}

      /* Can the loop be manipulated?  */
      if (!can_duplicate_loop_p (loop))
	{
	  if (dump_file)
	    fprintf (dump_file,
		     ";; Not considering loop, cannot duplicate\n");
	  continue;
	}

      /* Skip non-innermost loops.  */
      if (loop->inner)
	{
	  if (dump_file)
	    fprintf (dump_file, ";; Not considering loop, is not innermost\n");
	  continue;
	}

      loop->ninsns = num_loop_insns (loop);
      loop->av_ninsns = average_num_loop_insns (loop);

      /* Try transformations one by one in decreasing order of
	 priority.  */

      decide_unroll_constant_iterations (loop, flags);
      if (loop->lpt_decision.decision == LPT_NONE)
	decide_unroll_runtime_iterations (loop, flags);
      if (loop->lpt_decision.decision == LPT_NONE)
	decide_unroll_stupid (loop, flags);

      report_unroll (loop, locus);
    }
}

/* Unroll LOOPS.  */
void
unroll_loops (int flags)
{
  struct loop *loop;
  bool changed = false;

  /* First decide how much to unroll.  */
  decide_unrolling (flags);

  /* Scan the loops, inner ones first.  */
  FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
    {
      /* And perform the appropriate transformations.  */
      switch (loop->lpt_decision.decision)
	{
	case LPT_UNROLL_CONSTANT:
	  unroll_loop_constant_iterations (loop);
	  changed = true;
	  break;
	case LPT_UNROLL_RUNTIME:
	  unroll_loop_runtime_iterations (loop);
	  changed = true;
	  break;
	case LPT_UNROLL_STUPID:
	  unroll_loop_stupid (loop);
	  changed = true;
	  break;
	case LPT_NONE:
	  break;
	default:
	  gcc_unreachable ();
	}
    }

  if (changed)
    {
      calculate_dominance_info (CDI_DOMINATORS);
      fix_loop_structure (NULL);
    }

  iv_analysis_done ();
}

/* Check whether the exit of LOOP is at the end of the loop body.  */

static bool
loop_exit_at_end_p (struct loop *loop)
{
  struct niter_desc *desc = get_simple_loop_desc (loop);
  rtx_insn *insn;

  /* We should never have a conditional in the latch block.  */
  gcc_assert (desc->in_edge->dest != loop->header);

  if (desc->in_edge->dest != loop->latch)
    return false;

  /* Check that the latch is empty.  */
  FOR_BB_INSNS (loop->latch, insn)
    {
      if (INSN_P (insn) && active_insn_p (insn))
	return false;
    }

  return true;
}

/* Decide whether to unroll LOOP iterating a constant number of times
   and how much.  */

static void
decide_unroll_constant_iterations (struct loop *loop, int flags)
{
  unsigned nunroll, nunroll_by_av, best_copies, best_unroll = 0, n_copies, i;
  struct niter_desc *desc;
  widest_int iterations;

  if (!(flags & UAP_UNROLL))
    {
      /* We were not asked to; just return silently.  */
      return;
    }

  if (dump_file)
    fprintf (dump_file,
	     "\n;; Considering unrolling loop with constant "
	     "number of iterations\n");

  /* nunroll = total number of copies of the original loop body in
     the unrolled loop (i.e. if it is 2, we have to duplicate the loop
     body once).  */
  nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
  nunroll_by_av
    = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
  if (nunroll > nunroll_by_av)
    nunroll = nunroll_by_av;
  if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
    nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);

  if (targetm.loop_unroll_adjust)
    nunroll = targetm.loop_unroll_adjust (nunroll, loop);

  /* Skip big loops.  */
  if (nunroll <= 1)
    {
      if (dump_file)
	fprintf (dump_file, ";; Not considering loop, is too big\n");
      return;
    }

  /* Check for simple loops.  */
  desc = get_simple_loop_desc (loop);

  /* Check number of iterations.  */
  if (!desc->simple_p || !desc->const_iter || desc->assumptions)
    {
      if (dump_file)
	fprintf (dump_file,
		 ";; Unable to prove that the loop iterates constant times\n");
      return;
    }

  /* Check whether the loop rolls enough to consider.
     Consult also loop bounds and profile; in case the loop has more
     than one exit it may well loop fewer times than the determined
     maximal number of iterations.  */
  if (desc->niter < 2 * nunroll
      || ((get_estimated_loop_iterations (loop, &iterations)
	   || get_likely_max_loop_iterations (loop, &iterations))
	  && wi::ltu_p (iterations, 2 * nunroll)))
    {
      if (dump_file)
	fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
      return;
    }

  /* Success; now compute the number of iterations to unroll.  We alter
     nunroll so that as few copies of the loop body as possible are
     necessary, while still not decreasing the number of unrollings
     too much (at most by 1).  */
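  /* For example (illustrative): with nunroll == 4 and desc->niter == 102,
     and assuming the exit test is not at the end of the body, the search
     below tries i from 10 down to 3 and settles on i == 5, since
     102 % 6 == 0 yields six copies of the body (n_copies == 6) with no
     peeled prologue iterations.  */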
  best_copies = 2 * nunroll + 10;

  i = 2 * nunroll + 2;
  if (i - 1 >= desc->niter)
    i = desc->niter - 2;

  for (; i >= nunroll - 1; i--)
    {
      unsigned exit_mod = desc->niter % (i + 1);

      if (!loop_exit_at_end_p (loop))
	n_copies = exit_mod + i + 1;
      else if (exit_mod != (unsigned) i
	       || desc->noloop_assumptions != NULL_RTX)
	n_copies = exit_mod + i + 2;
      else
	n_copies = i + 1;

      if (n_copies < best_copies)
	{
	  best_copies = n_copies;
	  best_unroll = i;
	}
    }

  loop->lpt_decision.decision = LPT_UNROLL_CONSTANT;
  loop->lpt_decision.times = best_unroll;
}

/* Unroll LOOP with constant number of iterations LOOP->LPT_DECISION.TIMES times.
   The transformation does this:

   for (i = 0; i < 102; i++)
     body;

   ==>  (LOOP->LPT_DECISION.TIMES == 3)

   i = 0;
   body; i++;
   body; i++;
   while (i < 102)
     {
       body; i++;
       body; i++;
       body; i++;
       body; i++;
     }
  */
static void
unroll_loop_constant_iterations (struct loop *loop)
{
  unsigned HOST_WIDE_INT niter;
  unsigned exit_mod;
  unsigned i;
  edge e;
  unsigned max_unroll = loop->lpt_decision.times;
  struct niter_desc *desc = get_simple_loop_desc (loop);
  bool exit_at_end = loop_exit_at_end_p (loop);
  struct opt_info *opt_info = NULL;
  bool ok;

  niter = desc->niter;

  /* Should not get here (such a loop should be peeled instead).  */
  gcc_assert (niter > max_unroll + 1);

  exit_mod = niter % (max_unroll + 1);

  auto_sbitmap wont_exit (max_unroll + 2);
  bitmap_ones (wont_exit);

  auto_vec<edge> remove_edges;
  if (flag_split_ivs_in_unroller
      || flag_variable_expansion_in_unroller)
    opt_info = analyze_insns_in_loop (loop);

  if (!exit_at_end)
    {
      /* The exit is not at the end of the loop; leave the exit test
	 in the first copy, so that loops that start with a test of the
	 exit condition have a contiguous body after unrolling.  */

      if (dump_file)
	fprintf (dump_file, ";; Condition at beginning of loop.\n");

      /* Peel exit_mod iterations.  */
      bitmap_clear_bit (wont_exit, 0);
      if (desc->noloop_assumptions)
	bitmap_clear_bit (wont_exit, 1);

      if (exit_mod)
	{
	  opt_info_start_duplication (opt_info);
	  ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
					      exit_mod,
					      wont_exit, desc->out_edge,
					      &remove_edges,
					      DLTHE_FLAG_UPDATE_FREQ
					      | (opt_info && exit_mod > 1
						 ? DLTHE_RECORD_COPY_NUMBER
						   : 0));
	  gcc_assert (ok);

	  if (opt_info && exit_mod > 1)
	    apply_opt_in_copies (opt_info, exit_mod, false, false);

	  desc->noloop_assumptions = NULL_RTX;
	  desc->niter -= exit_mod;
	  loop->nb_iterations_upper_bound -= exit_mod;
	  if (loop->any_estimate
	      && wi::leu_p (exit_mod, loop->nb_iterations_estimate))
	    loop->nb_iterations_estimate -= exit_mod;
	  else
	    loop->any_estimate = false;
	  if (loop->any_likely_upper_bound
	      && wi::leu_p (exit_mod, loop->nb_iterations_likely_upper_bound))
	    loop->nb_iterations_likely_upper_bound -= exit_mod;
	  else
	    loop->any_likely_upper_bound = false;
	}

      bitmap_set_bit (wont_exit, 1);
    }
  else
    {
      /* Leave the exit test in the last copy, for the same reason as
	 above, if the loop tests the condition at the end of the loop
	 body.  */

      if (dump_file)
	fprintf (dump_file, ";; Condition at end of loop.\n");

      /* We know that niter >= max_unroll + 2; so we do not need to care
	 about the case when we would exit before reaching the loop.  So
	 just peel exit_mod + 1 iterations.  */
      if (exit_mod != max_unroll
	  || desc->noloop_assumptions)
	{
	  bitmap_clear_bit (wont_exit, 0);
	  if (desc->noloop_assumptions)
	    bitmap_clear_bit (wont_exit, 1);

	  opt_info_start_duplication (opt_info);
	  ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
					      exit_mod + 1,
					      wont_exit, desc->out_edge,
					      &remove_edges,
					      DLTHE_FLAG_UPDATE_FREQ
					      | (opt_info && exit_mod > 0
						 ? DLTHE_RECORD_COPY_NUMBER
						   : 0));
	  gcc_assert (ok);

	  if (opt_info && exit_mod > 0)
	    apply_opt_in_copies (opt_info, exit_mod + 1, false, false);

	  desc->niter -= exit_mod + 1;
	  loop->nb_iterations_upper_bound -= exit_mod + 1;
	  if (loop->any_estimate
	      && wi::leu_p (exit_mod + 1, loop->nb_iterations_estimate))
	    loop->nb_iterations_estimate -= exit_mod + 1;
	  else
	    loop->any_estimate = false;
	  if (loop->any_likely_upper_bound
	      && wi::leu_p (exit_mod + 1, loop->nb_iterations_likely_upper_bound))
	    loop->nb_iterations_likely_upper_bound -= exit_mod + 1;
	  else
	    loop->any_likely_upper_bound = false;
	  desc->noloop_assumptions = NULL_RTX;

	  bitmap_set_bit (wont_exit, 0);
	  bitmap_set_bit (wont_exit, 1);
	}

      bitmap_clear_bit (wont_exit, max_unroll);
    }

  /* Now unroll the loop.  */

  opt_info_start_duplication (opt_info);
  ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
				      max_unroll,
				      wont_exit, desc->out_edge,
				      &remove_edges,
				      DLTHE_FLAG_UPDATE_FREQ
				      | (opt_info
					 ? DLTHE_RECORD_COPY_NUMBER
					   : 0));
  gcc_assert (ok);

  if (opt_info)
    {
      apply_opt_in_copies (opt_info, max_unroll, true, true);
      free_opt_info (opt_info);
    }

  if (exit_at_end)
    {
      basic_block exit_block = get_bb_copy (desc->in_edge->src);
      /* Find a new in and out edge; they are in the last copy we have made.  */

      if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
	{
	  desc->out_edge = EDGE_SUCC (exit_block, 0);
	  desc->in_edge = EDGE_SUCC (exit_block, 1);
	}
      else
	{
	  desc->out_edge = EDGE_SUCC (exit_block, 1);
	  desc->in_edge = EDGE_SUCC (exit_block, 0);
	}
    }

  desc->niter /= max_unroll + 1;
  loop->nb_iterations_upper_bound
    = wi::udiv_trunc (loop->nb_iterations_upper_bound, max_unroll + 1);
  if (loop->any_estimate)
    loop->nb_iterations_estimate
      = wi::udiv_trunc (loop->nb_iterations_estimate, max_unroll + 1);
  if (loop->any_likely_upper_bound)
    loop->nb_iterations_likely_upper_bound
      = wi::udiv_trunc (loop->nb_iterations_likely_upper_bound, max_unroll + 1);
  desc->niter_expr = gen_int_mode (desc->niter, desc->mode);

  /* Remove the edges.  */
  FOR_EACH_VEC_ELT (remove_edges, i, e)
    remove_path (e);

  if (dump_file)
    fprintf (dump_file,
	     ";; Unrolled loop %d times, constant # of iterations %i insns\n",
	     max_unroll, num_loop_insns (loop));
}

/* Decide whether to unroll LOOP iterating a runtime-computable number
   of times and how much.  */
static void
decide_unroll_runtime_iterations (struct loop *loop, int flags)
{
  unsigned nunroll, nunroll_by_av, i;
  struct niter_desc *desc;
  widest_int iterations;

  if (!(flags & UAP_UNROLL))
    {
      /* We were not asked to; just return silently.  */
      return;
    }

  if (dump_file)
    fprintf (dump_file,
	     "\n;; Considering unrolling loop with runtime "
	     "computable number of iterations\n");

  /* nunroll = total number of copies of the original loop body in
     the unrolled loop (i.e. if it is 2, we have to duplicate the loop
     body once).  */
  nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
  nunroll_by_av = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
  if (nunroll > nunroll_by_av)
    nunroll = nunroll_by_av;
  if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
    nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);

  if (targetm.loop_unroll_adjust)
    nunroll = targetm.loop_unroll_adjust (nunroll, loop);

  /* Skip big loops.  */
  if (nunroll <= 1)
    {
      if (dump_file)
	fprintf (dump_file, ";; Not considering loop, is too big\n");
      return;
    }

  /* Check for simple loops.  */
  desc = get_simple_loop_desc (loop);

  /* Check simpleness.  */
  if (!desc->simple_p || desc->assumptions)
    {
      if (dump_file)
	fprintf (dump_file,
		 ";; Unable to prove that the number of iterations "
		 "can be counted in runtime\n");
      return;
    }

  if (desc->const_iter)
    {
      if (dump_file)
	fprintf (dump_file, ";; Loop iterates constant times\n");
      return;
    }

  /* Check whether the loop rolls.  */
  if ((get_estimated_loop_iterations (loop, &iterations)
       || get_likely_max_loop_iterations (loop, &iterations))
      && wi::ltu_p (iterations, 2 * nunroll))
    {
      if (dump_file)
	fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
      return;
    }

  /* Success; now force nunroll to be a power of 2, as we are unable to
     cope with overflows in the computation of the number of
     iterations.  */
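  /* For example (illustrative): nunroll == 7 leaves i == 4 after the
     loop below, so lpt_decision.times == 3 and the unrolled loop will
     contain four copies of the body -- a power of two.  */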
  for (i = 1; 2 * i <= nunroll; i *= 2)
    continue;

  loop->lpt_decision.decision = LPT_UNROLL_RUNTIME;
  loop->lpt_decision.times = i - 1;
}

/* Splits edge E and inserts the sequence of instructions INSNS on it, and
   returns the newly created block.  If INSNS is NULL_RTX, nothing is changed
   and NULL is returned instead.  */

basic_block
split_edge_and_insert (edge e, rtx_insn *insns)
{
  basic_block bb;

  if (!insns)
    return NULL;
  bb = split_edge (e);
  emit_insn_after (insns, BB_END (bb));

  /* ??? We used to assume that INSNS can contain control flow insns, and
     that we had to try to find sub basic blocks in BB to maintain a valid
     CFG.  For this purpose we used to set the BB_SUPERBLOCK flag on BB
     and call break_superblocks when going out of cfglayout mode.  But it
     turns out that this never happens; and that if it does ever happen,
     the verify_flow_info at the end of the RTL loop passes would fail.

     There are two reasons why we expected we could have control flow insns
     in INSNS.  The first is when a comparison has to be done in parts, and
     the second is when the number of iterations is computed for loops with
     the number of iterations known at runtime.  In both cases, test cases
     to get control flow in INSNS appear to be impossible to construct:

      * If do_compare_rtx_and_jump needs several branches to do comparison
	in a mode that needs comparison by parts, we cannot analyze the
	number of iterations of the loop, and we never get to unrolling it.

      * The code in expand_divmod that was suspected to cause creation of
	branching code seems to be only accessed for signed division.  The
	divisions used by # of iterations analysis are always unsigned.
	Problems might arise on architectures that emit branching code
	for some operations that may appear in the unroller (especially
	for division), but we have no such architectures.

     Considering all this, it was decided that we should for now assume
     that INSNS can in theory contain control flow insns, but in practice
     it never does.  So we don't handle the theoretical case, and should
     a real failure ever show up, we have a pretty good clue for how to
     fix it.  */

  return bb;
}

/* Prepare a sequence comparing OP0 with OP1 using COMP and jumping to LABEL if
   true, with probability PROB.  If CINSN is not NULL, it is the insn to copy
   in order to create a jump.  */

static rtx_insn *
compare_and_jump_seq (rtx op0, rtx op1, enum rtx_code comp,
		      rtx_code_label *label, int prob, rtx_insn *cinsn)
{
  rtx_insn *seq;
  rtx_jump_insn *jump;
  rtx cond;
  machine_mode mode;

  mode = GET_MODE (op0);
  if (mode == VOIDmode)
    mode = GET_MODE (op1);

  start_sequence ();
  if (GET_MODE_CLASS (mode) == MODE_CC)
    {
      /* A hack -- there seems to be no easy generic way to make a
	 conditional jump from a ccmode comparison.  */
      gcc_assert (cinsn);
      cond = XEXP (SET_SRC (pc_set (cinsn)), 0);
      gcc_assert (GET_CODE (cond) == comp);
      gcc_assert (rtx_equal_p (op0, XEXP (cond, 0)));
      gcc_assert (rtx_equal_p (op1, XEXP (cond, 1)));
      emit_jump_insn (copy_insn (PATTERN (cinsn)));
      jump = as_a <rtx_jump_insn *> (get_last_insn ());
      JUMP_LABEL (jump) = JUMP_LABEL (cinsn);
      LABEL_NUSES (JUMP_LABEL (jump))++;
      redirect_jump (jump, label, 0);
    }
  else
    {
      gcc_assert (!cinsn);

      op0 = force_operand (op0, NULL_RTX);
      op1 = force_operand (op1, NULL_RTX);
      do_compare_rtx_and_jump (op0, op1, comp, 0,
			       mode, NULL_RTX, NULL, label, -1);
      jump = as_a <rtx_jump_insn *> (get_last_insn ());
      jump->set_jump_target (label);
      LABEL_NUSES (label)++;
    }
  add_int_reg_note (jump, REG_BR_PROB, prob);

  seq = get_insns ();
  end_sequence ();

  return seq;
}

/* Unroll LOOP, for which we are able to count the number of iterations
   at runtime, LOOP->LPT_DECISION.TIMES times.  The transformation does
   this (with some extra care for the case n < 0):

   for (i = 0; i < n; i++)
     body;

   ==>  (LOOP->LPT_DECISION.TIMES == 3)

   i = 0;
   mod = n % 4;

   switch (mod)
     {
       case 3:
         body; i++;
       case 2:
         body; i++;
       case 1:
         body; i++;
       case 0: ;
     }

   while (i < n)
     {
       body; i++;
       body; i++;
       body; i++;
       body; i++;
     }
   */
static void
unroll_loop_runtime_iterations (struct loop *loop)
{
  rtx old_niter, niter, tmp;
  rtx_insn *init_code, *branch_code;
  unsigned i, j, p;
  basic_block preheader, *body, swtch, ezc_swtch = NULL;
  int may_exit_copy, iter_freq, new_freq;
  gcov_type iter_count, new_count;
  unsigned n_peel;
  edge e;
  bool extra_zero_check, last_may_exit;
  unsigned max_unroll = loop->lpt_decision.times;
  struct niter_desc *desc = get_simple_loop_desc (loop);
  bool exit_at_end = loop_exit_at_end_p (loop);
  struct opt_info *opt_info = NULL;
  bool ok;

  if (flag_split_ivs_in_unroller
      || flag_variable_expansion_in_unroller)
    opt_info = analyze_insns_in_loop (loop);

  /* Remember blocks whose dominators will have to be updated.  */
  auto_vec<basic_block> dom_bbs;

  body = get_loop_body (loop);
  for (i = 0; i < loop->num_nodes; i++)
    {
      vec<basic_block> ldom;
      basic_block bb;

      ldom = get_dominated_by (CDI_DOMINATORS, body[i]);
      FOR_EACH_VEC_ELT (ldom, j, bb)
	if (!flow_bb_inside_loop_p (loop, bb))
	  dom_bbs.safe_push (bb);

      ldom.release ();
    }
  free (body);

  if (!exit_at_end)
    {
      /* Leave the exit in the first copy (for an explanation why, see
	 the comment in unroll_loop_constant_iterations).  */
      may_exit_copy = 0;
      n_peel = max_unroll - 1;
      extra_zero_check = true;
      last_may_exit = false;
    }
  else
    {
      /* Leave the exit in the last copy (for an explanation why, see
	 the comment in unroll_loop_constant_iterations).  */
      may_exit_copy = max_unroll;
      n_peel = max_unroll;
      extra_zero_check = false;
      last_may_exit = true;
    }

  /* Get an expression for the number of iterations.  */
  start_sequence ();
  old_niter = niter = gen_reg_rtx (desc->mode);
  tmp = force_operand (copy_rtx (desc->niter_expr), niter);
  if (tmp != niter)
    emit_move_insn (niter, tmp);

  /* For loops that exit at the end and whose number of iterations is
     reliable, add one to niter to account for the first pass through
     the loop body before reaching the exit test.  */
  if (exit_at_end && !desc->noloop_assumptions)
    {
      niter = expand_simple_binop (desc->mode, PLUS,
				   niter, const1_rtx,
				   NULL_RTX, 0, OPTAB_LIB_WIDEN);
      old_niter = niter;
    }

  /* Compute the modulo by ANDing niter with max_unroll; we use the fact
     that the number of unrollings is a power of two, and thus this is
     correct even if there is overflow in the computation.  */
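  /* E.g. (illustrative): with max_unroll == 3 the unrolled loop holds
     four copies of the body, and niter & 3 equals niter % 4 for any
     unsigned niter, including values that wrapped around above.  */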
  niter = expand_simple_binop (desc->mode, AND,
			       niter, gen_int_mode (max_unroll, desc->mode),
			       NULL_RTX, 0, OPTAB_LIB_WIDEN);

  init_code = get_insns ();
  end_sequence ();
  unshare_all_rtl_in_chain (init_code);

  /* Precondition the loop.  */
  split_edge_and_insert (loop_preheader_edge (loop), init_code);

  auto_vec<edge> remove_edges;

  auto_sbitmap wont_exit (max_unroll + 2);

  if (extra_zero_check || desc->noloop_assumptions)
    {
      /* Peel the first copy of the loop body.  Leave the exit test if
	 the number of iterations is not reliable.  Also record the
	 place of the extra zero check.  */
      bitmap_clear (wont_exit);
      if (!desc->noloop_assumptions)
	bitmap_set_bit (wont_exit, 1);
      ezc_swtch = loop_preheader_edge (loop)->src;
      ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
					  1, wont_exit, desc->out_edge,
					  &remove_edges,
					  DLTHE_FLAG_UPDATE_FREQ);
      gcc_assert (ok);
    }

  /* Record the place where the switch will be built for preconditioning.  */
  swtch = split_edge (loop_preheader_edge (loop));

  /* Compute frequency/count increments for each switch block and initialize
     the innermost switch block.  Switch blocks and peeled loop copies are
     built from innermost outward.  */
  iter_freq = new_freq = swtch->frequency / (max_unroll + 1);
  iter_count = new_count = swtch->count / (max_unroll + 1);
  swtch->frequency = new_freq;
  swtch->count = new_count;
  single_succ_edge (swtch)->count = new_count;

  for (i = 0; i < n_peel; i++)
    {
      /* Peel the copy.  */
      bitmap_clear (wont_exit);
      if (i != n_peel - 1 || !last_may_exit)
	bitmap_set_bit (wont_exit, 1);
      ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
					  1, wont_exit, desc->out_edge,
					  &remove_edges,
					  DLTHE_FLAG_UPDATE_FREQ);
      gcc_assert (ok);

      /* Create an item for the switch.  */
      j = n_peel - i - (extra_zero_check ? 0 : 1);
      p = REG_BR_PROB_BASE / (i + 2);

      preheader = split_edge (loop_preheader_edge (loop));
      /* Add in the frequency/count of the edge from the switch block.  */
      preheader->frequency += iter_freq;
      preheader->count += iter_count;
      single_succ_edge (preheader)->count = preheader->count;
      branch_code = compare_and_jump_seq (copy_rtx (niter),
					  gen_int_mode (j, desc->mode), EQ,
					  block_label (preheader), p,
					  NULL);

      /* We rely on the fact that the compare and jump cannot be optimized out,
	 and hence the cfg we create is correct.  */
      gcc_assert (branch_code != NULL_RTX);

      swtch = split_edge_and_insert (single_pred_edge (swtch), branch_code);
      set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
      single_succ_edge (swtch)->probability = REG_BR_PROB_BASE - p;
      single_succ_edge (swtch)->count = new_count;
      new_freq += iter_freq;
      new_count += iter_count;
      swtch->frequency = new_freq;
      swtch->count = new_count;
      e = make_edge (swtch, preheader,
		     single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
      e->count = iter_count;
      e->probability = p;
    }

  if (extra_zero_check)
    {
      /* Add a branch for zero iterations.  */
      p = REG_BR_PROB_BASE / (max_unroll + 1);
      swtch = ezc_swtch;
      preheader = split_edge (loop_preheader_edge (loop));
      /* Recompute the frequency/count adjustments, since the initial peel
	 copy may have exited and reduced those values that were computed
	 above.  */
      iter_freq = swtch->frequency / (max_unroll + 1);
      iter_count = swtch->count / (max_unroll + 1);
      /* Add in the frequency/count of the edge from the switch block.  */
      preheader->frequency += iter_freq;
      preheader->count += iter_count;
      single_succ_edge (preheader)->count = preheader->count;
      branch_code = compare_and_jump_seq (copy_rtx (niter), const0_rtx, EQ,
					  block_label (preheader), p,
					  NULL);
      gcc_assert (branch_code != NULL_RTX);

      swtch = split_edge_and_insert (single_succ_edge (swtch), branch_code);
      set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
      single_succ_edge (swtch)->probability = REG_BR_PROB_BASE - p;
      single_succ_edge (swtch)->count -= iter_count;
      e = make_edge (swtch, preheader,
		     single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
      e->count = iter_count;
      e->probability = p;
    }

  /* Recount dominators for outer blocks.  */
  iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, false);

  /* And unroll the loop.  */

  bitmap_ones (wont_exit);
  bitmap_clear_bit (wont_exit, may_exit_copy);
  opt_info_start_duplication (opt_info);

  ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
				      max_unroll,
				      wont_exit, desc->out_edge,
				      &remove_edges,
				      DLTHE_FLAG_UPDATE_FREQ
				      | (opt_info
					 ? DLTHE_RECORD_COPY_NUMBER
					   : 0));
  gcc_assert (ok);

  if (opt_info)
    {
      apply_opt_in_copies (opt_info, max_unroll, true, true);
      free_opt_info (opt_info);
    }

  if (exit_at_end)
    {
      basic_block exit_block = get_bb_copy (desc->in_edge->src);
      /* Find a new in and out edge; they are in the last copy we have
	 made.  */

      if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
	{
	  desc->out_edge = EDGE_SUCC (exit_block, 0);
	  desc->in_edge = EDGE_SUCC (exit_block, 1);
	}
      else
	{
	  desc->out_edge = EDGE_SUCC (exit_block, 1);
	  desc->in_edge = EDGE_SUCC (exit_block, 0);
	}
    }

  /* Remove the edges.  */
  FOR_EACH_VEC_ELT (remove_edges, i, e)
    remove_path (e);

  /* We must be careful when updating the number of iterations due to
     preconditioning and the fact that the value must be valid at the
     entry of the loop.  After passing through the above code, we see
     that the correct new number of iterations is this:  */
  gcc_assert (!desc->const_iter);
  desc->niter_expr =
    simplify_gen_binary (UDIV, desc->mode, old_niter,
			 gen_int_mode (max_unroll + 1, desc->mode));
  loop->nb_iterations_upper_bound
    = wi::udiv_trunc (loop->nb_iterations_upper_bound, max_unroll + 1);
  if (loop->any_estimate)
    loop->nb_iterations_estimate
      = wi::udiv_trunc (loop->nb_iterations_estimate, max_unroll + 1);
  if (loop->any_likely_upper_bound)
    loop->nb_iterations_likely_upper_bound
      = wi::udiv_trunc (loop->nb_iterations_likely_upper_bound, max_unroll + 1);
  if (exit_at_end)
    {
      desc->niter_expr =
	simplify_gen_binary (MINUS, desc->mode, desc->niter_expr, const1_rtx);
      desc->noloop_assumptions = NULL_RTX;
      --loop->nb_iterations_upper_bound;
      if (loop->any_estimate
	  && loop->nb_iterations_estimate != 0)
	--loop->nb_iterations_estimate;
      else
	loop->any_estimate = false;
      if (loop->any_likely_upper_bound
	  && loop->nb_iterations_likely_upper_bound != 0)
	--loop->nb_iterations_likely_upper_bound;
      else
	loop->any_likely_upper_bound = false;
    }

  if (dump_file)
    fprintf (dump_file,
	     ";; Unrolled loop %d times, counting # of iterations "
	     "in runtime, %i insns\n",
	     max_unroll, num_loop_insns (loop));
}

/* Decide whether to unroll LOOP stupidly and how much.  */
static void
decide_unroll_stupid (struct loop *loop, int flags)
{
  unsigned nunroll, nunroll_by_av, i;
  struct niter_desc *desc;
  widest_int iterations;

  if (!(flags & UAP_UNROLL_ALL))
    {
      /* We were not asked to; just return silently.  */
      return;
    }

  if (dump_file)
    fprintf (dump_file, "\n;; Considering unrolling loop stupidly\n");

  /* nunroll = total number of copies of the original loop body in
     the unrolled loop (i.e. if it is 2, we have to duplicate the loop
     body once).  */
  nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
  nunroll_by_av
    = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
  if (nunroll > nunroll_by_av)
    nunroll = nunroll_by_av;
  if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
    nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);

  if (targetm.loop_unroll_adjust)
    nunroll = targetm.loop_unroll_adjust (nunroll, loop);

  /* Skip big loops.  */
  if (nunroll <= 1)
    {
      if (dump_file)
	fprintf (dump_file, ";; Not considering loop, is too big\n");
      return;
    }

  /* Check for simple loops.  */
  desc = get_simple_loop_desc (loop);

  /* Check simpleness.  */
  if (desc->simple_p && !desc->assumptions)
    {
      if (dump_file)
	fprintf (dump_file, ";; The loop is simple\n");
      return;
    }

  /* Do not unroll loops with branches inside -- it increases the number
     of mispredicts.
     TODO: this heuristic needs tuning; a call inside the loop body
     is also a relatively good reason not to unroll.  */
  if (num_loop_branches (loop) > 1)
    {
      if (dump_file)
	fprintf (dump_file, ";; Not unrolling, contains branches\n");
      return;
    }

  /* Check whether the loop rolls.  */
  if ((get_estimated_loop_iterations (loop, &iterations)
       || get_likely_max_loop_iterations (loop, &iterations))
      && wi::ltu_p (iterations, 2 * nunroll))
    {
      if (dump_file)
	fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
      return;
    }

  /* Success.  Now force nunroll to be a power of 2, as it seems that this
     improves results (partially because of better alignments, partially
     because of some dark magic).  */
  for (i = 1; 2 * i <= nunroll; i *= 2)
    continue;

  loop->lpt_decision.decision = LPT_UNROLL_STUPID;
  loop->lpt_decision.times = i - 1;
}

/* Unroll a LOOP LOOP->LPT_DECISION.TIMES times.  The transformation does this:

   while (cond)
     body;

   ==>  (LOOP->LPT_DECISION.TIMES == 3)

   while (cond)
     {
       body;
       if (!cond) break;
       body;
       if (!cond) break;
       body;
       if (!cond) break;
       body;
     }
   */
static void
unroll_loop_stupid (struct loop *loop)
{
  unsigned nunroll = loop->lpt_decision.times;
  struct niter_desc *desc = get_simple_loop_desc (loop);
  struct opt_info *opt_info = NULL;
  bool ok;

  if (flag_split_ivs_in_unroller
      || flag_variable_expansion_in_unroller)
    opt_info = analyze_insns_in_loop (loop);

  auto_sbitmap wont_exit (nunroll + 1);
  bitmap_clear (wont_exit);
  opt_info_start_duplication (opt_info);

  ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
				      nunroll, wont_exit,
				      NULL, NULL,
				      DLTHE_FLAG_UPDATE_FREQ
				      | (opt_info
					 ? DLTHE_RECORD_COPY_NUMBER
					   : 0));
  gcc_assert (ok);

  if (opt_info)
    {
      apply_opt_in_copies (opt_info, nunroll, true, true);
      free_opt_info (opt_info);
    }

  if (desc->simple_p)
    {
      /* We may indeed get here, since there may be nontrivial assumptions
	 under which the loop is really simple.  We could update the
	 counts, but the problem is that we are unable to decide which
	 exit will be taken (not really true in case the number of
	 iterations is constant, but no one will do anything with this
	 information, so we do not worry about it).  */
      desc->simple_p = false;
    }

  if (dump_file)
    fprintf (dump_file, ";; Unrolled loop %d times, %i insns\n",
	     nunroll, num_loop_insns (loop));
}

/* Returns true if REG is referenced in exactly one nondebug insn in LOOP.
   Set *DEBUG_USES to the number of debug insns that reference the
   variable.  */

static bool
referenced_in_one_insn_in_loop_p (struct loop *loop, rtx reg,
				  int *debug_uses)
{
  basic_block *body, bb;
  unsigned i;
  int count_ref = 0;
  rtx_insn *insn;

  body = get_loop_body (loop);
  for (i = 0; i < loop->num_nodes; i++)
    {
      bb = body[i];

      FOR_BB_INSNS (bb, insn)
	if (!rtx_referenced_p (reg, insn))
	  continue;
	else if (DEBUG_INSN_P (insn))
	  ++*debug_uses;
	else if (++count_ref > 1)
	  break;
    }
  free (body);
  return (count_ref == 1);
}

/* Reset the DEBUG_USES debug insns in LOOP that reference REG.  */

static void
reset_debug_uses_in_loop (struct loop *loop, rtx reg, int debug_uses)
{
  basic_block *body, bb;
  unsigned i;
  rtx_insn *insn;

  body = get_loop_body (loop);
  for (i = 0; debug_uses && i < loop->num_nodes; i++)
    {
      bb = body[i];

      FOR_BB_INSNS (bb, insn)
	if (!DEBUG_INSN_P (insn) || !rtx_referenced_p (reg, insn))
	  continue;
	else
	  {
	    validate_change (insn, &INSN_VAR_LOCATION_LOC (insn),
			     gen_rtx_UNKNOWN_VAR_LOC (), 0);
	    if (!--debug_uses)
	      break;
	  }
    }
  free (body);
}

/* Determine whether INSN contains an accumulator
   which can be expanded into separate copies,
   one for each copy of the LOOP body.

   for (i = 0 ; i < n; i++)
     sum += a[i];

   ==>

   sum += a[i]
   ....
   i = i+1;
   sum1 += a[i]
   ....
   i = i+1
   sum2 += a[i];
   ....

   Return NULL if INSN contains no opportunity for expansion of an
   accumulator.  Otherwise, allocate a VAR_TO_EXPAND structure, fill it
   with the relevant information and return a pointer to it.  */

static struct var_to_expand *
analyze_insn_to_expand_var (struct loop *loop, rtx_insn *insn)
{
  rtx set, dest, src;
  struct var_to_expand *ves;
  unsigned accum_pos;
  enum rtx_code code;
  int debug_uses = 0;

  set = single_set (insn);
  if (!set)
    return NULL;

  dest = SET_DEST (set);
  src = SET_SRC (set);
  code = GET_CODE (src);

  if (code != PLUS && code != MINUS && code != MULT && code != FMA)
    return NULL;

  if (FLOAT_MODE_P (GET_MODE (dest)))
    {
      if (!flag_associative_math)
        return NULL;
      /* In the case of FMA, we're also changing the rounding.  */
      if (code == FMA && !flag_unsafe_math_optimizations)
	return NULL;
    }

  /* Hmm, this is a bit paradoxical.  We know that INSN is a valid insn
     in MD.  But if there is no optab to generate the insn, we cannot
     perform the variable expansion.  This can happen if an MD provides
     an insn but not a named pattern to generate it, for example to avoid
     producing code that needs additional mode switches like for x87/mmx.

     So we check have_insn_for, which looks for an optab for the operation
     in SRC.  If it doesn't exist, we can't perform the expansion even
     though INSN is valid.  */
  if (!have_insn_for (code, GET_MODE (src)))
    return NULL;

  if (!REG_P (dest)
      && !(GET_CODE (dest) == SUBREG
           && REG_P (SUBREG_REG (dest))))
    return NULL;

  /* Find the accumulator use within the operation.  */
  if (code == FMA)
    {
      /* We only support accumulation via FMA in the ADD position.  */
      if (!rtx_equal_p (dest, XEXP (src, 2)))
	return NULL;
      accum_pos = 2;
    }
  else if (rtx_equal_p (dest, XEXP (src, 0)))
    accum_pos = 0;
  else if (rtx_equal_p (dest, XEXP (src, 1)))
    {
      /* The method of expansion that we are using, which initializes
	 the expansions with zero and sums them up at the end of the
	 computation, will yield wrong results for (x = something - x);
	 thus avoid using it in that case.  */
      if (code == MINUS)
	return NULL;
      accum_pos = 1;
    }
  else
    return NULL;

  /* It must not otherwise be used.  */
  if (code == FMA)
    {
      if (rtx_referenced_p (dest, XEXP (src, 0))
	  || rtx_referenced_p (dest, XEXP (src, 1)))
	return NULL;
    }
  else if (rtx_referenced_p (dest, XEXP (src, 1 - accum_pos)))
    return NULL;

  /* It must be used in exactly one insn.  */
  if (!referenced_in_one_insn_in_loop_p (loop, dest, &debug_uses))
    return NULL;

  if (dump_file)
    {
      fprintf (dump_file, "\n;; Expanding Accumulator ");
      print_rtl (dump_file, dest);
      fprintf (dump_file, "\n");
    }

  if (debug_uses)
    /* Instead of resetting the debug insns, we could replace each
       debug use in the loop with the sum or product of all expanded
       accumulators.  Since we'll only know of all expansions at the
       end, we'd have to keep track of which vars_to_expand a debug
       insn in the loop references, take note of each copy of the
       debug insn during unrolling, and when it's all done, compute
       the sum or product of each variable and adjust the original
       debug insn and each copy thereof.  What a pain!  */
    reset_debug_uses_in_loop (loop, dest, debug_uses);

  /* Record the accumulator to expand.  */
  ves = XNEW (struct var_to_expand);
  ves->insn = insn;
  ves->reg = copy_rtx (dest);
  ves->var_expansions.create (1);
  ves->next = NULL;
  ves->op = GET_CODE (src);
  ves->expansion_count = 0;
  ves->reuse_expansion = 0;
  return ves;
}

/* Determine whether there is an induction variable in INSN that
   we would like to split during unrolling.

   I.e. replace chains like

   i = i + 1;
   ...
   i = i + 1;
   ...
   i = i + 1;
   ...

   by

   i0 = i + 1
   ...
   i = i0 + 1
   ...
   i = i0 + 2
   ...

   Return NULL if INSN contains no interesting IVs.  Otherwise, allocate
   an IV_TO_SPLIT structure, fill it with the relevant information and
   return a pointer to it.  */

static struct iv_to_split *
analyze_iv_to_split_insn (rtx_insn *insn)
{
  rtx set, dest;
  struct rtx_iv iv;
  struct iv_to_split *ivts;
  bool ok;

  /* For now we just split the basic induction variables.  Later this may be
     extended for example by selecting also addresses of memory references.  */
  set = single_set (insn);
  if (!set)
    return NULL;

  dest = SET_DEST (set);
  if (!REG_P (dest))
    return NULL;

  if (!biv_p (insn, dest))
    return NULL;

  ok = iv_analyze_result (insn, dest, &iv);

  /* This used to be an assert under the assumption that if biv_p returns
     true that iv_analyze_result must also return true.  However, that
     assumption is not strictly correct as evidenced by pr25569.

     Returning NULL when iv_analyze_result returns false is safe and
     avoids the problems in pr25569 until the iv_analyze_* routines
     can be fixed, which is apparently hard and time consuming
     according to their author.  */
  if (! ok)
    return NULL;

  if (iv.step == const0_rtx
      || iv.mode != iv.extend_mode)
    return NULL;

  /* Record the insn to split.  */
  ivts = XNEW (struct iv_to_split);
  ivts->insn = insn;
  ivts->orig_var = dest;
  ivts->base_var = NULL_RTX;
  ivts->step = iv.step;
  ivts->next = NULL;

  return ivts;
}

/* Determine which insns in LOOP can be optimized.
   Return an OPT_INFO struct with the relevant hash tables filled
   with all insns to be optimized.  The FIRST_NEW_BLOCK field
   is undefined for the return value.  */

static struct opt_info *
analyze_insns_in_loop (struct loop *loop)
{
  basic_block *body, bb;
  unsigned i;
  struct opt_info *opt_info = XCNEW (struct opt_info);
  rtx_insn *insn;
  struct iv_to_split *ivts = NULL;
  struct var_to_expand *ves = NULL;
  iv_to_split **slot1;
  var_to_expand **slot2;
  vec<edge> edges = get_loop_exit_edges (loop);
  edge exit;
  bool can_apply = false;

  iv_analysis_loop_init (loop);

  body = get_loop_body (loop);

  if (flag_split_ivs_in_unroller)
    {
      opt_info->insns_to_split
	= new hash_table<iv_split_hasher> (5 * loop->num_nodes);
      opt_info->iv_to_split_head = NULL;
      opt_info->iv_to_split_tail = &opt_info->iv_to_split_head;
    }

  /* Record the loop exit bb and loop preheader before the unrolling.  */
  opt_info->loop_preheader = loop_preheader_edge (loop)->src;

  if (edges.length () == 1)
    {
      exit = edges[0];
      if (!(exit->flags & EDGE_COMPLEX))
	{
	  opt_info->loop_exit = split_edge (exit);
	  can_apply = true;
	}
    }

  if (flag_variable_expansion_in_unroller
      && can_apply)
    {
      opt_info->insns_with_var_to_expand
	= new hash_table<var_expand_hasher> (5 * loop->num_nodes);
      opt_info->var_to_expand_head = NULL;
      opt_info->var_to_expand_tail = &opt_info->var_to_expand_head;
    }

  for (i = 0; i < loop->num_nodes; i++)
    {
      bb = body[i];
      if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
	continue;

      FOR_BB_INSNS (bb, insn)
      {
        if (!INSN_P (insn))
          continue;

        if (opt_info->insns_to_split)
          ivts = analyze_iv_to_split_insn (insn);

        if (ivts)
          {
            slot1 = opt_info->insns_to_split->find_slot (ivts, INSERT);
	    gcc_assert (*slot1 == NULL);
            *slot1 = ivts;
	    *opt_info->iv_to_split_tail = ivts;
	    opt_info->iv_to_split_tail = &ivts->next;
            continue;
          }

        if (opt_info->insns_with_var_to_expand)
          ves = analyze_insn_to_expand_var (loop, insn);

        if (ves)
          {
            slot2 = opt_info->insns_with_var_to_expand->find_slot (ves, INSERT);
	    gcc_assert (*slot2 == NULL);
            *slot2 = ves;
	    *opt_info->var_to_expand_tail = ves;
	    opt_info->var_to_expand_tail = &ves->next;
          }
      }
    }

  edges.release ();
  free (body);
  return opt_info;
}

/* Called just before loop duplication.  Records start of duplicated area
   to OPT_INFO.  */

static void
opt_info_start_duplication (struct opt_info *opt_info)
{
  if (opt_info)
    opt_info->first_new_block = last_basic_block_for_fn (cfun);
}
1658 
1659 /* Determine the number of iterations between initialization of the base
1660    variable and the current copy (N_COPY).  N_COPIES is the total number
1661    of newly created copies.  UNROLLING is true if we are unrolling
1662    (not peeling) the loop.  */
1663 
1664 static unsigned
1665 determine_split_iv_delta (unsigned n_copy, unsigned n_copies, bool unrolling)
1666 {
1667   if (unrolling)
1668     {
1669       /* If we are unrolling, initialization is done in the original loop
1670 	 body (number 0).  */
1671       return n_copy;
1672     }
1673   else
1674     {
1675       /* If we are peeling, the copy in which the initialization occurs has
1676 	 number 1.  The original loop (number 0) is the last.  */
1677       if (n_copy)
1678 	return n_copy - 1;
1679       else
1680 	return n_copies;
1681     }
1682 }
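
/* Worked example (an addition, not in the GCC source): with
   N_COPIES == 3, unrolling gives deltas 0, 1, 2, 3 for copies
   0 (the original body), 1, 2, 3.  Peeling instead gives delta 0 for
   copy 1 (where the initialization lands), 1 for copy 2, 2 for
   copy 3, and N_COPIES == 3 for the original body, which runs
   last.  */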
1683 
1684 /* Allocate the basic variable for the induction variable chain.  */
1685 
1686 static void
1687 allocate_basic_variable (struct iv_to_split *ivts)
1688 {
1689   rtx expr = SET_SRC (single_set (ivts->insn));
1690 
1691   ivts->base_var = gen_reg_rtx (GET_MODE (expr));
1692 }
1693 
1694 /* Insert initialization of basic variable of IVTS before INSN, taking
1695    the initial value from INSN.  */
1696 
1697 static void
1698 insert_base_initialization (struct iv_to_split *ivts, rtx_insn *insn)
1699 {
1700   rtx expr = copy_rtx (SET_SRC (single_set (insn)));
1701   rtx_insn *seq;
1702 
1703   start_sequence ();
1704   expr = force_operand (expr, ivts->base_var);
1705   if (expr != ivts->base_var)
1706     emit_move_insn (ivts->base_var, expr);
1707   seq = get_insns ();
1708   end_sequence ();
1709 
1710   emit_insn_before (seq, insn);
1711 }
1712 
1713 /* Replace the use of the induction variable described in IVTS in INSN
1714    with base variable + DELTA * step.  */
1715 
1716 static void
1717 split_iv (struct iv_to_split *ivts, rtx_insn *insn, unsigned delta)
1718 {
1719   rtx expr, *loc, incr, var;
1720   rtx_insn *seq;
1721   machine_mode mode = GET_MODE (ivts->base_var);
1722   rtx src, dest, set;
1723 
1724   /* Construct base + DELTA * step.  */
1725   if (!delta)
1726     expr = ivts->base_var;
1727   else
1728     {
1729       incr = simplify_gen_binary (MULT, mode,
1730 				  ivts->step, gen_int_mode (delta, mode));
1731       expr = simplify_gen_binary (PLUS, GET_MODE (ivts->base_var),
1732 				  ivts->base_var, incr);
1733     }
1734 
1735   /* Figure out where to do the replacement.  */
1736   loc = &SET_SRC (single_set (insn));
1737 
1738   /* If we can make the replacement right away, we're done.  */
1739   if (validate_change (insn, loc, expr, 0))
1740     return;
1741 
1742   /* Otherwise, force EXPR into a register and try again.  */
1743   start_sequence ();
1744   var = gen_reg_rtx (mode);
1745   expr = force_operand (expr, var);
1746   if (expr != var)
1747     emit_move_insn (var, expr);
1748   seq = get_insns ();
1749   end_sequence ();
1750   emit_insn_before (seq, insn);
1751 
1752   if (validate_change (insn, loc, var, 0))
1753     return;
1754 
1755   /* As a last resort, try recreating the assignment in INSN
1756      completely from scratch.  */
1757   set = single_set (insn);
1758   gcc_assert (set);
1759 
1760   start_sequence ();
1761   *loc = var;
1762   src = copy_rtx (SET_SRC (set));
1763   dest = copy_rtx (SET_DEST (set));
1764   src = force_operand (src, dest);
1765   if (src != dest)
1766     emit_move_insn (dest, src);
1767   seq = get_insns ();
1768   end_sequence ();
1769 
1770   emit_insn_before (seq, insn);
1771   delete_insn (insn);
1772 }
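
/* Illustrative sketch (an addition, not in the GCC source): for an IV
   insn `i = i + 1' with base variable i0, the delta-0 copy receives
   the base initialization `i0 = i + 1' followed by `i = i0', the
   delta-1 copy becomes `i = i0 + 1', the delta-2 copy `i = i0 + 2',
   and so on -- the copies no longer form a serial dependence chain
   on I.  */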
1773 
1774 
1775 /* Return one expansion of the accumulator recorded in struct VE.  */
1776 
1777 static rtx
1778 get_expansion (struct var_to_expand *ve)
1779 {
1780   rtx reg;
1781 
1782   if (ve->reuse_expansion == 0)
1783     reg = ve->reg;
1784   else
1785     reg = ve->var_expansions[ve->reuse_expansion - 1];
1786 
1787   if (ve->var_expansions.length () == (unsigned) ve->reuse_expansion)
1788     ve->reuse_expansion = 0;
1789   else
1790     ve->reuse_expansion++;
1791 
1792   return reg;
1793 }
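
/* Illustrative example (an addition, not in the GCC source): with
   VE->REG == sum and recorded expansions sum1 and sum2, successive
   calls return sum, sum1, sum2, sum, sum1, ... -- cycling through
   the original accumulator and its expansions.  */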
1794 
1795 
1796 /* Given INSN replace the uses of the accumulator recorded in VE
1797    with a new register.  */
1798 
1799 static void
1800 expand_var_during_unrolling (struct var_to_expand *ve, rtx_insn *insn)
1801 {
1802   rtx new_reg, set;
1803   bool really_new_expansion = false;
1804 
1805   set = single_set (insn);
1806   gcc_assert (set);
1807 
1808   /* Generate a new register only if the expansion limit has not been
1809      reached.  Otherwise reuse an already existing expansion.  */
1810   if (PARAM_VALUE (PARAM_MAX_VARIABLE_EXPANSIONS) > ve->expansion_count)
1811     {
1812       really_new_expansion = true;
1813       new_reg = gen_reg_rtx (GET_MODE (ve->reg));
1814     }
1815   else
1816     new_reg = get_expansion (ve);
1817 
1818   validate_replace_rtx_group (SET_DEST (set), new_reg, insn);
1819   if (apply_change_group ())
1820     if (really_new_expansion)
1821       {
1822         ve->var_expansions.safe_push (new_reg);
1823         ve->expansion_count++;
1824       }
1825 }
1826 
1827 /* Initialize the variable expansions in loop preheader.  PLACE is the
1828    loop-preheader basic block where the initialization of the
1829    expansions should take place.  The expansions are initialized with
1830    (-0) when the operation is plus or minus, to honor signed zero.  This
1831    way we prevent cases where the sign of the final result is
1832    affected by the sign of the expansion.  Here is an example to
1833    demonstrate this:
1834 
1835    for (i = 0 ; i < n; i++)
1836      sum += something;
1837 
1838    ==>
1839 
1840    sum += something
1841    ....
1842    i = i+1;
1843    sum1 += something
1844    ....
1845    i = i+1
1846    sum2 += something;
1847    ....
1848 
1849    When SUM is initialized with -zero and SOMETHING is also -zero, the
1850    final result of SUM should be -zero; thus the expansions sum1 and sum2
1851    should be initialized with -zero as well (otherwise we would get +zero
1852    as the final result).  */
1853 
1854 static void
1855 insert_var_expansion_initialization (struct var_to_expand *ve,
1856 				     basic_block place)
1857 {
1858   rtx_insn *seq;
1859   rtx var, zero_init;
1860   unsigned i;
1861   machine_mode mode = GET_MODE (ve->reg);
1862   bool honor_signed_zero_p = HONOR_SIGNED_ZEROS (mode);
1863 
1864   if (ve->var_expansions.length () == 0)
1865     return;
1866 
1867   start_sequence ();
1868   switch (ve->op)
1869     {
1870     case FMA:
1871       /* Note that we only accumulate FMA via the ADD operand.  */
1872     case PLUS:
1873     case MINUS:
1874       FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
1875         {
1876 	  if (honor_signed_zero_p)
1877 	    zero_init = simplify_gen_unary (NEG, mode, CONST0_RTX (mode), mode);
1878 	  else
1879 	    zero_init = CONST0_RTX (mode);
1880           emit_move_insn (var, zero_init);
1881         }
1882       break;
1883 
1884     case MULT:
1885       FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
1886         {
1887           zero_init = CONST1_RTX (GET_MODE (var));
1888           emit_move_insn (var, zero_init);
1889         }
1890       break;
1891 
1892     default:
1893       gcc_unreachable ();
1894     }
1895 
1896   seq = get_insns ();
1897   end_sequence ();
1898 
1899   emit_insn_after (seq, BB_END (place));
1900 }
1901 
1902 /* Combine the variable expansions at the loop exit.  PLACE is the
1903    loop exit basic block where the summation of the expansions should
1904    take place.  */
1905 
1906 static void
1907 combine_var_copies_in_loop_exit (struct var_to_expand *ve, basic_block place)
1908 {
1909   rtx sum = ve->reg;
1910   rtx expr, var;
1911   rtx_insn *seq, *insn;
1912   unsigned i;
1913 
1914   if (ve->var_expansions.length () == 0)
1915     return;
1916 
1917   /* ve->reg might be a SUBREG or some other non-shareable RTL, and we use
1918      it both here and as the destination of the assignment.  */
1919   sum = copy_rtx (sum);
1920   start_sequence ();
1921   switch (ve->op)
1922     {
1923     case FMA:
1924       /* Note that we only accumulate FMA via the ADD operand.  */
1925     case PLUS:
1926     case MINUS:
1927       FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
1928 	sum = simplify_gen_binary (PLUS, GET_MODE (ve->reg), var, sum);
1929       break;
1930 
1931     case MULT:
1932       FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
1933 	sum = simplify_gen_binary (MULT, GET_MODE (ve->reg), var, sum);
1934       break;
1935 
1936     default:
1937       gcc_unreachable ();
1938     }
1939 
1940   expr = force_operand (sum, ve->reg);
1941   if (expr != ve->reg)
1942     emit_move_insn (ve->reg, expr);
1943   seq = get_insns ();
1944   end_sequence ();
1945 
1946   insn = BB_HEAD (place);
1947   while (!NOTE_INSN_BASIC_BLOCK_P (insn))
1948     insn = NEXT_INSN (insn);
1949 
1950   emit_insn_after (seq, insn);
1951 }
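
/* Illustrative sketch (an addition, not in the GCC source):
   continuing the SUM example above, the sequence emitted at the exit
   block computes

     sum = sum2 + (sum1 + sum);

   for a MULT accumulator the partial products are multiplied together
   instead.  */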
1952 
1953 /* Strip away REG_EQUAL notes for IVs we're splitting.
1954 
1955    Updating REG_EQUAL notes for IVs we split is tricky: We
1956    cannot tell until after unrolling, DF-rescanning, and liveness
1957    updating, whether an EQ_USE is reached by the split IV while
1958    the IV reg is still live.  See PR55006.
1959 
1960    ??? We cannot use remove_reg_equal_equiv_notes_for_regno,
1961    because RTL loop-iv requires us to defer rescanning insns and
1962    any notes attached to them.  So resort to old techniques...  */
1963 
1964 static void
1965 maybe_strip_eq_note_for_split_iv (struct opt_info *opt_info, rtx_insn *insn)
1966 {
1967   struct iv_to_split *ivts;
1968   rtx note = find_reg_equal_equiv_note (insn);
1969   if (! note)
1970     return;
1971   for (ivts = opt_info->iv_to_split_head; ivts; ivts = ivts->next)
1972     if (reg_mentioned_p (ivts->orig_var, note))
1973       {
1974 	remove_note (insn, note);
1975 	return;
1976       }
1977 }
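
/* For instance (an illustration, not in the GCC source): a copied
   insn may carry a REG_EQUAL note equating its destination with
   something like (plus:SI (reg:SI i) (const_int 1)), mentioning the
   pre-split IV register; after the use is rewritten in terms of the
   base variable, the note may describe a value the register no longer
   holds at that point, so it is simply dropped.  */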
1978 
1979 /* Apply loop optimizations in loop copies using the
1980    data gathered during the unrolling.  Structure
1981    OPT_INFO records that data.
1982 
1983    UNROLLING is true if we unrolled (not peeled) the loop.
1984    REWRITE_ORIGINAL_LOOP is true if we should also rewrite the original body
1985    of the loop (as happens in complete unrolling, but not in ordinary
1986    peeling of the loop).  */
1987 
1988 static void
1989 apply_opt_in_copies (struct opt_info *opt_info,
1990                      unsigned n_copies, bool unrolling,
1991                      bool rewrite_original_loop)
1992 {
1993   unsigned i, delta;
1994   basic_block bb, orig_bb;
1995   rtx_insn *insn, *orig_insn, *next;
1996   struct iv_to_split ivts_templ, *ivts;
1997   struct var_to_expand ve_templ, *ves;
1998 
1999   /* Sanity check -- we need to put initialization in the original loop
2000      body.  */
2001   gcc_assert (!unrolling || rewrite_original_loop);
2002 
2003   /* Allocate the basic variables (i0).  */
2004   if (opt_info->insns_to_split)
2005     for (ivts = opt_info->iv_to_split_head; ivts; ivts = ivts->next)
2006       allocate_basic_variable (ivts);
2007 
2008   for (i = opt_info->first_new_block;
2009        i < (unsigned) last_basic_block_for_fn (cfun);
2010        i++)
2011     {
2012       bb = BASIC_BLOCK_FOR_FN (cfun, i);
2013       orig_bb = get_bb_original (bb);
2014 
2015       /* bb->aux holds the position in the copy sequence, as initialized by
2016 	 duplicate_loop_to_header_edge.  */
2017       delta = determine_split_iv_delta ((size_t)bb->aux, n_copies,
2018 					unrolling);
2019       bb->aux = 0;
2020       orig_insn = BB_HEAD (orig_bb);
2021       FOR_BB_INSNS_SAFE (bb, insn, next)
2022         {
2023 	  if (!INSN_P (insn)
2024 	      || (DEBUG_INSN_P (insn)
2025 		  && TREE_CODE (INSN_VAR_LOCATION_DECL (insn)) == LABEL_DECL))
2026             continue;
2027 
2028 	  while (!INSN_P (orig_insn)
2029 		 || (DEBUG_INSN_P (orig_insn)
2030 		     && (TREE_CODE (INSN_VAR_LOCATION_DECL (orig_insn))
2031 			 == LABEL_DECL)))
2032             orig_insn = NEXT_INSN (orig_insn);
2033 
2034           ivts_templ.insn = orig_insn;
2035           ve_templ.insn = orig_insn;
2036 
2037           /* Apply splitting iv optimization.  */
2038           if (opt_info->insns_to_split)
2039             {
2040 	      maybe_strip_eq_note_for_split_iv (opt_info, insn);
2041 
2042               ivts = opt_info->insns_to_split->find (&ivts_templ);
2043 
2044               if (ivts)
2045                 {
2046 		  gcc_assert (GET_CODE (PATTERN (insn))
2047 			      == GET_CODE (PATTERN (orig_insn)));
2048 
2049                   if (!delta)
2050                     insert_base_initialization (ivts, insn);
2051                   split_iv (ivts, insn, delta);
2052                 }
2053             }
2054           /* Apply variable expansion optimization.  */
2055           if (unrolling && opt_info->insns_with_var_to_expand)
2056             {
2057               ves = (struct var_to_expand *)
2058 		opt_info->insns_with_var_to_expand->find (&ve_templ);
2059               if (ves)
2060                 {
2061 		  gcc_assert (GET_CODE (PATTERN (insn))
2062 			      == GET_CODE (PATTERN (orig_insn)));
2063                   expand_var_during_unrolling (ves, insn);
2064                 }
2065             }
2066           orig_insn = NEXT_INSN (orig_insn);
2067         }
2068     }
2069 
2070   if (!rewrite_original_loop)
2071     return;
2072 
2073   /* Initialize the variable expansions in the loop preheader
2074      and take care of combining them at the loop exit.  */
2075   if (opt_info->insns_with_var_to_expand)
2076     {
2077       for (ves = opt_info->var_to_expand_head; ves; ves = ves->next)
2078 	insert_var_expansion_initialization (ves, opt_info->loop_preheader);
2079       for (ves = opt_info->var_to_expand_head; ves; ves = ves->next)
2080 	combine_var_copies_in_loop_exit (ves, opt_info->loop_exit);
2081     }
2082 
2083   /* Also rewrite the original loop body.  Its blocks are the originals of
2084      the blocks in the last copied iteration, i.e. those that have
2085      get_bb_copy (get_bb_original (bb)) == bb.  */
2086   for (i = opt_info->first_new_block;
2087        i < (unsigned) last_basic_block_for_fn (cfun);
2088        i++)
2089     {
2090       bb = BASIC_BLOCK_FOR_FN (cfun, i);
2091       orig_bb = get_bb_original (bb);
2092       if (get_bb_copy (orig_bb) != bb)
2093 	continue;
2094 
2095       delta = determine_split_iv_delta (0, n_copies, unrolling);
2096       for (orig_insn = BB_HEAD (orig_bb);
2097            orig_insn != NEXT_INSN (BB_END (bb));
2098            orig_insn = next)
2099         {
2100           next = NEXT_INSN (orig_insn);
2101 
2102           if (!INSN_P (orig_insn))
2103 	    continue;
2104 
2105           ivts_templ.insn = orig_insn;
2106           if (opt_info->insns_to_split)
2107             {
2108 	      maybe_strip_eq_note_for_split_iv (opt_info, orig_insn);
2109 
2110               ivts = (struct iv_to_split *)
2111 		opt_info->insns_to_split->find (&ivts_templ);
2112               if (ivts)
2113                 {
2114                   if (!delta)
2115                     insert_base_initialization (ivts, orig_insn);
2116                   split_iv (ivts, orig_insn, delta);
2117                   continue;
2118                 }
2119             }
2120 
2121         }
2122     }
2123 }
2124 
2125 /* Release OPT_INFO.  */
2126 
2127 static void
2128 free_opt_info (struct opt_info *opt_info)
2129 {
2130   delete opt_info->insns_to_split;
2131   opt_info->insns_to_split = NULL;
2132   if (opt_info->insns_with_var_to_expand)
2133     {
2134       struct var_to_expand *ves;
2135 
2136       for (ves = opt_info->var_to_expand_head; ves; ves = ves->next)
2137 	ves->var_expansions.release ();
2138       delete opt_info->insns_with_var_to_expand;
2139       opt_info->insns_with_var_to_expand = NULL;
2140     }
2141   free (opt_info);
2142 }
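
/* Illustrative call sequence (a sketch of the callers elsewhere in
   this file, an addition, not quoted from the GCC source): the
   transformations above are driven roughly as

     opt_info = analyze_insns_in_loop (loop);
     opt_info_start_duplication (opt_info);
     duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
				    max_unroll, wont_exit, ...);
     apply_opt_in_copies (opt_info, max_unroll, true, true);
     free_opt_info (opt_info);

   recording candidates before duplication, marking where the new
   blocks start, rewriting the copies, and finally releasing the
   gathered data.  */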
2143