xref: /openbsd-src/gnu/gcc/gcc/loop-unroll.c (revision 404b540a9034ac75a6199ad1a32d1bbc7a0d4210)
1 /* Loop unrolling and peeling.
2    Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
3 
4 This file is part of GCC.
5 
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 2, or (at your option) any later
9 version.
10 
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING.  If not, write to the Free
18 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
19 02110-1301, USA.  */
20 
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "rtl.h"
26 #include "hard-reg-set.h"
27 #include "obstack.h"
28 #include "basic-block.h"
29 #include "cfgloop.h"
30 #include "cfglayout.h"
31 #include "params.h"
32 #include "output.h"
33 #include "expr.h"
34 #include "hashtab.h"
35 #include "recog.h"
36 
37 /* This pass performs loop unrolling and peeling.  We only perform these
38    optimizations on innermost loops (with single exception) because
39    the impact on performance is greatest here, and we want to avoid
40    unnecessary code size growth.  The gain is caused by greater sequentiality
41    of code, better code to optimize for further passes and in some cases
42    by fewer testings of exit conditions.  The main problem is code growth,
43    that impacts performance negatively due to effect of caches.
44 
45    What we do:
46 
47    -- complete peeling of once-rolling loops; this is the above mentioned
48       exception, as this causes loop to be cancelled completely and
49       does not cause code growth
50    -- complete peeling of loops that roll (small) constant times.
51    -- simple peeling of first iterations of loops that do not roll much
52       (according to profile feedback)
53    -- unrolling of loops that roll constant times; this is almost always
54       win, as we get rid of exit condition tests.
55    -- unrolling of loops that roll number of times that we can compute
56       in runtime; we also get rid of exit condition tests here, but there
57       is the extra expense for calculating the number of iterations
58    -- simple unrolling of remaining loops; this is performed only if we
59       are asked to, as the gain is questionable in this case and often
60       it may even slow down the code
61    For more detailed descriptions of each of those, see comments at
62    appropriate function below.
63 
64    There is a lot of parameters (defined and described in params.def) that
65    control how much we unroll/peel.
66 
67    ??? A great problem is that we don't have a good way how to determine
68    how many times we should unroll the loop; the experiments I have made
69    showed that this choice may affect performance in order of several %.
70    */
71 
72 /* Information about induction variables to split.  */
73 
74 struct iv_to_split
75 {
76   rtx insn;		/* The insn in that the induction variable occurs.  */
77   rtx base_var;		/* The variable on that the values in the further
78 			   iterations are based.  */
79   rtx step;		/* Step of the induction variable.  */
80   unsigned n_loc;
81   unsigned loc[3];	/* Location where the definition of the induction
82 			   variable occurs in the insn.  For example if
83 			   N_LOC is 2, the expression is located at
84 			   XEXP (XEXP (single_set, loc[0]), loc[1]).  */
85 };
86 
87 /* Information about accumulators to expand.  */
88 
89 struct var_to_expand
90 {
91   rtx insn;		           /* The insn in that the variable expansion occurs.  */
92   rtx reg;                         /* The accumulator which is expanded.  */
93   VEC(rtx,heap) *var_expansions;   /* The copies of the accumulator which is expanded.  */
94   enum rtx_code op;                /* The type of the accumulation - addition, subtraction
95                                       or multiplication.  */
96   int expansion_count;             /* Count the number of expansions generated so far.  */
97   int reuse_expansion;             /* The expansion we intend to reuse to expand
98                                       the accumulator.  If REUSE_EXPANSION is 0 reuse
99                                       the original accumulator.  Else use
100                                       var_expansions[REUSE_EXPANSION - 1].  */
101 };
102 
103 /* Information about optimization applied in
104    the unrolled loop.  */
105 
106 struct opt_info
107 {
108   htab_t insns_to_split;           /* A hashtable of insns to split.  */
109   htab_t insns_with_var_to_expand; /* A hashtable of insns with accumulators
110                                       to expand.  */
111   unsigned first_new_block;        /* The first basic block that was
112                                       duplicated.  */
113   basic_block loop_exit;           /* The loop exit basic block.  */
114   basic_block loop_preheader;      /* The loop preheader basic block.  */
115 };
116 
117 static void decide_unrolling_and_peeling (struct loops *, int);
118 static void peel_loops_completely (struct loops *, int);
119 static void decide_peel_simple (struct loop *, int);
120 static void decide_peel_once_rolling (struct loop *, int);
121 static void decide_peel_completely (struct loop *, int);
122 static void decide_unroll_stupid (struct loop *, int);
123 static void decide_unroll_constant_iterations (struct loop *, int);
124 static void decide_unroll_runtime_iterations (struct loop *, int);
125 static void peel_loop_simple (struct loops *, struct loop *);
126 static void peel_loop_completely (struct loops *, struct loop *);
127 static void unroll_loop_stupid (struct loops *, struct loop *);
128 static void unroll_loop_constant_iterations (struct loops *, struct loop *);
129 static void unroll_loop_runtime_iterations (struct loops *, struct loop *);
130 static struct opt_info *analyze_insns_in_loop (struct loop *);
131 static void opt_info_start_duplication (struct opt_info *);
132 static void apply_opt_in_copies (struct opt_info *, unsigned, bool, bool);
133 static void free_opt_info (struct opt_info *);
134 static struct var_to_expand *analyze_insn_to_expand_var (struct loop*, rtx);
135 static bool referenced_in_one_insn_in_loop_p (struct loop *, rtx);
136 static struct iv_to_split *analyze_iv_to_split_insn (rtx);
137 static void expand_var_during_unrolling (struct var_to_expand *, rtx);
138 static int insert_var_expansion_initialization (void **, void *);
139 static int combine_var_copies_in_loop_exit (void **, void *);
140 static int release_var_copies (void **, void *);
141 static rtx get_expansion (struct var_to_expand *);
142 
143 /* Unroll and/or peel (depending on FLAGS) LOOPS.  */
144 void
unroll_and_peel_loops(struct loops * loops,int flags)145 unroll_and_peel_loops (struct loops *loops, int flags)
146 {
147   struct loop *loop, *next;
148   bool check;
149 
150   /* First perform complete loop peeling (it is almost surely a win,
151      and affects parameters for further decision a lot).  */
152   peel_loops_completely (loops, flags);
153 
154   /* Now decide rest of unrolling and peeling.  */
155   decide_unrolling_and_peeling (loops, flags);
156 
157   loop = loops->tree_root;
158   while (loop->inner)
159     loop = loop->inner;
160 
161   /* Scan the loops, inner ones first.  */
162   while (loop != loops->tree_root)
163     {
164       if (loop->next)
165 	{
166 	  next = loop->next;
167 	  while (next->inner)
168 	    next = next->inner;
169 	}
170       else
171 	next = loop->outer;
172 
173       check = true;
174       /* And perform the appropriate transformations.  */
175       switch (loop->lpt_decision.decision)
176 	{
177 	case LPT_PEEL_COMPLETELY:
178 	  /* Already done.  */
179 	  gcc_unreachable ();
180 	case LPT_PEEL_SIMPLE:
181 	  peel_loop_simple (loops, loop);
182 	  break;
183 	case LPT_UNROLL_CONSTANT:
184 	  unroll_loop_constant_iterations (loops, loop);
185 	  break;
186 	case LPT_UNROLL_RUNTIME:
187 	  unroll_loop_runtime_iterations (loops, loop);
188 	  break;
189 	case LPT_UNROLL_STUPID:
190 	  unroll_loop_stupid (loops, loop);
191 	  break;
192 	case LPT_NONE:
193 	  check = false;
194 	  break;
195 	default:
196 	  gcc_unreachable ();
197 	}
198       if (check)
199 	{
200 #ifdef ENABLE_CHECKING
201 	  verify_dominators (CDI_DOMINATORS);
202 	  verify_loop_structure (loops);
203 #endif
204 	}
205       loop = next;
206     }
207 
208   iv_analysis_done ();
209 }
210 
211 /* Check whether exit of the LOOP is at the end of loop body.  */
212 
213 static bool
loop_exit_at_end_p(struct loop * loop)214 loop_exit_at_end_p (struct loop *loop)
215 {
216   struct niter_desc *desc = get_simple_loop_desc (loop);
217   rtx insn;
218 
219   if (desc->in_edge->dest != loop->latch)
220     return false;
221 
222   /* Check that the latch is empty.  */
223   FOR_BB_INSNS (loop->latch, insn)
224     {
225       if (INSN_P (insn))
226 	return false;
227     }
228 
229   return true;
230 }
231 
232 /* Check whether to peel LOOPS (depending on FLAGS) completely and do so.  */
233 static void
peel_loops_completely(struct loops * loops,int flags)234 peel_loops_completely (struct loops *loops, int flags)
235 {
236   struct loop *loop;
237   unsigned i;
238 
239   /* Scan the loops, the inner ones first.  */
240   for (i = loops->num - 1; i > 0; i--)
241     {
242       loop = loops->parray[i];
243       if (!loop)
244 	continue;
245 
246       loop->lpt_decision.decision = LPT_NONE;
247 
248       if (dump_file)
249 	fprintf (dump_file,
250 		 "\n;; *** Considering loop %d for complete peeling ***\n",
251 		 loop->num);
252 
253       loop->ninsns = num_loop_insns (loop);
254 
255       decide_peel_once_rolling (loop, flags);
256       if (loop->lpt_decision.decision == LPT_NONE)
257 	decide_peel_completely (loop, flags);
258 
259       if (loop->lpt_decision.decision == LPT_PEEL_COMPLETELY)
260 	{
261 	  peel_loop_completely (loops, loop);
262 #ifdef ENABLE_CHECKING
263 	  verify_dominators (CDI_DOMINATORS);
264 	  verify_loop_structure (loops);
265 #endif
266 	}
267     }
268 }
269 
270 /* Decide whether unroll or peel LOOPS (depending on FLAGS) and how much.  */
271 static void
decide_unrolling_and_peeling(struct loops * loops,int flags)272 decide_unrolling_and_peeling (struct loops *loops, int flags)
273 {
274   struct loop *loop = loops->tree_root, *next;
275 
276   while (loop->inner)
277     loop = loop->inner;
278 
279   /* Scan the loops, inner ones first.  */
280   while (loop != loops->tree_root)
281     {
282       if (loop->next)
283 	{
284 	  next = loop->next;
285 	  while (next->inner)
286 	    next = next->inner;
287 	}
288       else
289 	next = loop->outer;
290 
291       loop->lpt_decision.decision = LPT_NONE;
292 
293       if (dump_file)
294 	fprintf (dump_file, "\n;; *** Considering loop %d ***\n", loop->num);
295 
296       /* Do not peel cold areas.  */
297       if (!maybe_hot_bb_p (loop->header))
298 	{
299 	  if (dump_file)
300 	    fprintf (dump_file, ";; Not considering loop, cold area\n");
301 	  loop = next;
302 	  continue;
303 	}
304 
305       /* Can the loop be manipulated?  */
306       if (!can_duplicate_loop_p (loop))
307 	{
308 	  if (dump_file)
309 	    fprintf (dump_file,
310 		     ";; Not considering loop, cannot duplicate\n");
311 	  loop = next;
312 	  continue;
313 	}
314 
315       /* Skip non-innermost loops.  */
316       if (loop->inner)
317 	{
318 	  if (dump_file)
319 	    fprintf (dump_file, ";; Not considering loop, is not innermost\n");
320 	  loop = next;
321 	  continue;
322 	}
323 
324       loop->ninsns = num_loop_insns (loop);
325       loop->av_ninsns = average_num_loop_insns (loop);
326 
327       /* Try transformations one by one in decreasing order of
328 	 priority.  */
329 
330       decide_unroll_constant_iterations (loop, flags);
331       if (loop->lpt_decision.decision == LPT_NONE)
332 	decide_unroll_runtime_iterations (loop, flags);
333       if (loop->lpt_decision.decision == LPT_NONE)
334 	decide_unroll_stupid (loop, flags);
335       if (loop->lpt_decision.decision == LPT_NONE)
336 	decide_peel_simple (loop, flags);
337 
338       loop = next;
339     }
340 }
341 
342 /* Decide whether the LOOP is once rolling and suitable for complete
343    peeling.  */
344 static void
decide_peel_once_rolling(struct loop * loop,int flags ATTRIBUTE_UNUSED)345 decide_peel_once_rolling (struct loop *loop, int flags ATTRIBUTE_UNUSED)
346 {
347   struct niter_desc *desc;
348 
349   if (dump_file)
350     fprintf (dump_file, "\n;; Considering peeling once rolling loop\n");
351 
352   /* Is the loop small enough?  */
353   if ((unsigned) PARAM_VALUE (PARAM_MAX_ONCE_PEELED_INSNS) < loop->ninsns)
354     {
355       if (dump_file)
356 	fprintf (dump_file, ";; Not considering loop, is too big\n");
357       return;
358     }
359 
360   /* Check for simple loops.  */
361   desc = get_simple_loop_desc (loop);
362 
363   /* Check number of iterations.  */
364   if (!desc->simple_p
365       || desc->assumptions
366       || desc->infinite
367       || !desc->const_iter
368       || desc->niter != 0)
369     {
370       if (dump_file)
371 	fprintf (dump_file,
372 		 ";; Unable to prove that the loop rolls exactly once\n");
373       return;
374     }
375 
376   /* Success.  */
377   if (dump_file)
378     fprintf (dump_file, ";; Decided to peel exactly once rolling loop\n");
379   loop->lpt_decision.decision = LPT_PEEL_COMPLETELY;
380 }
381 
382 /* Decide whether the LOOP is suitable for complete peeling.  */
383 static void
decide_peel_completely(struct loop * loop,int flags ATTRIBUTE_UNUSED)384 decide_peel_completely (struct loop *loop, int flags ATTRIBUTE_UNUSED)
385 {
386   unsigned npeel;
387   struct niter_desc *desc;
388 
389   if (dump_file)
390     fprintf (dump_file, "\n;; Considering peeling completely\n");
391 
392   /* Skip non-innermost loops.  */
393   if (loop->inner)
394     {
395       if (dump_file)
396 	fprintf (dump_file, ";; Not considering loop, is not innermost\n");
397       return;
398     }
399 
400   /* Do not peel cold areas.  */
401   if (!maybe_hot_bb_p (loop->header))
402     {
403       if (dump_file)
404 	fprintf (dump_file, ";; Not considering loop, cold area\n");
405       return;
406     }
407 
408   /* Can the loop be manipulated?  */
409   if (!can_duplicate_loop_p (loop))
410     {
411       if (dump_file)
412 	fprintf (dump_file,
413 		 ";; Not considering loop, cannot duplicate\n");
414       return;
415     }
416 
417   /* npeel = number of iterations to peel.  */
418   npeel = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS) / loop->ninsns;
419   if (npeel > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES))
420     npeel = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES);
421 
422   /* Is the loop small enough?  */
423   if (!npeel)
424     {
425       if (dump_file)
426 	fprintf (dump_file, ";; Not considering loop, is too big\n");
427       return;
428     }
429 
430   /* Check for simple loops.  */
431   desc = get_simple_loop_desc (loop);
432 
433   /* Check number of iterations.  */
434   if (!desc->simple_p
435       || desc->assumptions
436       || !desc->const_iter
437       || desc->infinite)
438     {
439       if (dump_file)
440 	fprintf (dump_file,
441 		 ";; Unable to prove that the loop iterates constant times\n");
442       return;
443     }
444 
445   if (desc->niter > npeel - 1)
446     {
447       if (dump_file)
448 	{
449 	  fprintf (dump_file,
450 		   ";; Not peeling loop completely, rolls too much (");
451 	  fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, desc->niter);
452 	  fprintf (dump_file, " iterations > %d [maximum peelings])\n", npeel);
453 	}
454       return;
455     }
456 
457   /* Success.  */
458   if (dump_file)
459     fprintf (dump_file, ";; Decided to peel loop completely\n");
460   loop->lpt_decision.decision = LPT_PEEL_COMPLETELY;
461 }
462 
463 /* Peel all iterations of LOOP, remove exit edges and cancel the loop
464    completely.  The transformation done:
465 
466    for (i = 0; i < 4; i++)
467      body;
468 
469    ==>
470 
471    i = 0;
472    body; i++;
473    body; i++;
474    body; i++;
475    body; i++;
476    */
477 static void
peel_loop_completely(struct loops * loops,struct loop * loop)478 peel_loop_completely (struct loops *loops, struct loop *loop)
479 {
480   sbitmap wont_exit;
481   unsigned HOST_WIDE_INT npeel;
482   unsigned n_remove_edges, i;
483   edge *remove_edges, ein;
484   struct niter_desc *desc = get_simple_loop_desc (loop);
485   struct opt_info *opt_info = NULL;
486 
487   npeel = desc->niter;
488 
489   if (npeel)
490     {
491       bool ok;
492 
493       wont_exit = sbitmap_alloc (npeel + 1);
494       sbitmap_ones (wont_exit);
495       RESET_BIT (wont_exit, 0);
496       if (desc->noloop_assumptions)
497 	RESET_BIT (wont_exit, 1);
498 
499       remove_edges = XCNEWVEC (edge, npeel);
500       n_remove_edges = 0;
501 
502       if (flag_split_ivs_in_unroller)
503         opt_info = analyze_insns_in_loop (loop);
504 
505       opt_info_start_duplication (opt_info);
506       ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
507 					  loops, npeel,
508 					  wont_exit, desc->out_edge,
509 					  remove_edges, &n_remove_edges,
510 					  DLTHE_FLAG_UPDATE_FREQ
511 					  | DLTHE_FLAG_COMPLETTE_PEEL
512 					  | (opt_info
513 					     ? DLTHE_RECORD_COPY_NUMBER : 0));
514       gcc_assert (ok);
515 
516       free (wont_exit);
517 
518       if (opt_info)
519  	{
520  	  apply_opt_in_copies (opt_info, npeel, false, true);
521  	  free_opt_info (opt_info);
522  	}
523 
524       /* Remove the exit edges.  */
525       for (i = 0; i < n_remove_edges; i++)
526 	remove_path (loops, remove_edges[i]);
527       free (remove_edges);
528     }
529 
530   ein = desc->in_edge;
531   free_simple_loop_desc (loop);
532 
533   /* Now remove the unreachable part of the last iteration and cancel
534      the loop.  */
535   remove_path (loops, ein);
536 
537   if (dump_file)
538     fprintf (dump_file, ";; Peeled loop completely, %d times\n", (int) npeel);
539 }
540 
541 /* Decide whether to unroll LOOP iterating constant number of times
542    and how much.  */
543 
544 static void
decide_unroll_constant_iterations(struct loop * loop,int flags)545 decide_unroll_constant_iterations (struct loop *loop, int flags)
546 {
547   unsigned nunroll, nunroll_by_av, best_copies, best_unroll = 0, n_copies, i;
548   struct niter_desc *desc;
549 
550   if (!(flags & UAP_UNROLL))
551     {
552       /* We were not asked to, just return back silently.  */
553       return;
554     }
555 
556   if (dump_file)
557     fprintf (dump_file,
558 	     "\n;; Considering unrolling loop with constant "
559 	     "number of iterations\n");
560 
561   /* nunroll = total number of copies of the original loop body in
562      unrolled loop (i.e. if it is 2, we have to duplicate loop body once.  */
563   nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
564   nunroll_by_av
565     = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
566   if (nunroll > nunroll_by_av)
567     nunroll = nunroll_by_av;
568   if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
569     nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
570 
571   /* Skip big loops.  */
572   if (nunroll <= 1)
573     {
574       if (dump_file)
575 	fprintf (dump_file, ";; Not considering loop, is too big\n");
576       return;
577     }
578 
579   /* Check for simple loops.  */
580   desc = get_simple_loop_desc (loop);
581 
582   /* Check number of iterations.  */
583   if (!desc->simple_p || !desc->const_iter || desc->assumptions)
584     {
585       if (dump_file)
586 	fprintf (dump_file,
587 		 ";; Unable to prove that the loop iterates constant times\n");
588       return;
589     }
590 
591   /* Check whether the loop rolls enough to consider.  */
592   if (desc->niter < 2 * nunroll)
593     {
594       if (dump_file)
595 	fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
596       return;
597     }
598 
599   /* Success; now compute number of iterations to unroll.  We alter
600      nunroll so that as few as possible copies of loop body are
601      necessary, while still not decreasing the number of unrollings
602      too much (at most by 1).  */
603   best_copies = 2 * nunroll + 10;
604 
605   i = 2 * nunroll + 2;
606   if (i - 1 >= desc->niter)
607     i = desc->niter - 2;
608 
609   for (; i >= nunroll - 1; i--)
610     {
611       unsigned exit_mod = desc->niter % (i + 1);
612 
613       if (!loop_exit_at_end_p (loop))
614 	n_copies = exit_mod + i + 1;
615       else if (exit_mod != (unsigned) i
616 	       || desc->noloop_assumptions != NULL_RTX)
617 	n_copies = exit_mod + i + 2;
618       else
619 	n_copies = i + 1;
620 
621       if (n_copies < best_copies)
622 	{
623 	  best_copies = n_copies;
624 	  best_unroll = i;
625 	}
626     }
627 
628   if (dump_file)
629     fprintf (dump_file, ";; max_unroll %d (%d copies, initial %d).\n",
630 	     best_unroll + 1, best_copies, nunroll);
631 
632   loop->lpt_decision.decision = LPT_UNROLL_CONSTANT;
633   loop->lpt_decision.times = best_unroll;
634 
635   if (dump_file)
636     fprintf (dump_file,
637 	     ";; Decided to unroll the constant times rolling loop, %d times.\n",
638 	     loop->lpt_decision.times);
639 }
640 
641 /* Unroll LOOP with constant number of iterations LOOP->LPT_DECISION.TIMES + 1
642    times.  The transformation does this:
643 
644    for (i = 0; i < 102; i++)
645      body;
646 
647    ==>
648 
649    i = 0;
650    body; i++;
651    body; i++;
652    while (i < 102)
653      {
654        body; i++;
655        body; i++;
656        body; i++;
657        body; i++;
658      }
659   */
660 static void
unroll_loop_constant_iterations(struct loops * loops,struct loop * loop)661 unroll_loop_constant_iterations (struct loops *loops, struct loop *loop)
662 {
663   unsigned HOST_WIDE_INT niter;
664   unsigned exit_mod;
665   sbitmap wont_exit;
666   unsigned n_remove_edges, i;
667   edge *remove_edges;
668   unsigned max_unroll = loop->lpt_decision.times;
669   struct niter_desc *desc = get_simple_loop_desc (loop);
670   bool exit_at_end = loop_exit_at_end_p (loop);
671   struct opt_info *opt_info = NULL;
672   bool ok;
673 
674   niter = desc->niter;
675 
676   /* Should not get here (such loop should be peeled instead).  */
677   gcc_assert (niter > max_unroll + 1);
678 
679   exit_mod = niter % (max_unroll + 1);
680 
681   wont_exit = sbitmap_alloc (max_unroll + 1);
682   sbitmap_ones (wont_exit);
683 
684   remove_edges = XCNEWVEC (edge, max_unroll + exit_mod + 1);
685   n_remove_edges = 0;
686   if (flag_split_ivs_in_unroller
687       || flag_variable_expansion_in_unroller)
688     opt_info = analyze_insns_in_loop (loop);
689 
690   if (!exit_at_end)
691     {
692       /* The exit is not at the end of the loop; leave exit test
693 	 in the first copy, so that the loops that start with test
694 	 of exit condition have continuous body after unrolling.  */
695 
696       if (dump_file)
697 	fprintf (dump_file, ";; Condition on beginning of loop.\n");
698 
699       /* Peel exit_mod iterations.  */
700       RESET_BIT (wont_exit, 0);
701       if (desc->noloop_assumptions)
702 	RESET_BIT (wont_exit, 1);
703 
704       if (exit_mod)
705 	{
706 	  opt_info_start_duplication (opt_info);
707           ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
708 					      loops, exit_mod,
709 					      wont_exit, desc->out_edge,
710 					      remove_edges, &n_remove_edges,
711 					      DLTHE_FLAG_UPDATE_FREQ
712 					      | (opt_info && exit_mod > 1
713 						 ? DLTHE_RECORD_COPY_NUMBER
714 						   : 0));
715 	  gcc_assert (ok);
716 
717           if (opt_info && exit_mod > 1)
718  	    apply_opt_in_copies (opt_info, exit_mod, false, false);
719 
720 	  desc->noloop_assumptions = NULL_RTX;
721 	  desc->niter -= exit_mod;
722 	  desc->niter_max -= exit_mod;
723 	}
724 
725       SET_BIT (wont_exit, 1);
726     }
727   else
728     {
729       /* Leave exit test in last copy, for the same reason as above if
730 	 the loop tests the condition at the end of loop body.  */
731 
732       if (dump_file)
733 	fprintf (dump_file, ";; Condition on end of loop.\n");
734 
735       /* We know that niter >= max_unroll + 2; so we do not need to care of
736 	 case when we would exit before reaching the loop.  So just peel
737 	 exit_mod + 1 iterations.  */
738       if (exit_mod != max_unroll
739 	  || desc->noloop_assumptions)
740 	{
741 	  RESET_BIT (wont_exit, 0);
742 	  if (desc->noloop_assumptions)
743 	    RESET_BIT (wont_exit, 1);
744 
745           opt_info_start_duplication (opt_info);
746 	  ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
747 					      loops, exit_mod + 1,
748 					      wont_exit, desc->out_edge,
749 					      remove_edges, &n_remove_edges,
750 					      DLTHE_FLAG_UPDATE_FREQ
751 					      | (opt_info && exit_mod > 0
752 						 ? DLTHE_RECORD_COPY_NUMBER
753 						   : 0));
754 	  gcc_assert (ok);
755 
756           if (opt_info && exit_mod > 0)
757   	    apply_opt_in_copies (opt_info, exit_mod + 1, false, false);
758 
759 	  desc->niter -= exit_mod + 1;
760 	  desc->niter_max -= exit_mod + 1;
761 	  desc->noloop_assumptions = NULL_RTX;
762 
763 	  SET_BIT (wont_exit, 0);
764 	  SET_BIT (wont_exit, 1);
765 	}
766 
767       RESET_BIT (wont_exit, max_unroll);
768     }
769 
770   /* Now unroll the loop.  */
771 
772   opt_info_start_duplication (opt_info);
773   ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
774 				      loops, max_unroll,
775 				      wont_exit, desc->out_edge,
776 				      remove_edges, &n_remove_edges,
777 				      DLTHE_FLAG_UPDATE_FREQ
778 				      | (opt_info
779 					 ? DLTHE_RECORD_COPY_NUMBER
780 					   : 0));
781   gcc_assert (ok);
782 
783   if (opt_info)
784     {
785       apply_opt_in_copies (opt_info, max_unroll, true, true);
786       free_opt_info (opt_info);
787     }
788 
789   free (wont_exit);
790 
791   if (exit_at_end)
792     {
793       basic_block exit_block = get_bb_copy (desc->in_edge->src);
794       /* Find a new in and out edge; they are in the last copy we have made.  */
795 
796       if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
797 	{
798 	  desc->out_edge = EDGE_SUCC (exit_block, 0);
799 	  desc->in_edge = EDGE_SUCC (exit_block, 1);
800 	}
801       else
802 	{
803 	  desc->out_edge = EDGE_SUCC (exit_block, 1);
804 	  desc->in_edge = EDGE_SUCC (exit_block, 0);
805 	}
806     }
807 
808   desc->niter /= max_unroll + 1;
809   desc->niter_max /= max_unroll + 1;
810   desc->niter_expr = GEN_INT (desc->niter);
811 
812   /* Remove the edges.  */
813   for (i = 0; i < n_remove_edges; i++)
814     remove_path (loops, remove_edges[i]);
815   free (remove_edges);
816 
817   if (dump_file)
818     fprintf (dump_file,
819 	     ";; Unrolled loop %d times, constant # of iterations %i insns\n",
820 	     max_unroll, num_loop_insns (loop));
821 }
822 
823 /* Decide whether to unroll LOOP iterating runtime computable number of times
824    and how much.  */
825 static void
decide_unroll_runtime_iterations(struct loop * loop,int flags)826 decide_unroll_runtime_iterations (struct loop *loop, int flags)
827 {
828   unsigned nunroll, nunroll_by_av, i;
829   struct niter_desc *desc;
830 
831   if (!(flags & UAP_UNROLL))
832     {
833       /* We were not asked to, just return back silently.  */
834       return;
835     }
836 
837   if (dump_file)
838     fprintf (dump_file,
839 	     "\n;; Considering unrolling loop with runtime "
840 	     "computable number of iterations\n");
841 
842   /* nunroll = total number of copies of the original loop body in
843      unrolled loop (i.e. if it is 2, we have to duplicate loop body once.  */
844   nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
845   nunroll_by_av = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
846   if (nunroll > nunroll_by_av)
847     nunroll = nunroll_by_av;
848   if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
849     nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
850 
851   /* Skip big loops.  */
852   if (nunroll <= 1)
853     {
854       if (dump_file)
855 	fprintf (dump_file, ";; Not considering loop, is too big\n");
856       return;
857     }
858 
859   /* Check for simple loops.  */
860   desc = get_simple_loop_desc (loop);
861 
862   /* Check simpleness.  */
863   if (!desc->simple_p || desc->assumptions)
864     {
865       if (dump_file)
866 	fprintf (dump_file,
867 		 ";; Unable to prove that the number of iterations "
868 		 "can be counted in runtime\n");
869       return;
870     }
871 
872   if (desc->const_iter)
873     {
874       if (dump_file)
875 	fprintf (dump_file, ";; Loop iterates constant times\n");
876       return;
877     }
878 
879   /* If we have profile feedback, check whether the loop rolls.  */
880   if (loop->header->count && expected_loop_iterations (loop) < 2 * nunroll)
881     {
882       if (dump_file)
883 	fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
884       return;
885     }
886 
887   /* Success; now force nunroll to be power of 2, as we are unable to
888      cope with overflows in computation of number of iterations.  */
889   for (i = 1; 2 * i <= nunroll; i *= 2)
890     continue;
891 
892   loop->lpt_decision.decision = LPT_UNROLL_RUNTIME;
893   loop->lpt_decision.times = i - 1;
894 
895   if (dump_file)
896     fprintf (dump_file,
897 	     ";; Decided to unroll the runtime computable "
898 	     "times rolling loop, %d times.\n",
899 	     loop->lpt_decision.times);
900 }
901 
902 /* Unroll LOOP for that we are able to count number of iterations in runtime
903    LOOP->LPT_DECISION.TIMES + 1 times.  The transformation does this (with some
904    extra care for case n < 0):
905 
906    for (i = 0; i < n; i++)
907      body;
908 
909    ==>
910 
911    i = 0;
912    mod = n % 4;
913 
914    switch (mod)
915      {
916        case 3:
917          body; i++;
918        case 2:
919          body; i++;
920        case 1:
921          body; i++;
922        case 0: ;
923      }
924 
925    while (i < n)
926      {
927        body; i++;
928        body; i++;
929        body; i++;
930        body; i++;
931      }
932    */
933 static void
unroll_loop_runtime_iterations(struct loops * loops,struct loop * loop)934 unroll_loop_runtime_iterations (struct loops *loops, struct loop *loop)
935 {
936   rtx old_niter, niter, init_code, branch_code, tmp;
937   unsigned i, j, p;
938   basic_block preheader, *body, *dom_bbs, swtch, ezc_swtch;
939   unsigned n_dom_bbs;
940   sbitmap wont_exit;
941   int may_exit_copy;
942   unsigned n_peel, n_remove_edges;
943   edge *remove_edges, e;
944   bool extra_zero_check, last_may_exit;
945   unsigned max_unroll = loop->lpt_decision.times;
946   struct niter_desc *desc = get_simple_loop_desc (loop);
947   bool exit_at_end = loop_exit_at_end_p (loop);
948   struct opt_info *opt_info = NULL;
949   bool ok;
950 
951   if (flag_split_ivs_in_unroller
952       || flag_variable_expansion_in_unroller)
953     opt_info = analyze_insns_in_loop (loop);
954 
955   /* Remember blocks whose dominators will have to be updated.  */
956   dom_bbs = XCNEWVEC (basic_block, n_basic_blocks);
957   n_dom_bbs = 0;
958 
959   body = get_loop_body (loop);
960   for (i = 0; i < loop->num_nodes; i++)
961     {
962       unsigned nldom;
963       basic_block *ldom;
964 
965       nldom = get_dominated_by (CDI_DOMINATORS, body[i], &ldom);
966       for (j = 0; j < nldom; j++)
967 	if (!flow_bb_inside_loop_p (loop, ldom[j]))
968 	  dom_bbs[n_dom_bbs++] = ldom[j];
969 
970       free (ldom);
971     }
972   free (body);
973 
974   if (!exit_at_end)
975     {
976       /* Leave exit in first copy (for explanation why see comment in
977 	 unroll_loop_constant_iterations).  */
978       may_exit_copy = 0;
979       n_peel = max_unroll - 1;
980       extra_zero_check = true;
981       last_may_exit = false;
982     }
983   else
984     {
985       /* Leave exit in last copy (for explanation why see comment in
986 	 unroll_loop_constant_iterations).  */
987       may_exit_copy = max_unroll;
988       n_peel = max_unroll;
989       extra_zero_check = false;
990       last_may_exit = true;
991     }
992 
993   /* Get expression for number of iterations.  */
994   start_sequence ();
995   old_niter = niter = gen_reg_rtx (desc->mode);
996   tmp = force_operand (copy_rtx (desc->niter_expr), niter);
997   if (tmp != niter)
998     emit_move_insn (niter, tmp);
999 
1000   /* Count modulo by ANDing it with max_unroll; we use the fact that
1001      the number of unrollings is a power of two, and thus this is correct
1002      even if there is overflow in the computation.  */
1003   niter = expand_simple_binop (desc->mode, AND,
1004 			       niter,
1005 			       GEN_INT (max_unroll),
1006 			       NULL_RTX, 0, OPTAB_LIB_WIDEN);
1007 
1008   init_code = get_insns ();
1009   end_sequence ();
1010 
1011   /* Precondition the loop.  */
1012   loop_split_edge_with (loop_preheader_edge (loop), init_code);
1013 
1014   remove_edges = XCNEWVEC (edge, max_unroll + n_peel + 1);
1015   n_remove_edges = 0;
1016 
1017   wont_exit = sbitmap_alloc (max_unroll + 2);
1018 
1019   /* Peel the first copy of loop body (almost always we must leave exit test
1020      here; the only exception is when we have extra zero check and the number
1021      of iterations is reliable.  Also record the place of (possible) extra
1022      zero check.  */
1023   sbitmap_zero (wont_exit);
1024   if (extra_zero_check
1025       && !desc->noloop_assumptions)
1026     SET_BIT (wont_exit, 1);
1027   ezc_swtch = loop_preheader_edge (loop)->src;
1028   ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
1029 				      loops, 1,
1030 				      wont_exit, desc->out_edge,
1031 				      remove_edges, &n_remove_edges,
1032 				      DLTHE_FLAG_UPDATE_FREQ);
1033   gcc_assert (ok);
1034 
1035   /* Record the place where switch will be built for preconditioning.  */
1036   swtch = loop_split_edge_with (loop_preheader_edge (loop),
1037 				NULL_RTX);
1038 
1039   for (i = 0; i < n_peel; i++)
1040     {
1041       /* Peel the copy.  */
1042       sbitmap_zero (wont_exit);
1043       if (i != n_peel - 1 || !last_may_exit)
1044 	SET_BIT (wont_exit, 1);
1045       ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
1046 					  loops, 1,
1047 					  wont_exit, desc->out_edge,
1048 					  remove_edges, &n_remove_edges,
1049 					  DLTHE_FLAG_UPDATE_FREQ);
1050       gcc_assert (ok);
1051 
1052       /* Create item for switch.  */
1053       j = n_peel - i - (extra_zero_check ? 0 : 1);
1054       p = REG_BR_PROB_BASE / (i + 2);
1055 
1056       preheader = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
1057       branch_code = compare_and_jump_seq (copy_rtx (niter), GEN_INT (j), EQ,
1058 					  block_label (preheader), p,
1059 					  NULL_RTX);
1060 
1061       swtch = loop_split_edge_with (single_pred_edge (swtch), branch_code);
1062       set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
1063       single_pred_edge (swtch)->probability = REG_BR_PROB_BASE - p;
1064       e = make_edge (swtch, preheader,
1065 		     single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
1066       e->probability = p;
1067     }
1068 
1069   if (extra_zero_check)
1070     {
1071       /* Add branch for zero iterations.  */
1072       p = REG_BR_PROB_BASE / (max_unroll + 1);
1073       swtch = ezc_swtch;
1074       preheader = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
1075       branch_code = compare_and_jump_seq (copy_rtx (niter), const0_rtx, EQ,
1076 					  block_label (preheader), p,
1077 					  NULL_RTX);
1078 
1079       swtch = loop_split_edge_with (single_succ_edge (swtch), branch_code);
1080       set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
1081       single_succ_edge (swtch)->probability = REG_BR_PROB_BASE - p;
1082       e = make_edge (swtch, preheader,
1083 		     single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
1084       e->probability = p;
1085     }
1086 
1087   /* Recount dominators for outer blocks.  */
1088   iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, n_dom_bbs);
1089 
1090   /* And unroll loop.  */
1091 
1092   sbitmap_ones (wont_exit);
1093   RESET_BIT (wont_exit, may_exit_copy);
1094   opt_info_start_duplication (opt_info);
1095 
1096   ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
1097 				      loops, max_unroll,
1098 				      wont_exit, desc->out_edge,
1099 				      remove_edges, &n_remove_edges,
1100 				      DLTHE_FLAG_UPDATE_FREQ
1101 				      | (opt_info
1102 					 ? DLTHE_RECORD_COPY_NUMBER
1103 					   : 0));
1104   gcc_assert (ok);
1105 
1106   if (opt_info)
1107     {
1108       apply_opt_in_copies (opt_info, max_unroll, true, true);
1109       free_opt_info (opt_info);
1110     }
1111 
1112   free (wont_exit);
1113 
1114   if (exit_at_end)
1115     {
1116       basic_block exit_block = get_bb_copy (desc->in_edge->src);
1117       /* Find a new in and out edge; they are in the last copy we have
1118 	 made.  */
1119 
1120       if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
1121 	{
1122 	  desc->out_edge = EDGE_SUCC (exit_block, 0);
1123 	  desc->in_edge = EDGE_SUCC (exit_block, 1);
1124 	}
1125       else
1126 	{
1127 	  desc->out_edge = EDGE_SUCC (exit_block, 1);
1128 	  desc->in_edge = EDGE_SUCC (exit_block, 0);
1129 	}
1130     }
1131 
1132   /* Remove the edges.  */
1133   for (i = 0; i < n_remove_edges; i++)
1134     remove_path (loops, remove_edges[i]);
1135   free (remove_edges);
1136 
1137   /* We must be careful when updating the number of iterations due to
1138      preconditioning and the fact that the value must be valid at entry
1139      of the loop.  After passing through the above code, we see that
1140      the correct new number of iterations is this:  */
1141   gcc_assert (!desc->const_iter);
1142   desc->niter_expr =
1143     simplify_gen_binary (UDIV, desc->mode, old_niter,
1144 			 GEN_INT (max_unroll + 1));
1145   desc->niter_max /= max_unroll + 1;
1146   if (exit_at_end)
1147     {
1148       desc->niter_expr =
1149 	simplify_gen_binary (MINUS, desc->mode, desc->niter_expr, const1_rtx);
1150       desc->noloop_assumptions = NULL_RTX;
1151       desc->niter_max--;
1152     }
1153 
1154   if (dump_file)
1155     fprintf (dump_file,
1156 	     ";; Unrolled loop %d times, counting # of iterations "
1157 	     "in runtime, %i insns\n",
1158 	     max_unroll, num_loop_insns (loop));
1159 
1160   if (dom_bbs)
1161     free (dom_bbs);
1162 }
1163 
1164 /* Decide whether to simply peel LOOP and how much.  */
1165 static void
decide_peel_simple(struct loop * loop,int flags)1166 decide_peel_simple (struct loop *loop, int flags)
1167 {
1168   unsigned npeel;
1169   struct niter_desc *desc;
1170 
1171   if (!(flags & UAP_PEEL))
1172     {
1173       /* We were not asked to, just return back silently.  */
1174       return;
1175     }
1176 
1177   if (dump_file)
1178     fprintf (dump_file, "\n;; Considering simply peeling loop\n");
1179 
1180   /* npeel = number of iterations to peel.  */
1181   npeel = PARAM_VALUE (PARAM_MAX_PEELED_INSNS) / loop->ninsns;
1182   if (npeel > (unsigned) PARAM_VALUE (PARAM_MAX_PEEL_TIMES))
1183     npeel = PARAM_VALUE (PARAM_MAX_PEEL_TIMES);
1184 
1185   /* Skip big loops.  */
1186   if (!npeel)
1187     {
1188       if (dump_file)
1189 	fprintf (dump_file, ";; Not considering loop, is too big\n");
1190       return;
1191     }
1192 
1193   /* Check for simple loops.  */
1194   desc = get_simple_loop_desc (loop);
1195 
1196   /* Check number of iterations.  */
1197   if (desc->simple_p && !desc->assumptions && desc->const_iter)
1198     {
1199       if (dump_file)
1200 	fprintf (dump_file, ";; Loop iterates constant times\n");
1201       return;
1202     }
1203 
1204   /* Do not simply peel loops with branches inside -- it increases number
1205      of mispredicts.  */
1206   if (num_loop_branches (loop) > 1)
1207     {
1208       if (dump_file)
1209 	fprintf (dump_file, ";; Not peeling, contains branches\n");
1210       return;
1211     }
1212 
1213   if (loop->header->count)
1214     {
1215       unsigned niter = expected_loop_iterations (loop);
1216       if (niter + 1 > npeel)
1217 	{
1218 	  if (dump_file)
1219 	    {
1220 	      fprintf (dump_file, ";; Not peeling loop, rolls too much (");
1221 	      fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
1222 		       (HOST_WIDEST_INT) (niter + 1));
1223 	      fprintf (dump_file, " iterations > %d [maximum peelings])\n",
1224 		       npeel);
1225 	    }
1226 	  return;
1227 	}
1228       npeel = niter + 1;
1229     }
1230   else
1231     {
1232       /* For now we have no good heuristics to decide whether loop peeling
1233          will be effective, so disable it.  */
1234       if (dump_file)
1235 	fprintf (dump_file,
1236 		 ";; Not peeling loop, no evidence it will be profitable\n");
1237       return;
1238     }
1239 
1240   /* Success.  */
1241   loop->lpt_decision.decision = LPT_PEEL_SIMPLE;
1242   loop->lpt_decision.times = npeel;
1243 
1244   if (dump_file)
1245     fprintf (dump_file, ";; Decided to simply peel the loop, %d times.\n",
1246 	     loop->lpt_decision.times);
1247 }
1248 
1249 /* Peel a LOOP LOOP->LPT_DECISION.TIMES times.  The transformation:
1250    while (cond)
1251      body;
1252 
1253    ==>
1254 
1255    if (!cond) goto end;
1256    body;
1257    if (!cond) goto end;
1258    body;
1259    while (cond)
1260      body;
1261    end: ;
1262    */
1263 static void
peel_loop_simple(struct loops * loops,struct loop * loop)1264 peel_loop_simple (struct loops *loops, struct loop *loop)
1265 {
1266   sbitmap wont_exit;
1267   unsigned npeel = loop->lpt_decision.times;
1268   struct niter_desc *desc = get_simple_loop_desc (loop);
1269   struct opt_info *opt_info = NULL;
1270   bool ok;
1271 
1272   if (flag_split_ivs_in_unroller && npeel > 1)
1273     opt_info = analyze_insns_in_loop (loop);
1274 
1275   wont_exit = sbitmap_alloc (npeel + 1);
1276   sbitmap_zero (wont_exit);
1277 
1278   opt_info_start_duplication (opt_info);
1279 
1280   ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
1281 				      loops, npeel, wont_exit,
1282 				      NULL, NULL,
1283 				      NULL, DLTHE_FLAG_UPDATE_FREQ
1284 				      | (opt_info
1285 					 ? DLTHE_RECORD_COPY_NUMBER
1286 					   : 0));
1287   gcc_assert (ok);
1288 
1289   free (wont_exit);
1290 
1291   if (opt_info)
1292     {
1293       apply_opt_in_copies (opt_info, npeel, false, false);
1294       free_opt_info (opt_info);
1295     }
1296 
1297   if (desc->simple_p)
1298     {
1299       if (desc->const_iter)
1300 	{
1301 	  desc->niter -= npeel;
1302 	  desc->niter_expr = GEN_INT (desc->niter);
1303 	  desc->noloop_assumptions = NULL_RTX;
1304 	}
1305       else
1306 	{
1307 	  /* We cannot just update niter_expr, as its value might be clobbered
1308 	     inside loop.  We could handle this by counting the number into
1309 	     temporary just like we do in runtime unrolling, but it does not
1310 	     seem worthwhile.  */
1311 	  free_simple_loop_desc (loop);
1312 	}
1313     }
1314   if (dump_file)
1315     fprintf (dump_file, ";; Peeling loop %d times\n", npeel);
1316 }
1317 
1318 /* Decide whether to unroll LOOP stupidly and how much.  */
1319 static void
decide_unroll_stupid(struct loop * loop,int flags)1320 decide_unroll_stupid (struct loop *loop, int flags)
1321 {
1322   unsigned nunroll, nunroll_by_av, i;
1323   struct niter_desc *desc;
1324 
1325   if (!(flags & UAP_UNROLL_ALL))
1326     {
1327       /* We were not asked to, just return back silently.  */
1328       return;
1329     }
1330 
1331   if (dump_file)
1332     fprintf (dump_file, "\n;; Considering unrolling loop stupidly\n");
1333 
1334   /* nunroll = total number of copies of the original loop body in
1335      unrolled loop (i.e. if it is 2, we have to duplicate loop body once.  */
1336   nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
1337   nunroll_by_av
1338     = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
1339   if (nunroll > nunroll_by_av)
1340     nunroll = nunroll_by_av;
1341   if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
1342     nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
1343 
1344   /* Skip big loops.  */
1345   if (nunroll <= 1)
1346     {
1347       if (dump_file)
1348 	fprintf (dump_file, ";; Not considering loop, is too big\n");
1349       return;
1350     }
1351 
1352   /* Check for simple loops.  */
1353   desc = get_simple_loop_desc (loop);
1354 
1355   /* Check simpleness.  */
1356   if (desc->simple_p && !desc->assumptions)
1357     {
1358       if (dump_file)
1359 	fprintf (dump_file, ";; The loop is simple\n");
1360       return;
1361     }
1362 
1363   /* Do not unroll loops with branches inside -- it increases number
1364      of mispredicts.  */
1365   if (num_loop_branches (loop) > 1)
1366     {
1367       if (dump_file)
1368 	fprintf (dump_file, ";; Not unrolling, contains branches\n");
1369       return;
1370     }
1371 
1372   /* If we have profile feedback, check whether the loop rolls.  */
1373   if (loop->header->count
1374       && expected_loop_iterations (loop) < 2 * nunroll)
1375     {
1376       if (dump_file)
1377 	fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
1378       return;
1379     }
1380 
1381   /* Success.  Now force nunroll to be power of 2, as it seems that this
1382      improves results (partially because of better alignments, partially
1383      because of some dark magic).  */
1384   for (i = 1; 2 * i <= nunroll; i *= 2)
1385     continue;
1386 
1387   loop->lpt_decision.decision = LPT_UNROLL_STUPID;
1388   loop->lpt_decision.times = i - 1;
1389 
1390   if (dump_file)
1391     fprintf (dump_file,
1392 	     ";; Decided to unroll the loop stupidly, %d times.\n",
1393 	     loop->lpt_decision.times);
1394 }
1395 
1396 /* Unroll a LOOP LOOP->LPT_DECISION.TIMES times.  The transformation:
1397    while (cond)
1398      body;
1399 
1400    ==>
1401 
1402    while (cond)
1403      {
1404        body;
1405        if (!cond) break;
1406        body;
1407        if (!cond) break;
1408        body;
1409        if (!cond) break;
1410        body;
1411      }
1412    */
1413 static void
unroll_loop_stupid(struct loops * loops,struct loop * loop)1414 unroll_loop_stupid (struct loops *loops, struct loop *loop)
1415 {
1416   sbitmap wont_exit;
1417   unsigned nunroll = loop->lpt_decision.times;
1418   struct niter_desc *desc = get_simple_loop_desc (loop);
1419   struct opt_info *opt_info = NULL;
1420   bool ok;
1421 
1422   if (flag_split_ivs_in_unroller
1423       || flag_variable_expansion_in_unroller)
1424     opt_info = analyze_insns_in_loop (loop);
1425 
1426 
1427   wont_exit = sbitmap_alloc (nunroll + 1);
1428   sbitmap_zero (wont_exit);
1429   opt_info_start_duplication (opt_info);
1430 
1431   ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
1432 				      loops, nunroll, wont_exit,
1433 				      NULL, NULL, NULL,
1434 				      DLTHE_FLAG_UPDATE_FREQ
1435 				      | (opt_info
1436 					 ? DLTHE_RECORD_COPY_NUMBER
1437 					   : 0));
1438   gcc_assert (ok);
1439 
1440   if (opt_info)
1441     {
1442       apply_opt_in_copies (opt_info, nunroll, true, true);
1443       free_opt_info (opt_info);
1444     }
1445 
1446   free (wont_exit);
1447 
1448   if (desc->simple_p)
1449     {
1450       /* We indeed may get here provided that there are nontrivial assumptions
1451 	 for a loop to be really simple.  We could update the counts, but the
1452 	 problem is that we are unable to decide which exit will be taken
1453 	 (not really true in case the number of iterations is constant,
1454 	 but noone will do anything with this information, so we do not
1455 	 worry about it).  */
1456       desc->simple_p = false;
1457     }
1458 
1459   if (dump_file)
1460     fprintf (dump_file, ";; Unrolled loop %d times, %i insns\n",
1461 	     nunroll, num_loop_insns (loop));
1462 }
1463 
1464 /* A hash function for information about insns to split.  */
1465 
1466 static hashval_t
si_info_hash(const void * ivts)1467 si_info_hash (const void *ivts)
1468 {
1469   return (hashval_t) INSN_UID (((struct iv_to_split *) ivts)->insn);
1470 }
1471 
1472 /* An equality functions for information about insns to split.  */
1473 
1474 static int
si_info_eq(const void * ivts1,const void * ivts2)1475 si_info_eq (const void *ivts1, const void *ivts2)
1476 {
1477   const struct iv_to_split *i1 = ivts1;
1478   const struct iv_to_split *i2 = ivts2;
1479 
1480   return i1->insn == i2->insn;
1481 }
1482 
1483 /* Return a hash for VES, which is really a "var_to_expand *".  */
1484 
1485 static hashval_t
ve_info_hash(const void * ves)1486 ve_info_hash (const void *ves)
1487 {
1488   return (hashval_t) INSN_UID (((struct var_to_expand *) ves)->insn);
1489 }
1490 
1491 /* Return true if IVTS1 and IVTS2 (which are really both of type
1492    "var_to_expand *") refer to the same instruction.  */
1493 
1494 static int
ve_info_eq(const void * ivts1,const void * ivts2)1495 ve_info_eq (const void *ivts1, const void *ivts2)
1496 {
1497   const struct var_to_expand *i1 = ivts1;
1498   const struct var_to_expand *i2 = ivts2;
1499 
1500   return i1->insn == i2->insn;
1501 }
1502 
1503 /* Returns true if REG is referenced in one insn in LOOP.  */
1504 
1505 bool
referenced_in_one_insn_in_loop_p(struct loop * loop,rtx reg)1506 referenced_in_one_insn_in_loop_p (struct loop *loop, rtx reg)
1507 {
1508   basic_block *body, bb;
1509   unsigned i;
1510   int count_ref = 0;
1511   rtx insn;
1512 
1513   body = get_loop_body (loop);
1514   for (i = 0; i < loop->num_nodes; i++)
1515     {
1516       bb = body[i];
1517 
1518       FOR_BB_INSNS (bb, insn)
1519       {
1520         if (rtx_referenced_p (reg, insn))
1521           count_ref++;
1522       }
1523     }
1524   return (count_ref  == 1);
1525 }
1526 
1527 /* Determine whether INSN contains an accumulator
1528    which can be expanded into separate copies,
1529    one for each copy of the LOOP body.
1530 
1531    for (i = 0 ; i < n; i++)
1532      sum += a[i];
1533 
1534    ==>
1535 
1536    sum += a[i]
1537    ....
1538    i = i+1;
1539    sum1 += a[i]
1540    ....
1541    i = i+1
1542    sum2 += a[i];
1543    ....
1544 
1545    Return NULL if INSN contains no opportunity for expansion of accumulator.
1546    Otherwise, allocate a VAR_TO_EXPAND structure, fill it with the relevant
1547    information and return a pointer to it.
1548 */
1549 
1550 static struct var_to_expand *
analyze_insn_to_expand_var(struct loop * loop,rtx insn)1551 analyze_insn_to_expand_var (struct loop *loop, rtx insn)
1552 {
1553   rtx set, dest, src, op1;
1554   struct var_to_expand *ves;
1555   enum machine_mode mode1, mode2;
1556 
1557   set = single_set (insn);
1558   if (!set)
1559     return NULL;
1560 
1561   dest = SET_DEST (set);
1562   src = SET_SRC (set);
1563 
1564   if (GET_CODE (src) != PLUS
1565       && GET_CODE (src) != MINUS
1566       && GET_CODE (src) != MULT)
1567     return NULL;
1568 
1569   /* Hmm, this is a bit paradoxical.  We know that INSN is a valid insn
1570      in MD.  But if there is no optab to generate the insn, we can not
1571      perform the variable expansion.  This can happen if an MD provides
1572      an insn but not a named pattern to generate it, for example to avoid
1573      producing code that needs additional mode switches like for x87/mmx.
1574 
1575      So we check have_insn_for which looks for an optab for the operation
1576      in SRC.  If it doesn't exist, we can't perform the expansion even
1577      though INSN is valid.  */
1578   if (!have_insn_for (GET_CODE (src), GET_MODE (src)))
1579     return NULL;
1580 
1581   if (!XEXP (src, 0))
1582     return NULL;
1583 
1584   op1 = XEXP (src, 0);
1585 
1586   if (!REG_P (dest)
1587       && !(GET_CODE (dest) == SUBREG
1588            && REG_P (SUBREG_REG (dest))))
1589     return NULL;
1590 
1591   if (!rtx_equal_p (dest, op1))
1592     return NULL;
1593 
1594   if (!referenced_in_one_insn_in_loop_p (loop, dest))
1595     return NULL;
1596 
1597   if (rtx_referenced_p (dest, XEXP (src, 1)))
1598     return NULL;
1599 
1600   mode1 = GET_MODE (dest);
1601   mode2 = GET_MODE (XEXP (src, 1));
1602   if ((FLOAT_MODE_P (mode1)
1603        || FLOAT_MODE_P (mode2))
1604       && !flag_unsafe_math_optimizations)
1605     return NULL;
1606 
1607   /* Record the accumulator to expand.  */
1608   ves = XNEW (struct var_to_expand);
1609   ves->insn = insn;
1610   ves->var_expansions = VEC_alloc (rtx, heap, 1);
1611   ves->reg = copy_rtx (dest);
1612   ves->op = GET_CODE (src);
1613   ves->expansion_count = 0;
1614   ves->reuse_expansion = 0;
1615   return ves;
1616 }
1617 
1618 /* Determine whether there is an induction variable in INSN that
1619    we would like to split during unrolling.
1620 
1621    I.e. replace
1622 
1623    i = i + 1;
1624    ...
1625    i = i + 1;
1626    ...
1627    i = i + 1;
1628    ...
1629 
1630    type chains by
1631 
1632    i0 = i + 1
1633    ...
1634    i = i0 + 1
1635    ...
1636    i = i0 + 2
1637    ...
1638 
1639    Return NULL if INSN contains no interesting IVs.  Otherwise, allocate
1640    an IV_TO_SPLIT structure, fill it with the relevant information and return a
1641    pointer to it.  */
1642 
1643 static struct iv_to_split *
analyze_iv_to_split_insn(rtx insn)1644 analyze_iv_to_split_insn (rtx insn)
1645 {
1646   rtx set, dest;
1647   struct rtx_iv iv;
1648   struct iv_to_split *ivts;
1649   bool ok;
1650 
1651   /* For now we just split the basic induction variables.  Later this may be
1652      extended for example by selecting also addresses of memory references.  */
1653   set = single_set (insn);
1654   if (!set)
1655     return NULL;
1656 
1657   dest = SET_DEST (set);
1658   if (!REG_P (dest))
1659     return NULL;
1660 
1661   if (!biv_p (insn, dest))
1662     return NULL;
1663 
1664   ok = iv_analyze_result (insn, dest, &iv);
1665 
1666   /* This used to be an assert under the assumption that if biv_p returns
1667      true that iv_analyze_result must also return true.  However, that
1668      assumption is not strictly correct as evidenced by pr25569.
1669 
1670      Returning NULL when iv_analyze_result returns false is safe and
1671      avoids the problems in pr25569 until the iv_analyze_* routines
1672      can be fixed, which is apparently hard and time consuming
1673      according to their author.  */
1674   if (! ok)
1675     return NULL;
1676 
1677   if (iv.step == const0_rtx
1678       || iv.mode != iv.extend_mode)
1679     return NULL;
1680 
1681   /* Record the insn to split.  */
1682   ivts = XNEW (struct iv_to_split);
1683   ivts->insn = insn;
1684   ivts->base_var = NULL_RTX;
1685   ivts->step = iv.step;
1686   ivts->n_loc = 1;
1687   ivts->loc[0] = 1;
1688 
1689   return ivts;
1690 }
1691 
1692 /* Determines which of insns in LOOP can be optimized.
1693    Return a OPT_INFO struct with the relevant hash tables filled
1694    with all insns to be optimized.  The FIRST_NEW_BLOCK field
1695    is undefined for the return value.  */
1696 
1697 static struct opt_info *
analyze_insns_in_loop(struct loop * loop)1698 analyze_insns_in_loop (struct loop *loop)
1699 {
1700   basic_block *body, bb;
1701   unsigned i, num_edges = 0;
1702   struct opt_info *opt_info = XCNEW (struct opt_info);
1703   rtx insn;
1704   struct iv_to_split *ivts = NULL;
1705   struct var_to_expand *ves = NULL;
1706   PTR *slot1;
1707   PTR *slot2;
1708   edge *edges = get_loop_exit_edges (loop, &num_edges);
1709   bool can_apply = false;
1710 
1711   iv_analysis_loop_init (loop);
1712 
1713   body = get_loop_body (loop);
1714 
1715   if (flag_split_ivs_in_unroller)
1716     opt_info->insns_to_split = htab_create (5 * loop->num_nodes,
1717                                             si_info_hash, si_info_eq, free);
1718 
1719   /* Record the loop exit bb and loop preheader before the unrolling.  */
1720   if (!loop_preheader_edge (loop)->src)
1721     {
1722       loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
1723       opt_info->loop_preheader = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
1724     }
1725   else
1726     opt_info->loop_preheader = loop_preheader_edge (loop)->src;
1727 
1728   if (num_edges == 1
1729       && !(edges[0]->flags & EDGE_COMPLEX))
1730     {
1731       opt_info->loop_exit = loop_split_edge_with (edges[0], NULL_RTX);
1732       can_apply = true;
1733     }
1734 
1735   if (flag_variable_expansion_in_unroller
1736       && can_apply)
1737     opt_info->insns_with_var_to_expand = htab_create (5 * loop->num_nodes,
1738 						      ve_info_hash, ve_info_eq, free);
1739 
1740   for (i = 0; i < loop->num_nodes; i++)
1741     {
1742       bb = body[i];
1743       if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
1744 	continue;
1745 
1746       FOR_BB_INSNS (bb, insn)
1747       {
1748         if (!INSN_P (insn))
1749           continue;
1750 
1751         if (opt_info->insns_to_split)
1752           ivts = analyze_iv_to_split_insn (insn);
1753 
1754         if (ivts)
1755           {
1756             slot1 = htab_find_slot (opt_info->insns_to_split, ivts, INSERT);
1757             *slot1 = ivts;
1758             continue;
1759           }
1760 
1761         if (opt_info->insns_with_var_to_expand)
1762           ves = analyze_insn_to_expand_var (loop, insn);
1763 
1764         if (ves)
1765           {
1766             slot2 = htab_find_slot (opt_info->insns_with_var_to_expand, ves, INSERT);
1767             *slot2 = ves;
1768           }
1769       }
1770     }
1771 
1772   free (edges);
1773   free (body);
1774   return opt_info;
1775 }
1776 
1777 /* Called just before loop duplication.  Records start of duplicated area
1778    to OPT_INFO.  */
1779 
1780 static void
opt_info_start_duplication(struct opt_info * opt_info)1781 opt_info_start_duplication (struct opt_info *opt_info)
1782 {
1783   if (opt_info)
1784     opt_info->first_new_block = last_basic_block;
1785 }
1786 
1787 /* Determine the number of iterations between initialization of the base
1788    variable and the current copy (N_COPY).  N_COPIES is the total number
1789    of newly created copies.  UNROLLING is true if we are unrolling
1790    (not peeling) the loop.  */
1791 
1792 static unsigned
determine_split_iv_delta(unsigned n_copy,unsigned n_copies,bool unrolling)1793 determine_split_iv_delta (unsigned n_copy, unsigned n_copies, bool unrolling)
1794 {
1795   if (unrolling)
1796     {
1797       /* If we are unrolling, initialization is done in the original loop
1798 	 body (number 0).  */
1799       return n_copy;
1800     }
1801   else
1802     {
1803       /* If we are peeling, the copy in that the initialization occurs has
1804 	 number 1.  The original loop (number 0) is the last.  */
1805       if (n_copy)
1806 	return n_copy - 1;
1807       else
1808 	return n_copies;
1809     }
1810 }
1811 
1812 /* Locate in EXPR the expression corresponding to the location recorded
1813    in IVTS, and return a pointer to the RTX for this location.  */
1814 
1815 static rtx *
get_ivts_expr(rtx expr,struct iv_to_split * ivts)1816 get_ivts_expr (rtx expr, struct iv_to_split *ivts)
1817 {
1818   unsigned i;
1819   rtx *ret = &expr;
1820 
1821   for (i = 0; i < ivts->n_loc; i++)
1822     ret = &XEXP (*ret, ivts->loc[i]);
1823 
1824   return ret;
1825 }
1826 
1827 /* Allocate basic variable for the induction variable chain.  Callback for
1828    htab_traverse.  */
1829 
1830 static int
allocate_basic_variable(void ** slot,void * data ATTRIBUTE_UNUSED)1831 allocate_basic_variable (void **slot, void *data ATTRIBUTE_UNUSED)
1832 {
1833   struct iv_to_split *ivts = *slot;
1834   rtx expr = *get_ivts_expr (single_set (ivts->insn), ivts);
1835 
1836   ivts->base_var = gen_reg_rtx (GET_MODE (expr));
1837 
1838   return 1;
1839 }
1840 
1841 /* Insert initialization of basic variable of IVTS before INSN, taking
1842    the initial value from INSN.  */
1843 
1844 static void
insert_base_initialization(struct iv_to_split * ivts,rtx insn)1845 insert_base_initialization (struct iv_to_split *ivts, rtx insn)
1846 {
1847   rtx expr = copy_rtx (*get_ivts_expr (single_set (insn), ivts));
1848   rtx seq;
1849 
1850   start_sequence ();
1851   expr = force_operand (expr, ivts->base_var);
1852   if (expr != ivts->base_var)
1853     emit_move_insn (ivts->base_var, expr);
1854   seq = get_insns ();
1855   end_sequence ();
1856 
1857   emit_insn_before (seq, insn);
1858 }
1859 
1860 /* Replace the use of induction variable described in IVTS in INSN
1861    by base variable + DELTA * step.  */
1862 
1863 static void
split_iv(struct iv_to_split * ivts,rtx insn,unsigned delta)1864 split_iv (struct iv_to_split *ivts, rtx insn, unsigned delta)
1865 {
1866   rtx expr, *loc, seq, incr, var;
1867   enum machine_mode mode = GET_MODE (ivts->base_var);
1868   rtx src, dest, set;
1869 
1870   /* Construct base + DELTA * step.  */
1871   if (!delta)
1872     expr = ivts->base_var;
1873   else
1874     {
1875       incr = simplify_gen_binary (MULT, mode,
1876 				  ivts->step, gen_int_mode (delta, mode));
1877       expr = simplify_gen_binary (PLUS, GET_MODE (ivts->base_var),
1878 				  ivts->base_var, incr);
1879     }
1880 
1881   /* Figure out where to do the replacement.  */
1882   loc = get_ivts_expr (single_set (insn), ivts);
1883 
1884   /* If we can make the replacement right away, we're done.  */
1885   if (validate_change (insn, loc, expr, 0))
1886     return;
1887 
1888   /* Otherwise, force EXPR into a register and try again.  */
1889   start_sequence ();
1890   var = gen_reg_rtx (mode);
1891   expr = force_operand (expr, var);
1892   if (expr != var)
1893     emit_move_insn (var, expr);
1894   seq = get_insns ();
1895   end_sequence ();
1896   emit_insn_before (seq, insn);
1897 
1898   if (validate_change (insn, loc, var, 0))
1899     return;
1900 
1901   /* The last chance.  Try recreating the assignment in insn
1902      completely from scratch.  */
1903   set = single_set (insn);
1904   gcc_assert (set);
1905 
1906   start_sequence ();
1907   *loc = var;
1908   src = copy_rtx (SET_SRC (set));
1909   dest = copy_rtx (SET_DEST (set));
1910   src = force_operand (src, dest);
1911   if (src != dest)
1912     emit_move_insn (dest, src);
1913   seq = get_insns ();
1914   end_sequence ();
1915 
1916   emit_insn_before (seq, insn);
1917   delete_insn (insn);
1918 }
1919 
1920 
1921 /* Return one expansion of the accumulator recorded in struct VE.  */
1922 
1923 static rtx
get_expansion(struct var_to_expand * ve)1924 get_expansion (struct var_to_expand *ve)
1925 {
1926   rtx reg;
1927 
1928   if (ve->reuse_expansion == 0)
1929     reg = ve->reg;
1930   else
1931     reg = VEC_index (rtx, ve->var_expansions, ve->reuse_expansion - 1);
1932 
1933   if (VEC_length (rtx, ve->var_expansions) == (unsigned) ve->reuse_expansion)
1934     ve->reuse_expansion = 0;
1935   else
1936     ve->reuse_expansion++;
1937 
1938   return reg;
1939 }
1940 
1941 
1942 /* Given INSN replace the uses of the accumulator recorded in VE
1943    with a new register.  */
1944 
1945 static void
expand_var_during_unrolling(struct var_to_expand * ve,rtx insn)1946 expand_var_during_unrolling (struct var_to_expand *ve, rtx insn)
1947 {
1948   rtx new_reg, set;
1949   bool really_new_expansion = false;
1950 
1951   set = single_set (insn);
1952   gcc_assert (set);
1953 
1954   /* Generate a new register only if the expansion limit has not been
1955      reached.  Else reuse an already existing expansion.  */
1956   if (PARAM_VALUE (PARAM_MAX_VARIABLE_EXPANSIONS) > ve->expansion_count)
1957     {
1958       really_new_expansion = true;
1959       new_reg = gen_reg_rtx (GET_MODE (ve->reg));
1960     }
1961   else
1962     new_reg = get_expansion (ve);
1963 
1964   validate_change (insn, &SET_DEST (set), new_reg, 1);
1965   validate_change (insn, &XEXP (SET_SRC (set), 0), new_reg, 1);
1966 
1967   if (apply_change_group ())
1968     if (really_new_expansion)
1969       {
1970         VEC_safe_push (rtx, heap, ve->var_expansions, new_reg);
1971         ve->expansion_count++;
1972       }
1973 }
1974 
1975 /* Initialize the variable expansions in loop preheader.
1976    Callbacks for htab_traverse.  PLACE_P is the loop-preheader
1977    basic block where the initialization of the expansions
1978    should take place.  */
1979 
1980 static int
insert_var_expansion_initialization(void ** slot,void * place_p)1981 insert_var_expansion_initialization (void **slot, void *place_p)
1982 {
1983   struct var_to_expand *ve = *slot;
1984   basic_block place = (basic_block)place_p;
1985   rtx seq, var, zero_init, insn;
1986   unsigned i;
1987 
1988   if (VEC_length (rtx, ve->var_expansions) == 0)
1989     return 1;
1990 
1991   start_sequence ();
1992   if (ve->op == PLUS || ve->op == MINUS)
1993     for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++)
1994       {
1995         zero_init =  CONST0_RTX (GET_MODE (var));
1996         emit_move_insn (var, zero_init);
1997       }
1998   else if (ve->op == MULT)
1999     for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++)
2000       {
2001         zero_init =  CONST1_RTX (GET_MODE (var));
2002         emit_move_insn (var, zero_init);
2003       }
2004 
2005   seq = get_insns ();
2006   end_sequence ();
2007 
2008   insn = BB_HEAD (place);
2009   while (!NOTE_INSN_BASIC_BLOCK_P (insn))
2010     insn = NEXT_INSN (insn);
2011 
2012   emit_insn_after (seq, insn);
2013   /* Continue traversing the hash table.  */
2014   return 1;
2015 }
2016 
2017 /*  Combine the variable expansions at the loop exit.
2018     Callbacks for htab_traverse.  PLACE_P is the loop exit
2019     basic block where the summation of the expansions should
2020     take place.  */
2021 
2022 static int
combine_var_copies_in_loop_exit(void ** slot,void * place_p)2023 combine_var_copies_in_loop_exit (void **slot, void *place_p)
2024 {
2025   struct var_to_expand *ve = *slot;
2026   basic_block place = (basic_block)place_p;
2027   rtx sum = ve->reg;
2028   rtx expr, seq, var, insn;
2029   unsigned i;
2030 
2031   if (VEC_length (rtx, ve->var_expansions) == 0)
2032     return 1;
2033 
2034   start_sequence ();
2035   if (ve->op == PLUS || ve->op == MINUS)
2036     for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++)
2037       {
2038         sum = simplify_gen_binary (PLUS, GET_MODE (ve->reg),
2039                                    var, sum);
2040       }
2041   else if (ve->op == MULT)
2042     for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++)
2043       {
2044         sum = simplify_gen_binary (MULT, GET_MODE (ve->reg),
2045                                    var, sum);
2046       }
2047 
2048   expr = force_operand (sum, ve->reg);
2049   if (expr != ve->reg)
2050     emit_move_insn (ve->reg, expr);
2051   seq = get_insns ();
2052   end_sequence ();
2053 
2054   insn = BB_HEAD (place);
2055   while (!NOTE_INSN_BASIC_BLOCK_P (insn))
2056     insn = NEXT_INSN (insn);
2057 
2058   emit_insn_after (seq, insn);
2059 
2060   /* Continue traversing the hash table.  */
2061   return 1;
2062 }
2063 
2064 /* Apply loop optimizations in loop copies using the
2065    data which gathered during the unrolling.  Structure
2066    OPT_INFO record that data.
2067 
2068    UNROLLING is true if we unrolled (not peeled) the loop.
2069    REWRITE_ORIGINAL_BODY is true if we should also rewrite the original body of
2070    the loop (as it should happen in complete unrolling, but not in ordinary
2071    peeling of the loop).  */
2072 
2073 static void
apply_opt_in_copies(struct opt_info * opt_info,unsigned n_copies,bool unrolling,bool rewrite_original_loop)2074 apply_opt_in_copies (struct opt_info *opt_info,
2075                      unsigned n_copies, bool unrolling,
2076                      bool rewrite_original_loop)
2077 {
2078   unsigned i, delta;
2079   basic_block bb, orig_bb;
2080   rtx insn, orig_insn, next;
2081   struct iv_to_split ivts_templ, *ivts;
2082   struct var_to_expand ve_templ, *ves;
2083 
2084   /* Sanity check -- we need to put initialization in the original loop
2085      body.  */
2086   gcc_assert (!unrolling || rewrite_original_loop);
2087 
2088   /* Allocate the basic variables (i0).  */
2089   if (opt_info->insns_to_split)
2090     htab_traverse (opt_info->insns_to_split, allocate_basic_variable, NULL);
2091 
2092   for (i = opt_info->first_new_block; i < (unsigned) last_basic_block; i++)
2093     {
2094       bb = BASIC_BLOCK (i);
2095       orig_bb = get_bb_original (bb);
2096 
2097       /* bb->aux holds position in copy sequence initialized by
2098 	 duplicate_loop_to_header_edge.  */
2099       delta = determine_split_iv_delta ((size_t)bb->aux, n_copies,
2100 					unrolling);
2101       bb->aux = 0;
2102       orig_insn = BB_HEAD (orig_bb);
2103       for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb)); insn = next)
2104         {
2105           next = NEXT_INSN (insn);
2106           if (!INSN_P (insn))
2107             continue;
2108 
2109           while (!INSN_P (orig_insn))
2110             orig_insn = NEXT_INSN (orig_insn);
2111 
2112           ivts_templ.insn = orig_insn;
2113           ve_templ.insn = orig_insn;
2114 
2115           /* Apply splitting iv optimization.  */
2116           if (opt_info->insns_to_split)
2117             {
2118               ivts = htab_find (opt_info->insns_to_split, &ivts_templ);
2119 
2120               if (ivts)
2121                 {
2122 		  gcc_assert (GET_CODE (PATTERN (insn))
2123 			      == GET_CODE (PATTERN (orig_insn)));
2124 
2125                   if (!delta)
2126                     insert_base_initialization (ivts, insn);
2127                   split_iv (ivts, insn, delta);
2128                 }
2129             }
2130           /* Apply variable expansion optimization.  */
2131           if (unrolling && opt_info->insns_with_var_to_expand)
2132             {
2133               ves = htab_find (opt_info->insns_with_var_to_expand, &ve_templ);
2134               if (ves)
2135                 {
2136 		  gcc_assert (GET_CODE (PATTERN (insn))
2137 			      == GET_CODE (PATTERN (orig_insn)));
2138                   expand_var_during_unrolling (ves, insn);
2139                 }
2140             }
2141           orig_insn = NEXT_INSN (orig_insn);
2142         }
2143     }
2144 
2145   if (!rewrite_original_loop)
2146     return;
2147 
2148   /* Initialize the variable expansions in the loop preheader
2149      and take care of combining them at the loop exit.  */
2150   if (opt_info->insns_with_var_to_expand)
2151     {
2152       htab_traverse (opt_info->insns_with_var_to_expand,
2153                      insert_var_expansion_initialization,
2154                      opt_info->loop_preheader);
2155       htab_traverse (opt_info->insns_with_var_to_expand,
2156                      combine_var_copies_in_loop_exit,
2157                      opt_info->loop_exit);
2158     }
2159 
2160   /* Rewrite also the original loop body.  Find them as originals of the blocks
2161      in the last copied iteration, i.e. those that have
2162      get_bb_copy (get_bb_original (bb)) == bb.  */
2163   for (i = opt_info->first_new_block; i < (unsigned) last_basic_block; i++)
2164     {
2165       bb = BASIC_BLOCK (i);
2166       orig_bb = get_bb_original (bb);
2167       if (get_bb_copy (orig_bb) != bb)
2168 	continue;
2169 
2170       delta = determine_split_iv_delta (0, n_copies, unrolling);
2171       for (orig_insn = BB_HEAD (orig_bb);
2172            orig_insn != NEXT_INSN (BB_END (bb));
2173            orig_insn = next)
2174         {
2175           next = NEXT_INSN (orig_insn);
2176 
2177           if (!INSN_P (orig_insn))
2178  	    continue;
2179 
2180           ivts_templ.insn = orig_insn;
2181           if (opt_info->insns_to_split)
2182             {
2183               ivts = htab_find (opt_info->insns_to_split, &ivts_templ);
2184               if (ivts)
2185                 {
2186                   if (!delta)
2187                     insert_base_initialization (ivts, orig_insn);
2188                   split_iv (ivts, orig_insn, delta);
2189                   continue;
2190                 }
2191             }
2192 
2193         }
2194     }
2195 }
2196 
2197 /*  Release the data structures used for the variable expansion
2198     optimization.  Callbacks for htab_traverse.  */
2199 
2200 static int
release_var_copies(void ** slot,void * data ATTRIBUTE_UNUSED)2201 release_var_copies (void **slot, void *data ATTRIBUTE_UNUSED)
2202 {
2203   struct var_to_expand *ve = *slot;
2204 
2205   VEC_free (rtx, heap, ve->var_expansions);
2206 
2207   /* Continue traversing the hash table.  */
2208   return 1;
2209 }
2210 
2211 /* Release OPT_INFO.  */
2212 
2213 static void
free_opt_info(struct opt_info * opt_info)2214 free_opt_info (struct opt_info *opt_info)
2215 {
2216   if (opt_info->insns_to_split)
2217     htab_delete (opt_info->insns_to_split);
2218   if (opt_info->insns_with_var_to_expand)
2219     {
2220       htab_traverse (opt_info->insns_with_var_to_expand,
2221                      release_var_copies, NULL);
2222       htab_delete (opt_info->insns_with_var_to_expand);
2223     }
2224   free (opt_info);
2225 }
2226