xref: /dflybsd-src/contrib/gcc-4.7/gcc/loop-unroll.c (revision 04febcfb30580676d3e95f58a16c5137ee478b32)
1*e4b17023SJohn Marino /* Loop unrolling and peeling.
2*e4b17023SJohn Marino    Copyright (C) 2002, 2003, 2004, 2005, 2007, 2008, 2010, 2011
3*e4b17023SJohn Marino    Free Software Foundation, Inc.
4*e4b17023SJohn Marino 
5*e4b17023SJohn Marino This file is part of GCC.
6*e4b17023SJohn Marino 
7*e4b17023SJohn Marino GCC is free software; you can redistribute it and/or modify it under
8*e4b17023SJohn Marino the terms of the GNU General Public License as published by the Free
9*e4b17023SJohn Marino Software Foundation; either version 3, or (at your option) any later
10*e4b17023SJohn Marino version.
11*e4b17023SJohn Marino 
12*e4b17023SJohn Marino GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13*e4b17023SJohn Marino WARRANTY; without even the implied warranty of MERCHANTABILITY or
14*e4b17023SJohn Marino FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15*e4b17023SJohn Marino for more details.
16*e4b17023SJohn Marino 
17*e4b17023SJohn Marino You should have received a copy of the GNU General Public License
18*e4b17023SJohn Marino along with GCC; see the file COPYING3.  If not see
19*e4b17023SJohn Marino <http://www.gnu.org/licenses/>.  */
20*e4b17023SJohn Marino 
21*e4b17023SJohn Marino #include "config.h"
22*e4b17023SJohn Marino #include "system.h"
23*e4b17023SJohn Marino #include "coretypes.h"
24*e4b17023SJohn Marino #include "tm.h"
25*e4b17023SJohn Marino #include "rtl.h"
26*e4b17023SJohn Marino #include "hard-reg-set.h"
27*e4b17023SJohn Marino #include "obstack.h"
28*e4b17023SJohn Marino #include "basic-block.h"
29*e4b17023SJohn Marino #include "cfgloop.h"
30*e4b17023SJohn Marino #include "cfglayout.h"
31*e4b17023SJohn Marino #include "params.h"
32*e4b17023SJohn Marino #include "output.h"
33*e4b17023SJohn Marino #include "expr.h"
34*e4b17023SJohn Marino #include "hashtab.h"
35*e4b17023SJohn Marino #include "recog.h"
36*e4b17023SJohn Marino #include "target.h"
37*e4b17023SJohn Marino 
38*e4b17023SJohn Marino /* This pass performs loop unrolling and peeling.  We only perform these
39*e4b17023SJohn Marino    optimizations on innermost loops (with single exception) because
40*e4b17023SJohn Marino    the impact on performance is greatest here, and we want to avoid
41*e4b17023SJohn Marino    unnecessary code size growth.  The gain is caused by greater sequentiality
42*e4b17023SJohn Marino    of code, better code to optimize for further passes and in some cases
43*e4b17023SJohn Marino    by fewer testings of exit conditions.  The main problem is code growth,
44*e4b17023SJohn Marino    that impacts performance negatively due to effect of caches.
45*e4b17023SJohn Marino 
46*e4b17023SJohn Marino    What we do:
47*e4b17023SJohn Marino 
48*e4b17023SJohn Marino    -- complete peeling of once-rolling loops; this is the above mentioned
49*e4b17023SJohn Marino       exception, as this causes loop to be cancelled completely and
50*e4b17023SJohn Marino       does not cause code growth
51*e4b17023SJohn Marino    -- complete peeling of loops that roll (small) constant times.
52*e4b17023SJohn Marino    -- simple peeling of first iterations of loops that do not roll much
53*e4b17023SJohn Marino       (according to profile feedback)
54*e4b17023SJohn Marino    -- unrolling of loops that roll constant times; this is almost always
55*e4b17023SJohn Marino       win, as we get rid of exit condition tests.
56*e4b17023SJohn Marino    -- unrolling of loops that roll number of times that we can compute
57*e4b17023SJohn Marino       in runtime; we also get rid of exit condition tests here, but there
58*e4b17023SJohn Marino       is the extra expense for calculating the number of iterations
59*e4b17023SJohn Marino    -- simple unrolling of remaining loops; this is performed only if we
60*e4b17023SJohn Marino       are asked to, as the gain is questionable in this case and often
61*e4b17023SJohn Marino       it may even slow down the code
62*e4b17023SJohn Marino    For more detailed descriptions of each of those, see comments at
63*e4b17023SJohn Marino    appropriate function below.
64*e4b17023SJohn Marino 
65*e4b17023SJohn Marino    There is a lot of parameters (defined and described in params.def) that
66*e4b17023SJohn Marino    control how much we unroll/peel.
67*e4b17023SJohn Marino 
68*e4b17023SJohn Marino    ??? A great problem is that we don't have a good way how to determine
69*e4b17023SJohn Marino    how many times we should unroll the loop; the experiments I have made
70*e4b17023SJohn Marino    showed that this choice may affect performance in order of several %.
71*e4b17023SJohn Marino    */
72*e4b17023SJohn Marino 
73*e4b17023SJohn Marino /* Information about induction variables to split.  */
74*e4b17023SJohn Marino 
75*e4b17023SJohn Marino struct iv_to_split
76*e4b17023SJohn Marino {
77*e4b17023SJohn Marino   rtx insn;		/* The insn in that the induction variable occurs.  */
78*e4b17023SJohn Marino   rtx base_var;		/* The variable on that the values in the further
79*e4b17023SJohn Marino 			   iterations are based.  */
80*e4b17023SJohn Marino   rtx step;		/* Step of the induction variable.  */
81*e4b17023SJohn Marino   struct iv_to_split *next; /* Next entry in walking order.  */
82*e4b17023SJohn Marino   unsigned n_loc;
83*e4b17023SJohn Marino   unsigned loc[3];	/* Location where the definition of the induction
84*e4b17023SJohn Marino 			   variable occurs in the insn.  For example if
85*e4b17023SJohn Marino 			   N_LOC is 2, the expression is located at
86*e4b17023SJohn Marino 			   XEXP (XEXP (single_set, loc[0]), loc[1]).  */
87*e4b17023SJohn Marino };
88*e4b17023SJohn Marino 
89*e4b17023SJohn Marino /* Information about accumulators to expand.  */
90*e4b17023SJohn Marino 
91*e4b17023SJohn Marino struct var_to_expand
92*e4b17023SJohn Marino {
93*e4b17023SJohn Marino   rtx insn;		           /* The insn in that the variable expansion occurs.  */
94*e4b17023SJohn Marino   rtx reg;                         /* The accumulator which is expanded.  */
95*e4b17023SJohn Marino   VEC(rtx,heap) *var_expansions;   /* The copies of the accumulator which is expanded.  */
96*e4b17023SJohn Marino   struct var_to_expand *next;	   /* Next entry in walking order.  */
97*e4b17023SJohn Marino   enum rtx_code op;                /* The type of the accumulation - addition, subtraction
98*e4b17023SJohn Marino                                       or multiplication.  */
99*e4b17023SJohn Marino   int expansion_count;             /* Count the number of expansions generated so far.  */
100*e4b17023SJohn Marino   int reuse_expansion;             /* The expansion we intend to reuse to expand
101*e4b17023SJohn Marino                                       the accumulator.  If REUSE_EXPANSION is 0 reuse
102*e4b17023SJohn Marino                                       the original accumulator.  Else use
103*e4b17023SJohn Marino                                       var_expansions[REUSE_EXPANSION - 1].  */
104*e4b17023SJohn Marino   unsigned accum_pos;              /* The position in which the accumulator is placed in
105*e4b17023SJohn Marino                                       the insn src.  For example in x = x + something
106*e4b17023SJohn Marino                                       accum_pos is 0 while in x = something + x accum_pos
107*e4b17023SJohn Marino                                       is 1.  */
108*e4b17023SJohn Marino };
109*e4b17023SJohn Marino 
110*e4b17023SJohn Marino /* Information about optimization applied in
111*e4b17023SJohn Marino    the unrolled loop.  */
112*e4b17023SJohn Marino 
113*e4b17023SJohn Marino struct opt_info
114*e4b17023SJohn Marino {
115*e4b17023SJohn Marino   htab_t insns_to_split;           /* A hashtable of insns to split.  */
116*e4b17023SJohn Marino   struct iv_to_split *iv_to_split_head; /* The first iv to split.  */
117*e4b17023SJohn Marino   struct iv_to_split **iv_to_split_tail; /* Pointer to the tail of the list.  */
118*e4b17023SJohn Marino   htab_t insns_with_var_to_expand; /* A hashtable of insns with accumulators
119*e4b17023SJohn Marino                                       to expand.  */
120*e4b17023SJohn Marino   struct var_to_expand *var_to_expand_head; /* The first var to expand.  */
121*e4b17023SJohn Marino   struct var_to_expand **var_to_expand_tail; /* Pointer to the tail of the list.  */
122*e4b17023SJohn Marino   unsigned first_new_block;        /* The first basic block that was
123*e4b17023SJohn Marino                                       duplicated.  */
124*e4b17023SJohn Marino   basic_block loop_exit;           /* The loop exit basic block.  */
125*e4b17023SJohn Marino   basic_block loop_preheader;      /* The loop preheader basic block.  */
126*e4b17023SJohn Marino };
127*e4b17023SJohn Marino 
128*e4b17023SJohn Marino static void decide_unrolling_and_peeling (int);
129*e4b17023SJohn Marino static void peel_loops_completely (int);
130*e4b17023SJohn Marino static void decide_peel_simple (struct loop *, int);
131*e4b17023SJohn Marino static void decide_peel_once_rolling (struct loop *, int);
132*e4b17023SJohn Marino static void decide_peel_completely (struct loop *, int);
133*e4b17023SJohn Marino static void decide_unroll_stupid (struct loop *, int);
134*e4b17023SJohn Marino static void decide_unroll_constant_iterations (struct loop *, int);
135*e4b17023SJohn Marino static void decide_unroll_runtime_iterations (struct loop *, int);
136*e4b17023SJohn Marino static void peel_loop_simple (struct loop *);
137*e4b17023SJohn Marino static void peel_loop_completely (struct loop *);
138*e4b17023SJohn Marino static void unroll_loop_stupid (struct loop *);
139*e4b17023SJohn Marino static void unroll_loop_constant_iterations (struct loop *);
140*e4b17023SJohn Marino static void unroll_loop_runtime_iterations (struct loop *);
141*e4b17023SJohn Marino static struct opt_info *analyze_insns_in_loop (struct loop *);
142*e4b17023SJohn Marino static void opt_info_start_duplication (struct opt_info *);
143*e4b17023SJohn Marino static void apply_opt_in_copies (struct opt_info *, unsigned, bool, bool);
144*e4b17023SJohn Marino static void free_opt_info (struct opt_info *);
145*e4b17023SJohn Marino static struct var_to_expand *analyze_insn_to_expand_var (struct loop*, rtx);
146*e4b17023SJohn Marino static bool referenced_in_one_insn_in_loop_p (struct loop *, rtx, int *);
147*e4b17023SJohn Marino static struct iv_to_split *analyze_iv_to_split_insn (rtx);
148*e4b17023SJohn Marino static void expand_var_during_unrolling (struct var_to_expand *, rtx);
149*e4b17023SJohn Marino static void insert_var_expansion_initialization (struct var_to_expand *,
150*e4b17023SJohn Marino 						 basic_block);
151*e4b17023SJohn Marino static void combine_var_copies_in_loop_exit (struct var_to_expand *,
152*e4b17023SJohn Marino 					     basic_block);
153*e4b17023SJohn Marino static rtx get_expansion (struct var_to_expand *);
154*e4b17023SJohn Marino 
155*e4b17023SJohn Marino /* Unroll and/or peel (depending on FLAGS) LOOPS.  */
156*e4b17023SJohn Marino void
unroll_and_peel_loops(int flags)157*e4b17023SJohn Marino unroll_and_peel_loops (int flags)
158*e4b17023SJohn Marino {
159*e4b17023SJohn Marino   struct loop *loop;
160*e4b17023SJohn Marino   bool check;
161*e4b17023SJohn Marino   loop_iterator li;
162*e4b17023SJohn Marino 
163*e4b17023SJohn Marino   /* First perform complete loop peeling (it is almost surely a win,
164*e4b17023SJohn Marino      and affects parameters for further decision a lot).  */
165*e4b17023SJohn Marino   peel_loops_completely (flags);
166*e4b17023SJohn Marino 
167*e4b17023SJohn Marino   /* Now decide rest of unrolling and peeling.  */
168*e4b17023SJohn Marino   decide_unrolling_and_peeling (flags);
169*e4b17023SJohn Marino 
170*e4b17023SJohn Marino   /* Scan the loops, inner ones first.  */
171*e4b17023SJohn Marino   FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
172*e4b17023SJohn Marino     {
173*e4b17023SJohn Marino       check = true;
174*e4b17023SJohn Marino       /* And perform the appropriate transformations.  */
175*e4b17023SJohn Marino       switch (loop->lpt_decision.decision)
176*e4b17023SJohn Marino 	{
177*e4b17023SJohn Marino 	case LPT_PEEL_COMPLETELY:
178*e4b17023SJohn Marino 	  /* Already done.  */
179*e4b17023SJohn Marino 	  gcc_unreachable ();
180*e4b17023SJohn Marino 	case LPT_PEEL_SIMPLE:
181*e4b17023SJohn Marino 	  peel_loop_simple (loop);
182*e4b17023SJohn Marino 	  break;
183*e4b17023SJohn Marino 	case LPT_UNROLL_CONSTANT:
184*e4b17023SJohn Marino 	  unroll_loop_constant_iterations (loop);
185*e4b17023SJohn Marino 	  break;
186*e4b17023SJohn Marino 	case LPT_UNROLL_RUNTIME:
187*e4b17023SJohn Marino 	  unroll_loop_runtime_iterations (loop);
188*e4b17023SJohn Marino 	  break;
189*e4b17023SJohn Marino 	case LPT_UNROLL_STUPID:
190*e4b17023SJohn Marino 	  unroll_loop_stupid (loop);
191*e4b17023SJohn Marino 	  break;
192*e4b17023SJohn Marino 	case LPT_NONE:
193*e4b17023SJohn Marino 	  check = false;
194*e4b17023SJohn Marino 	  break;
195*e4b17023SJohn Marino 	default:
196*e4b17023SJohn Marino 	  gcc_unreachable ();
197*e4b17023SJohn Marino 	}
198*e4b17023SJohn Marino       if (check)
199*e4b17023SJohn Marino 	{
200*e4b17023SJohn Marino #ifdef ENABLE_CHECKING
201*e4b17023SJohn Marino 	  verify_dominators (CDI_DOMINATORS);
202*e4b17023SJohn Marino 	  verify_loop_structure ();
203*e4b17023SJohn Marino #endif
204*e4b17023SJohn Marino 	}
205*e4b17023SJohn Marino     }
206*e4b17023SJohn Marino 
207*e4b17023SJohn Marino   iv_analysis_done ();
208*e4b17023SJohn Marino }
209*e4b17023SJohn Marino 
210*e4b17023SJohn Marino /* Check whether exit of the LOOP is at the end of loop body.  */
211*e4b17023SJohn Marino 
212*e4b17023SJohn Marino static bool
loop_exit_at_end_p(struct loop * loop)213*e4b17023SJohn Marino loop_exit_at_end_p (struct loop *loop)
214*e4b17023SJohn Marino {
215*e4b17023SJohn Marino   struct niter_desc *desc = get_simple_loop_desc (loop);
216*e4b17023SJohn Marino   rtx insn;
217*e4b17023SJohn Marino 
218*e4b17023SJohn Marino   if (desc->in_edge->dest != loop->latch)
219*e4b17023SJohn Marino     return false;
220*e4b17023SJohn Marino 
221*e4b17023SJohn Marino   /* Check that the latch is empty.  */
222*e4b17023SJohn Marino   FOR_BB_INSNS (loop->latch, insn)
223*e4b17023SJohn Marino     {
224*e4b17023SJohn Marino       if (INSN_P (insn))
225*e4b17023SJohn Marino 	return false;
226*e4b17023SJohn Marino     }
227*e4b17023SJohn Marino 
228*e4b17023SJohn Marino   return true;
229*e4b17023SJohn Marino }
230*e4b17023SJohn Marino 
231*e4b17023SJohn Marino /* Depending on FLAGS, check whether to peel loops completely and do so.  */
232*e4b17023SJohn Marino static void
peel_loops_completely(int flags)233*e4b17023SJohn Marino peel_loops_completely (int flags)
234*e4b17023SJohn Marino {
235*e4b17023SJohn Marino   struct loop *loop;
236*e4b17023SJohn Marino   loop_iterator li;
237*e4b17023SJohn Marino 
238*e4b17023SJohn Marino   /* Scan the loops, the inner ones first.  */
239*e4b17023SJohn Marino   FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
240*e4b17023SJohn Marino     {
241*e4b17023SJohn Marino       loop->lpt_decision.decision = LPT_NONE;
242*e4b17023SJohn Marino 
243*e4b17023SJohn Marino       if (dump_file)
244*e4b17023SJohn Marino 	fprintf (dump_file,
245*e4b17023SJohn Marino 		 "\n;; *** Considering loop %d for complete peeling ***\n",
246*e4b17023SJohn Marino 		 loop->num);
247*e4b17023SJohn Marino 
248*e4b17023SJohn Marino       loop->ninsns = num_loop_insns (loop);
249*e4b17023SJohn Marino 
250*e4b17023SJohn Marino       decide_peel_once_rolling (loop, flags);
251*e4b17023SJohn Marino       if (loop->lpt_decision.decision == LPT_NONE)
252*e4b17023SJohn Marino 	decide_peel_completely (loop, flags);
253*e4b17023SJohn Marino 
254*e4b17023SJohn Marino       if (loop->lpt_decision.decision == LPT_PEEL_COMPLETELY)
255*e4b17023SJohn Marino 	{
256*e4b17023SJohn Marino 	  peel_loop_completely (loop);
257*e4b17023SJohn Marino #ifdef ENABLE_CHECKING
258*e4b17023SJohn Marino 	  verify_dominators (CDI_DOMINATORS);
259*e4b17023SJohn Marino 	  verify_loop_structure ();
260*e4b17023SJohn Marino #endif
261*e4b17023SJohn Marino 	}
262*e4b17023SJohn Marino     }
263*e4b17023SJohn Marino }
264*e4b17023SJohn Marino 
265*e4b17023SJohn Marino /* Decide whether unroll or peel loops (depending on FLAGS) and how much.  */
266*e4b17023SJohn Marino static void
decide_unrolling_and_peeling(int flags)267*e4b17023SJohn Marino decide_unrolling_and_peeling (int flags)
268*e4b17023SJohn Marino {
269*e4b17023SJohn Marino   struct loop *loop;
270*e4b17023SJohn Marino   loop_iterator li;
271*e4b17023SJohn Marino 
272*e4b17023SJohn Marino   /* Scan the loops, inner ones first.  */
273*e4b17023SJohn Marino   FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
274*e4b17023SJohn Marino     {
275*e4b17023SJohn Marino       loop->lpt_decision.decision = LPT_NONE;
276*e4b17023SJohn Marino 
277*e4b17023SJohn Marino       if (dump_file)
278*e4b17023SJohn Marino 	fprintf (dump_file, "\n;; *** Considering loop %d ***\n", loop->num);
279*e4b17023SJohn Marino 
280*e4b17023SJohn Marino       /* Do not peel cold areas.  */
281*e4b17023SJohn Marino       if (optimize_loop_for_size_p (loop))
282*e4b17023SJohn Marino 	{
283*e4b17023SJohn Marino 	  if (dump_file)
284*e4b17023SJohn Marino 	    fprintf (dump_file, ";; Not considering loop, cold area\n");
285*e4b17023SJohn Marino 	  continue;
286*e4b17023SJohn Marino 	}
287*e4b17023SJohn Marino 
288*e4b17023SJohn Marino       /* Can the loop be manipulated?  */
289*e4b17023SJohn Marino       if (!can_duplicate_loop_p (loop))
290*e4b17023SJohn Marino 	{
291*e4b17023SJohn Marino 	  if (dump_file)
292*e4b17023SJohn Marino 	    fprintf (dump_file,
293*e4b17023SJohn Marino 		     ";; Not considering loop, cannot duplicate\n");
294*e4b17023SJohn Marino 	  continue;
295*e4b17023SJohn Marino 	}
296*e4b17023SJohn Marino 
297*e4b17023SJohn Marino       /* Skip non-innermost loops.  */
298*e4b17023SJohn Marino       if (loop->inner)
299*e4b17023SJohn Marino 	{
300*e4b17023SJohn Marino 	  if (dump_file)
301*e4b17023SJohn Marino 	    fprintf (dump_file, ";; Not considering loop, is not innermost\n");
302*e4b17023SJohn Marino 	  continue;
303*e4b17023SJohn Marino 	}
304*e4b17023SJohn Marino 
305*e4b17023SJohn Marino       loop->ninsns = num_loop_insns (loop);
306*e4b17023SJohn Marino       loop->av_ninsns = average_num_loop_insns (loop);
307*e4b17023SJohn Marino 
308*e4b17023SJohn Marino       /* Try transformations one by one in decreasing order of
309*e4b17023SJohn Marino 	 priority.  */
310*e4b17023SJohn Marino 
311*e4b17023SJohn Marino       decide_unroll_constant_iterations (loop, flags);
312*e4b17023SJohn Marino       if (loop->lpt_decision.decision == LPT_NONE)
313*e4b17023SJohn Marino 	decide_unroll_runtime_iterations (loop, flags);
314*e4b17023SJohn Marino       if (loop->lpt_decision.decision == LPT_NONE)
315*e4b17023SJohn Marino 	decide_unroll_stupid (loop, flags);
316*e4b17023SJohn Marino       if (loop->lpt_decision.decision == LPT_NONE)
317*e4b17023SJohn Marino 	decide_peel_simple (loop, flags);
318*e4b17023SJohn Marino     }
319*e4b17023SJohn Marino }
320*e4b17023SJohn Marino 
321*e4b17023SJohn Marino /* Decide whether the LOOP is once rolling and suitable for complete
322*e4b17023SJohn Marino    peeling.  */
323*e4b17023SJohn Marino static void
decide_peel_once_rolling(struct loop * loop,int flags ATTRIBUTE_UNUSED)324*e4b17023SJohn Marino decide_peel_once_rolling (struct loop *loop, int flags ATTRIBUTE_UNUSED)
325*e4b17023SJohn Marino {
326*e4b17023SJohn Marino   struct niter_desc *desc;
327*e4b17023SJohn Marino 
328*e4b17023SJohn Marino   if (dump_file)
329*e4b17023SJohn Marino     fprintf (dump_file, "\n;; Considering peeling once rolling loop\n");
330*e4b17023SJohn Marino 
331*e4b17023SJohn Marino   /* Is the loop small enough?  */
332*e4b17023SJohn Marino   if ((unsigned) PARAM_VALUE (PARAM_MAX_ONCE_PEELED_INSNS) < loop->ninsns)
333*e4b17023SJohn Marino     {
334*e4b17023SJohn Marino       if (dump_file)
335*e4b17023SJohn Marino 	fprintf (dump_file, ";; Not considering loop, is too big\n");
336*e4b17023SJohn Marino       return;
337*e4b17023SJohn Marino     }
338*e4b17023SJohn Marino 
339*e4b17023SJohn Marino   /* Check for simple loops.  */
340*e4b17023SJohn Marino   desc = get_simple_loop_desc (loop);
341*e4b17023SJohn Marino 
342*e4b17023SJohn Marino   /* Check number of iterations.  */
343*e4b17023SJohn Marino   if (!desc->simple_p
344*e4b17023SJohn Marino       || desc->assumptions
345*e4b17023SJohn Marino       || desc->infinite
346*e4b17023SJohn Marino       || !desc->const_iter
347*e4b17023SJohn Marino       || desc->niter != 0)
348*e4b17023SJohn Marino     {
349*e4b17023SJohn Marino       if (dump_file)
350*e4b17023SJohn Marino 	fprintf (dump_file,
351*e4b17023SJohn Marino 		 ";; Unable to prove that the loop rolls exactly once\n");
352*e4b17023SJohn Marino       return;
353*e4b17023SJohn Marino     }
354*e4b17023SJohn Marino 
355*e4b17023SJohn Marino   /* Success.  */
356*e4b17023SJohn Marino   if (dump_file)
357*e4b17023SJohn Marino     fprintf (dump_file, ";; Decided to peel exactly once rolling loop\n");
358*e4b17023SJohn Marino   loop->lpt_decision.decision = LPT_PEEL_COMPLETELY;
359*e4b17023SJohn Marino }
360*e4b17023SJohn Marino 
361*e4b17023SJohn Marino /* Decide whether the LOOP is suitable for complete peeling.  */
362*e4b17023SJohn Marino static void
decide_peel_completely(struct loop * loop,int flags ATTRIBUTE_UNUSED)363*e4b17023SJohn Marino decide_peel_completely (struct loop *loop, int flags ATTRIBUTE_UNUSED)
364*e4b17023SJohn Marino {
365*e4b17023SJohn Marino   unsigned npeel;
366*e4b17023SJohn Marino   struct niter_desc *desc;
367*e4b17023SJohn Marino 
368*e4b17023SJohn Marino   if (dump_file)
369*e4b17023SJohn Marino     fprintf (dump_file, "\n;; Considering peeling completely\n");
370*e4b17023SJohn Marino 
371*e4b17023SJohn Marino   /* Skip non-innermost loops.  */
372*e4b17023SJohn Marino   if (loop->inner)
373*e4b17023SJohn Marino     {
374*e4b17023SJohn Marino       if (dump_file)
375*e4b17023SJohn Marino 	fprintf (dump_file, ";; Not considering loop, is not innermost\n");
376*e4b17023SJohn Marino       return;
377*e4b17023SJohn Marino     }
378*e4b17023SJohn Marino 
379*e4b17023SJohn Marino   /* Do not peel cold areas.  */
380*e4b17023SJohn Marino   if (optimize_loop_for_size_p (loop))
381*e4b17023SJohn Marino     {
382*e4b17023SJohn Marino       if (dump_file)
383*e4b17023SJohn Marino 	fprintf (dump_file, ";; Not considering loop, cold area\n");
384*e4b17023SJohn Marino       return;
385*e4b17023SJohn Marino     }
386*e4b17023SJohn Marino 
387*e4b17023SJohn Marino   /* Can the loop be manipulated?  */
388*e4b17023SJohn Marino   if (!can_duplicate_loop_p (loop))
389*e4b17023SJohn Marino     {
390*e4b17023SJohn Marino       if (dump_file)
391*e4b17023SJohn Marino 	fprintf (dump_file,
392*e4b17023SJohn Marino 		 ";; Not considering loop, cannot duplicate\n");
393*e4b17023SJohn Marino       return;
394*e4b17023SJohn Marino     }
395*e4b17023SJohn Marino 
396*e4b17023SJohn Marino   /* npeel = number of iterations to peel.  */
397*e4b17023SJohn Marino   npeel = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS) / loop->ninsns;
398*e4b17023SJohn Marino   if (npeel > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES))
399*e4b17023SJohn Marino     npeel = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES);
400*e4b17023SJohn Marino 
401*e4b17023SJohn Marino   /* Is the loop small enough?  */
402*e4b17023SJohn Marino   if (!npeel)
403*e4b17023SJohn Marino     {
404*e4b17023SJohn Marino       if (dump_file)
405*e4b17023SJohn Marino 	fprintf (dump_file, ";; Not considering loop, is too big\n");
406*e4b17023SJohn Marino       return;
407*e4b17023SJohn Marino     }
408*e4b17023SJohn Marino 
409*e4b17023SJohn Marino   /* Check for simple loops.  */
410*e4b17023SJohn Marino   desc = get_simple_loop_desc (loop);
411*e4b17023SJohn Marino 
412*e4b17023SJohn Marino   /* Check number of iterations.  */
413*e4b17023SJohn Marino   if (!desc->simple_p
414*e4b17023SJohn Marino       || desc->assumptions
415*e4b17023SJohn Marino       || !desc->const_iter
416*e4b17023SJohn Marino       || desc->infinite)
417*e4b17023SJohn Marino     {
418*e4b17023SJohn Marino       if (dump_file)
419*e4b17023SJohn Marino 	fprintf (dump_file,
420*e4b17023SJohn Marino 		 ";; Unable to prove that the loop iterates constant times\n");
421*e4b17023SJohn Marino       return;
422*e4b17023SJohn Marino     }
423*e4b17023SJohn Marino 
424*e4b17023SJohn Marino   if (desc->niter > npeel - 1)
425*e4b17023SJohn Marino     {
426*e4b17023SJohn Marino       if (dump_file)
427*e4b17023SJohn Marino 	{
428*e4b17023SJohn Marino 	  fprintf (dump_file,
429*e4b17023SJohn Marino 		   ";; Not peeling loop completely, rolls too much (");
430*e4b17023SJohn Marino 	  fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, desc->niter);
431*e4b17023SJohn Marino 	  fprintf (dump_file, " iterations > %d [maximum peelings])\n", npeel);
432*e4b17023SJohn Marino 	}
433*e4b17023SJohn Marino       return;
434*e4b17023SJohn Marino     }
435*e4b17023SJohn Marino 
436*e4b17023SJohn Marino   /* Success.  */
437*e4b17023SJohn Marino   if (dump_file)
438*e4b17023SJohn Marino     fprintf (dump_file, ";; Decided to peel loop completely\n");
439*e4b17023SJohn Marino   loop->lpt_decision.decision = LPT_PEEL_COMPLETELY;
440*e4b17023SJohn Marino }
441*e4b17023SJohn Marino 
442*e4b17023SJohn Marino /* Peel all iterations of LOOP, remove exit edges and cancel the loop
443*e4b17023SJohn Marino    completely.  The transformation done:
444*e4b17023SJohn Marino 
445*e4b17023SJohn Marino    for (i = 0; i < 4; i++)
446*e4b17023SJohn Marino      body;
447*e4b17023SJohn Marino 
448*e4b17023SJohn Marino    ==>
449*e4b17023SJohn Marino 
450*e4b17023SJohn Marino    i = 0;
451*e4b17023SJohn Marino    body; i++;
452*e4b17023SJohn Marino    body; i++;
453*e4b17023SJohn Marino    body; i++;
454*e4b17023SJohn Marino    body; i++;
455*e4b17023SJohn Marino    */
456*e4b17023SJohn Marino static void
peel_loop_completely(struct loop * loop)457*e4b17023SJohn Marino peel_loop_completely (struct loop *loop)
458*e4b17023SJohn Marino {
459*e4b17023SJohn Marino   sbitmap wont_exit;
460*e4b17023SJohn Marino   unsigned HOST_WIDE_INT npeel;
461*e4b17023SJohn Marino   unsigned i;
462*e4b17023SJohn Marino   VEC (edge, heap) *remove_edges;
463*e4b17023SJohn Marino   edge ein;
464*e4b17023SJohn Marino   struct niter_desc *desc = get_simple_loop_desc (loop);
465*e4b17023SJohn Marino   struct opt_info *opt_info = NULL;
466*e4b17023SJohn Marino 
467*e4b17023SJohn Marino   npeel = desc->niter;
468*e4b17023SJohn Marino 
469*e4b17023SJohn Marino   if (npeel)
470*e4b17023SJohn Marino     {
471*e4b17023SJohn Marino       bool ok;
472*e4b17023SJohn Marino 
473*e4b17023SJohn Marino       wont_exit = sbitmap_alloc (npeel + 1);
474*e4b17023SJohn Marino       sbitmap_ones (wont_exit);
475*e4b17023SJohn Marino       RESET_BIT (wont_exit, 0);
476*e4b17023SJohn Marino       if (desc->noloop_assumptions)
477*e4b17023SJohn Marino 	RESET_BIT (wont_exit, 1);
478*e4b17023SJohn Marino 
479*e4b17023SJohn Marino       remove_edges = NULL;
480*e4b17023SJohn Marino 
481*e4b17023SJohn Marino       if (flag_split_ivs_in_unroller)
482*e4b17023SJohn Marino         opt_info = analyze_insns_in_loop (loop);
483*e4b17023SJohn Marino 
484*e4b17023SJohn Marino       opt_info_start_duplication (opt_info);
485*e4b17023SJohn Marino       ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
486*e4b17023SJohn Marino 					  npeel,
487*e4b17023SJohn Marino 					  wont_exit, desc->out_edge,
488*e4b17023SJohn Marino 					  &remove_edges,
489*e4b17023SJohn Marino 					  DLTHE_FLAG_UPDATE_FREQ
490*e4b17023SJohn Marino 					  | DLTHE_FLAG_COMPLETTE_PEEL
491*e4b17023SJohn Marino 					  | (opt_info
492*e4b17023SJohn Marino 					     ? DLTHE_RECORD_COPY_NUMBER : 0));
493*e4b17023SJohn Marino       gcc_assert (ok);
494*e4b17023SJohn Marino 
495*e4b17023SJohn Marino       free (wont_exit);
496*e4b17023SJohn Marino 
497*e4b17023SJohn Marino       if (opt_info)
498*e4b17023SJohn Marino  	{
499*e4b17023SJohn Marino  	  apply_opt_in_copies (opt_info, npeel, false, true);
500*e4b17023SJohn Marino  	  free_opt_info (opt_info);
501*e4b17023SJohn Marino  	}
502*e4b17023SJohn Marino 
503*e4b17023SJohn Marino       /* Remove the exit edges.  */
504*e4b17023SJohn Marino       FOR_EACH_VEC_ELT (edge, remove_edges, i, ein)
505*e4b17023SJohn Marino 	remove_path (ein);
506*e4b17023SJohn Marino       VEC_free (edge, heap, remove_edges);
507*e4b17023SJohn Marino     }
508*e4b17023SJohn Marino 
509*e4b17023SJohn Marino   ein = desc->in_edge;
510*e4b17023SJohn Marino   free_simple_loop_desc (loop);
511*e4b17023SJohn Marino 
512*e4b17023SJohn Marino   /* Now remove the unreachable part of the last iteration and cancel
513*e4b17023SJohn Marino      the loop.  */
514*e4b17023SJohn Marino   remove_path (ein);
515*e4b17023SJohn Marino 
516*e4b17023SJohn Marino   if (dump_file)
517*e4b17023SJohn Marino     fprintf (dump_file, ";; Peeled loop completely, %d times\n", (int) npeel);
518*e4b17023SJohn Marino }
519*e4b17023SJohn Marino 
520*e4b17023SJohn Marino /* Decide whether to unroll LOOP iterating constant number of times
521*e4b17023SJohn Marino    and how much.  */
522*e4b17023SJohn Marino 
523*e4b17023SJohn Marino static void
decide_unroll_constant_iterations(struct loop * loop,int flags)524*e4b17023SJohn Marino decide_unroll_constant_iterations (struct loop *loop, int flags)
525*e4b17023SJohn Marino {
526*e4b17023SJohn Marino   unsigned nunroll, nunroll_by_av, best_copies, best_unroll = 0, n_copies, i;
527*e4b17023SJohn Marino   struct niter_desc *desc;
528*e4b17023SJohn Marino 
529*e4b17023SJohn Marino   if (!(flags & UAP_UNROLL))
530*e4b17023SJohn Marino     {
531*e4b17023SJohn Marino       /* We were not asked to, just return back silently.  */
532*e4b17023SJohn Marino       return;
533*e4b17023SJohn Marino     }
534*e4b17023SJohn Marino 
535*e4b17023SJohn Marino   if (dump_file)
536*e4b17023SJohn Marino     fprintf (dump_file,
537*e4b17023SJohn Marino 	     "\n;; Considering unrolling loop with constant "
538*e4b17023SJohn Marino 	     "number of iterations\n");
539*e4b17023SJohn Marino 
540*e4b17023SJohn Marino   /* nunroll = total number of copies of the original loop body in
541*e4b17023SJohn Marino      unrolled loop (i.e. if it is 2, we have to duplicate loop body once.  */
542*e4b17023SJohn Marino   nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
543*e4b17023SJohn Marino   nunroll_by_av
544*e4b17023SJohn Marino     = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
545*e4b17023SJohn Marino   if (nunroll > nunroll_by_av)
546*e4b17023SJohn Marino     nunroll = nunroll_by_av;
547*e4b17023SJohn Marino   if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
548*e4b17023SJohn Marino     nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
549*e4b17023SJohn Marino 
550*e4b17023SJohn Marino   /* Skip big loops.  */
551*e4b17023SJohn Marino   if (nunroll <= 1)
552*e4b17023SJohn Marino     {
553*e4b17023SJohn Marino       if (dump_file)
554*e4b17023SJohn Marino 	fprintf (dump_file, ";; Not considering loop, is too big\n");
555*e4b17023SJohn Marino       return;
556*e4b17023SJohn Marino     }
557*e4b17023SJohn Marino 
558*e4b17023SJohn Marino   /* Check for simple loops.  */
559*e4b17023SJohn Marino   desc = get_simple_loop_desc (loop);
560*e4b17023SJohn Marino 
561*e4b17023SJohn Marino   /* Check number of iterations.  */
562*e4b17023SJohn Marino   if (!desc->simple_p || !desc->const_iter || desc->assumptions)
563*e4b17023SJohn Marino     {
564*e4b17023SJohn Marino       if (dump_file)
565*e4b17023SJohn Marino 	fprintf (dump_file,
566*e4b17023SJohn Marino 		 ";; Unable to prove that the loop iterates constant times\n");
567*e4b17023SJohn Marino       return;
568*e4b17023SJohn Marino     }
569*e4b17023SJohn Marino 
570*e4b17023SJohn Marino   /* Check whether the loop rolls enough to consider.  */
571*e4b17023SJohn Marino   if (desc->niter < 2 * nunroll)
572*e4b17023SJohn Marino     {
573*e4b17023SJohn Marino       if (dump_file)
574*e4b17023SJohn Marino 	fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
575*e4b17023SJohn Marino       return;
576*e4b17023SJohn Marino     }
577*e4b17023SJohn Marino 
578*e4b17023SJohn Marino   /* Success; now compute number of iterations to unroll.  We alter
579*e4b17023SJohn Marino      nunroll so that as few as possible copies of loop body are
580*e4b17023SJohn Marino      necessary, while still not decreasing the number of unrollings
581*e4b17023SJohn Marino      too much (at most by 1).  */
582*e4b17023SJohn Marino   best_copies = 2 * nunroll + 10;
583*e4b17023SJohn Marino 
584*e4b17023SJohn Marino   i = 2 * nunroll + 2;
585*e4b17023SJohn Marino   if (i - 1 >= desc->niter)
586*e4b17023SJohn Marino     i = desc->niter - 2;
587*e4b17023SJohn Marino 
588*e4b17023SJohn Marino   for (; i >= nunroll - 1; i--)
589*e4b17023SJohn Marino     {
590*e4b17023SJohn Marino       unsigned exit_mod = desc->niter % (i + 1);
591*e4b17023SJohn Marino 
592*e4b17023SJohn Marino       if (!loop_exit_at_end_p (loop))
593*e4b17023SJohn Marino 	n_copies = exit_mod + i + 1;
594*e4b17023SJohn Marino       else if (exit_mod != (unsigned) i
595*e4b17023SJohn Marino 	       || desc->noloop_assumptions != NULL_RTX)
596*e4b17023SJohn Marino 	n_copies = exit_mod + i + 2;
597*e4b17023SJohn Marino       else
598*e4b17023SJohn Marino 	n_copies = i + 1;
599*e4b17023SJohn Marino 
600*e4b17023SJohn Marino       if (n_copies < best_copies)
601*e4b17023SJohn Marino 	{
602*e4b17023SJohn Marino 	  best_copies = n_copies;
603*e4b17023SJohn Marino 	  best_unroll = i;
604*e4b17023SJohn Marino 	}
605*e4b17023SJohn Marino     }
606*e4b17023SJohn Marino 
607*e4b17023SJohn Marino   if (dump_file)
608*e4b17023SJohn Marino     fprintf (dump_file, ";; max_unroll %d (%d copies, initial %d).\n",
609*e4b17023SJohn Marino 	     best_unroll + 1, best_copies, nunroll);
610*e4b17023SJohn Marino 
611*e4b17023SJohn Marino   loop->lpt_decision.decision = LPT_UNROLL_CONSTANT;
612*e4b17023SJohn Marino   loop->lpt_decision.times = best_unroll;
613*e4b17023SJohn Marino 
614*e4b17023SJohn Marino   if (dump_file)
615*e4b17023SJohn Marino     fprintf (dump_file,
616*e4b17023SJohn Marino 	     ";; Decided to unroll the constant times rolling loop, %d times.\n",
617*e4b17023SJohn Marino 	     loop->lpt_decision.times);
618*e4b17023SJohn Marino }
619*e4b17023SJohn Marino 
620*e4b17023SJohn Marino /* Unroll LOOP with constant number of iterations LOOP->LPT_DECISION.TIMES + 1
621*e4b17023SJohn Marino    times.  The transformation does this:
622*e4b17023SJohn Marino 
623*e4b17023SJohn Marino    for (i = 0; i < 102; i++)
624*e4b17023SJohn Marino      body;
625*e4b17023SJohn Marino 
626*e4b17023SJohn Marino    ==>
627*e4b17023SJohn Marino 
628*e4b17023SJohn Marino    i = 0;
629*e4b17023SJohn Marino    body; i++;
630*e4b17023SJohn Marino    body; i++;
631*e4b17023SJohn Marino    while (i < 102)
632*e4b17023SJohn Marino      {
633*e4b17023SJohn Marino        body; i++;
634*e4b17023SJohn Marino        body; i++;
635*e4b17023SJohn Marino        body; i++;
636*e4b17023SJohn Marino        body; i++;
637*e4b17023SJohn Marino      }
638*e4b17023SJohn Marino   */
639*e4b17023SJohn Marino static void
unroll_loop_constant_iterations(struct loop * loop)640*e4b17023SJohn Marino unroll_loop_constant_iterations (struct loop *loop)
641*e4b17023SJohn Marino {
642*e4b17023SJohn Marino   unsigned HOST_WIDE_INT niter;
643*e4b17023SJohn Marino   unsigned exit_mod;
644*e4b17023SJohn Marino   sbitmap wont_exit;
645*e4b17023SJohn Marino   unsigned i;
646*e4b17023SJohn Marino   VEC (edge, heap) *remove_edges;
647*e4b17023SJohn Marino   edge e;
648*e4b17023SJohn Marino   unsigned max_unroll = loop->lpt_decision.times;
649*e4b17023SJohn Marino   struct niter_desc *desc = get_simple_loop_desc (loop);
650*e4b17023SJohn Marino   bool exit_at_end = loop_exit_at_end_p (loop);
651*e4b17023SJohn Marino   struct opt_info *opt_info = NULL;
652*e4b17023SJohn Marino   bool ok;
653*e4b17023SJohn Marino 
654*e4b17023SJohn Marino   niter = desc->niter;
655*e4b17023SJohn Marino 
656*e4b17023SJohn Marino   /* Should not get here (such loop should be peeled instead).  */
657*e4b17023SJohn Marino   gcc_assert (niter > max_unroll + 1);
658*e4b17023SJohn Marino 
659*e4b17023SJohn Marino   exit_mod = niter % (max_unroll + 1);
660*e4b17023SJohn Marino 
661*e4b17023SJohn Marino   wont_exit = sbitmap_alloc (max_unroll + 1);
662*e4b17023SJohn Marino   sbitmap_ones (wont_exit);
663*e4b17023SJohn Marino 
664*e4b17023SJohn Marino   remove_edges = NULL;
665*e4b17023SJohn Marino   if (flag_split_ivs_in_unroller
666*e4b17023SJohn Marino       || flag_variable_expansion_in_unroller)
667*e4b17023SJohn Marino     opt_info = analyze_insns_in_loop (loop);
668*e4b17023SJohn Marino 
669*e4b17023SJohn Marino   if (!exit_at_end)
670*e4b17023SJohn Marino     {
671*e4b17023SJohn Marino       /* The exit is not at the end of the loop; leave exit test
672*e4b17023SJohn Marino 	 in the first copy, so that the loops that start with test
673*e4b17023SJohn Marino 	 of exit condition have continuous body after unrolling.  */
674*e4b17023SJohn Marino 
675*e4b17023SJohn Marino       if (dump_file)
676*e4b17023SJohn Marino 	fprintf (dump_file, ";; Condition on beginning of loop.\n");
677*e4b17023SJohn Marino 
678*e4b17023SJohn Marino       /* Peel exit_mod iterations.  */
679*e4b17023SJohn Marino       RESET_BIT (wont_exit, 0);
680*e4b17023SJohn Marino       if (desc->noloop_assumptions)
681*e4b17023SJohn Marino 	RESET_BIT (wont_exit, 1);
682*e4b17023SJohn Marino 
683*e4b17023SJohn Marino       if (exit_mod)
684*e4b17023SJohn Marino 	{
685*e4b17023SJohn Marino 	  opt_info_start_duplication (opt_info);
686*e4b17023SJohn Marino           ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
687*e4b17023SJohn Marino 					      exit_mod,
688*e4b17023SJohn Marino 					      wont_exit, desc->out_edge,
689*e4b17023SJohn Marino 					      &remove_edges,
690*e4b17023SJohn Marino 					      DLTHE_FLAG_UPDATE_FREQ
691*e4b17023SJohn Marino 					      | (opt_info && exit_mod > 1
692*e4b17023SJohn Marino 						 ? DLTHE_RECORD_COPY_NUMBER
693*e4b17023SJohn Marino 						   : 0));
694*e4b17023SJohn Marino 	  gcc_assert (ok);
695*e4b17023SJohn Marino 
696*e4b17023SJohn Marino           if (opt_info && exit_mod > 1)
697*e4b17023SJohn Marino  	    apply_opt_in_copies (opt_info, exit_mod, false, false);
698*e4b17023SJohn Marino 
699*e4b17023SJohn Marino 	  desc->noloop_assumptions = NULL_RTX;
700*e4b17023SJohn Marino 	  desc->niter -= exit_mod;
701*e4b17023SJohn Marino 	  desc->niter_max -= exit_mod;
702*e4b17023SJohn Marino 	}
703*e4b17023SJohn Marino 
704*e4b17023SJohn Marino       SET_BIT (wont_exit, 1);
705*e4b17023SJohn Marino     }
706*e4b17023SJohn Marino   else
707*e4b17023SJohn Marino     {
708*e4b17023SJohn Marino       /* Leave exit test in last copy, for the same reason as above if
709*e4b17023SJohn Marino 	 the loop tests the condition at the end of loop body.  */
710*e4b17023SJohn Marino 
711*e4b17023SJohn Marino       if (dump_file)
712*e4b17023SJohn Marino 	fprintf (dump_file, ";; Condition on end of loop.\n");
713*e4b17023SJohn Marino 
714*e4b17023SJohn Marino       /* We know that niter >= max_unroll + 2; so we do not need to care of
715*e4b17023SJohn Marino 	 case when we would exit before reaching the loop.  So just peel
716*e4b17023SJohn Marino 	 exit_mod + 1 iterations.  */
717*e4b17023SJohn Marino       if (exit_mod != max_unroll
718*e4b17023SJohn Marino 	  || desc->noloop_assumptions)
719*e4b17023SJohn Marino 	{
720*e4b17023SJohn Marino 	  RESET_BIT (wont_exit, 0);
721*e4b17023SJohn Marino 	  if (desc->noloop_assumptions)
722*e4b17023SJohn Marino 	    RESET_BIT (wont_exit, 1);
723*e4b17023SJohn Marino 
724*e4b17023SJohn Marino           opt_info_start_duplication (opt_info);
725*e4b17023SJohn Marino 	  ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
726*e4b17023SJohn Marino 					      exit_mod + 1,
727*e4b17023SJohn Marino 					      wont_exit, desc->out_edge,
728*e4b17023SJohn Marino 					      &remove_edges,
729*e4b17023SJohn Marino 					      DLTHE_FLAG_UPDATE_FREQ
730*e4b17023SJohn Marino 					      | (opt_info && exit_mod > 0
731*e4b17023SJohn Marino 						 ? DLTHE_RECORD_COPY_NUMBER
732*e4b17023SJohn Marino 						   : 0));
733*e4b17023SJohn Marino 	  gcc_assert (ok);
734*e4b17023SJohn Marino 
735*e4b17023SJohn Marino           if (opt_info && exit_mod > 0)
736*e4b17023SJohn Marino   	    apply_opt_in_copies (opt_info, exit_mod + 1, false, false);
737*e4b17023SJohn Marino 
738*e4b17023SJohn Marino 	  desc->niter -= exit_mod + 1;
739*e4b17023SJohn Marino 	  desc->niter_max -= exit_mod + 1;
740*e4b17023SJohn Marino 	  desc->noloop_assumptions = NULL_RTX;
741*e4b17023SJohn Marino 
742*e4b17023SJohn Marino 	  SET_BIT (wont_exit, 0);
743*e4b17023SJohn Marino 	  SET_BIT (wont_exit, 1);
744*e4b17023SJohn Marino 	}
745*e4b17023SJohn Marino 
746*e4b17023SJohn Marino       RESET_BIT (wont_exit, max_unroll);
747*e4b17023SJohn Marino     }
748*e4b17023SJohn Marino 
749*e4b17023SJohn Marino   /* Now unroll the loop.  */
750*e4b17023SJohn Marino 
751*e4b17023SJohn Marino   opt_info_start_duplication (opt_info);
752*e4b17023SJohn Marino   ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
753*e4b17023SJohn Marino 				      max_unroll,
754*e4b17023SJohn Marino 				      wont_exit, desc->out_edge,
755*e4b17023SJohn Marino 				      &remove_edges,
756*e4b17023SJohn Marino 				      DLTHE_FLAG_UPDATE_FREQ
757*e4b17023SJohn Marino 				      | (opt_info
758*e4b17023SJohn Marino 					 ? DLTHE_RECORD_COPY_NUMBER
759*e4b17023SJohn Marino 					   : 0));
760*e4b17023SJohn Marino   gcc_assert (ok);
761*e4b17023SJohn Marino 
762*e4b17023SJohn Marino   if (opt_info)
763*e4b17023SJohn Marino     {
764*e4b17023SJohn Marino       apply_opt_in_copies (opt_info, max_unroll, true, true);
765*e4b17023SJohn Marino       free_opt_info (opt_info);
766*e4b17023SJohn Marino     }
767*e4b17023SJohn Marino 
768*e4b17023SJohn Marino   free (wont_exit);
769*e4b17023SJohn Marino 
770*e4b17023SJohn Marino   if (exit_at_end)
771*e4b17023SJohn Marino     {
772*e4b17023SJohn Marino       basic_block exit_block = get_bb_copy (desc->in_edge->src);
773*e4b17023SJohn Marino       /* Find a new in and out edge; they are in the last copy we have made.  */
774*e4b17023SJohn Marino 
775*e4b17023SJohn Marino       if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
776*e4b17023SJohn Marino 	{
777*e4b17023SJohn Marino 	  desc->out_edge = EDGE_SUCC (exit_block, 0);
778*e4b17023SJohn Marino 	  desc->in_edge = EDGE_SUCC (exit_block, 1);
779*e4b17023SJohn Marino 	}
780*e4b17023SJohn Marino       else
781*e4b17023SJohn Marino 	{
782*e4b17023SJohn Marino 	  desc->out_edge = EDGE_SUCC (exit_block, 1);
783*e4b17023SJohn Marino 	  desc->in_edge = EDGE_SUCC (exit_block, 0);
784*e4b17023SJohn Marino 	}
785*e4b17023SJohn Marino     }
786*e4b17023SJohn Marino 
787*e4b17023SJohn Marino   desc->niter /= max_unroll + 1;
788*e4b17023SJohn Marino   desc->niter_max /= max_unroll + 1;
789*e4b17023SJohn Marino   desc->niter_expr = GEN_INT (desc->niter);
790*e4b17023SJohn Marino 
791*e4b17023SJohn Marino   /* Remove the edges.  */
792*e4b17023SJohn Marino   FOR_EACH_VEC_ELT (edge, remove_edges, i, e)
793*e4b17023SJohn Marino     remove_path (e);
794*e4b17023SJohn Marino   VEC_free (edge, heap, remove_edges);
795*e4b17023SJohn Marino 
796*e4b17023SJohn Marino   if (dump_file)
797*e4b17023SJohn Marino     fprintf (dump_file,
798*e4b17023SJohn Marino 	     ";; Unrolled loop %d times, constant # of iterations %i insns\n",
799*e4b17023SJohn Marino 	     max_unroll, num_loop_insns (loop));
800*e4b17023SJohn Marino }
801*e4b17023SJohn Marino 
802*e4b17023SJohn Marino /* Decide whether to unroll LOOP iterating runtime computable number of times
803*e4b17023SJohn Marino    and how much.  */
804*e4b17023SJohn Marino static void
decide_unroll_runtime_iterations(struct loop * loop,int flags)805*e4b17023SJohn Marino decide_unroll_runtime_iterations (struct loop *loop, int flags)
806*e4b17023SJohn Marino {
807*e4b17023SJohn Marino   unsigned nunroll, nunroll_by_av, i;
808*e4b17023SJohn Marino   struct niter_desc *desc;
809*e4b17023SJohn Marino 
810*e4b17023SJohn Marino   if (!(flags & UAP_UNROLL))
811*e4b17023SJohn Marino     {
812*e4b17023SJohn Marino       /* We were not asked to, just return back silently.  */
813*e4b17023SJohn Marino       return;
814*e4b17023SJohn Marino     }
815*e4b17023SJohn Marino 
816*e4b17023SJohn Marino   if (dump_file)
817*e4b17023SJohn Marino     fprintf (dump_file,
818*e4b17023SJohn Marino 	     "\n;; Considering unrolling loop with runtime "
819*e4b17023SJohn Marino 	     "computable number of iterations\n");
820*e4b17023SJohn Marino 
821*e4b17023SJohn Marino   /* nunroll = total number of copies of the original loop body in
822*e4b17023SJohn Marino      unrolled loop (i.e. if it is 2, we have to duplicate loop body once.  */
823*e4b17023SJohn Marino   nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
824*e4b17023SJohn Marino   nunroll_by_av = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
825*e4b17023SJohn Marino   if (nunroll > nunroll_by_av)
826*e4b17023SJohn Marino     nunroll = nunroll_by_av;
827*e4b17023SJohn Marino   if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
828*e4b17023SJohn Marino     nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
829*e4b17023SJohn Marino 
830*e4b17023SJohn Marino   if (targetm.loop_unroll_adjust)
831*e4b17023SJohn Marino     nunroll = targetm.loop_unroll_adjust (nunroll, loop);
832*e4b17023SJohn Marino 
833*e4b17023SJohn Marino   /* Skip big loops.  */
834*e4b17023SJohn Marino   if (nunroll <= 1)
835*e4b17023SJohn Marino     {
836*e4b17023SJohn Marino       if (dump_file)
837*e4b17023SJohn Marino 	fprintf (dump_file, ";; Not considering loop, is too big\n");
838*e4b17023SJohn Marino       return;
839*e4b17023SJohn Marino     }
840*e4b17023SJohn Marino 
841*e4b17023SJohn Marino   /* Check for simple loops.  */
842*e4b17023SJohn Marino   desc = get_simple_loop_desc (loop);
843*e4b17023SJohn Marino 
844*e4b17023SJohn Marino   /* Check simpleness.  */
845*e4b17023SJohn Marino   if (!desc->simple_p || desc->assumptions)
846*e4b17023SJohn Marino     {
847*e4b17023SJohn Marino       if (dump_file)
848*e4b17023SJohn Marino 	fprintf (dump_file,
849*e4b17023SJohn Marino 		 ";; Unable to prove that the number of iterations "
850*e4b17023SJohn Marino 		 "can be counted in runtime\n");
851*e4b17023SJohn Marino       return;
852*e4b17023SJohn Marino     }
853*e4b17023SJohn Marino 
854*e4b17023SJohn Marino   if (desc->const_iter)
855*e4b17023SJohn Marino     {
856*e4b17023SJohn Marino       if (dump_file)
857*e4b17023SJohn Marino 	fprintf (dump_file, ";; Loop iterates constant times\n");
858*e4b17023SJohn Marino       return;
859*e4b17023SJohn Marino     }
860*e4b17023SJohn Marino 
861*e4b17023SJohn Marino   /* If we have profile feedback, check whether the loop rolls.  */
862*e4b17023SJohn Marino   if (loop->header->count && expected_loop_iterations (loop) < 2 * nunroll)
863*e4b17023SJohn Marino     {
864*e4b17023SJohn Marino       if (dump_file)
865*e4b17023SJohn Marino 	fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
866*e4b17023SJohn Marino       return;
867*e4b17023SJohn Marino     }
868*e4b17023SJohn Marino 
869*e4b17023SJohn Marino   /* Success; now force nunroll to be power of 2, as we are unable to
870*e4b17023SJohn Marino      cope with overflows in computation of number of iterations.  */
871*e4b17023SJohn Marino   for (i = 1; 2 * i <= nunroll; i *= 2)
872*e4b17023SJohn Marino     continue;
873*e4b17023SJohn Marino 
874*e4b17023SJohn Marino   loop->lpt_decision.decision = LPT_UNROLL_RUNTIME;
875*e4b17023SJohn Marino   loop->lpt_decision.times = i - 1;
876*e4b17023SJohn Marino 
877*e4b17023SJohn Marino   if (dump_file)
878*e4b17023SJohn Marino     fprintf (dump_file,
879*e4b17023SJohn Marino 	     ";; Decided to unroll the runtime computable "
880*e4b17023SJohn Marino 	     "times rolling loop, %d times.\n",
881*e4b17023SJohn Marino 	     loop->lpt_decision.times);
882*e4b17023SJohn Marino }
883*e4b17023SJohn Marino 
884*e4b17023SJohn Marino /* Splits edge E and inserts the sequence of instructions INSNS on it, and
885*e4b17023SJohn Marino    returns the newly created block.  If INSNS is NULL_RTX, nothing is changed
886*e4b17023SJohn Marino    and NULL is returned instead.  */
887*e4b17023SJohn Marino 
888*e4b17023SJohn Marino basic_block
split_edge_and_insert(edge e,rtx insns)889*e4b17023SJohn Marino split_edge_and_insert (edge e, rtx insns)
890*e4b17023SJohn Marino {
891*e4b17023SJohn Marino   basic_block bb;
892*e4b17023SJohn Marino 
893*e4b17023SJohn Marino   if (!insns)
894*e4b17023SJohn Marino     return NULL;
895*e4b17023SJohn Marino   bb = split_edge (e);
896*e4b17023SJohn Marino   emit_insn_after (insns, BB_END (bb));
897*e4b17023SJohn Marino 
898*e4b17023SJohn Marino   /* ??? We used to assume that INSNS can contain control flow insns, and
899*e4b17023SJohn Marino      that we had to try to find sub basic blocks in BB to maintain a valid
900*e4b17023SJohn Marino      CFG.  For this purpose we used to set the BB_SUPERBLOCK flag on BB
901*e4b17023SJohn Marino      and call break_superblocks when going out of cfglayout mode.  But it
902*e4b17023SJohn Marino      turns out that this never happens; and that if it does ever happen,
903*e4b17023SJohn Marino      the TODO_verify_flow at the end of the RTL loop passes would fail.
904*e4b17023SJohn Marino 
905*e4b17023SJohn Marino      There are two reasons why we expected we could have control flow insns
906*e4b17023SJohn Marino      in INSNS.  The first is when a comparison has to be done in parts, and
907*e4b17023SJohn Marino      the second is when the number of iterations is computed for loops with
908*e4b17023SJohn Marino      the number of iterations known at runtime.  In both cases, test cases
909*e4b17023SJohn Marino      to get control flow in INSNS appear to be impossible to construct:
910*e4b17023SJohn Marino 
911*e4b17023SJohn Marino       * If do_compare_rtx_and_jump needs several branches to do comparison
912*e4b17023SJohn Marino 	in a mode that needs comparison by parts, we cannot analyze the
913*e4b17023SJohn Marino 	number of iterations of the loop, and we never get to unrolling it.
914*e4b17023SJohn Marino 
915*e4b17023SJohn Marino       * The code in expand_divmod that was suspected to cause creation of
916*e4b17023SJohn Marino 	branching code seems to be only accessed for signed division.  The
917*e4b17023SJohn Marino 	divisions used by # of iterations analysis are always unsigned.
918*e4b17023SJohn Marino 	Problems might arise on architectures that emits branching code
919*e4b17023SJohn Marino 	for some operations that may appear in the unroller (especially
920*e4b17023SJohn Marino 	for division), but we have no such architectures.
921*e4b17023SJohn Marino 
922*e4b17023SJohn Marino      Considering all this, it was decided that we should for now assume
923*e4b17023SJohn Marino      that INSNS can in theory contain control flow insns, but in practice
924*e4b17023SJohn Marino      it never does.  So we don't handle the theoretical case, and should
925*e4b17023SJohn Marino      a real failure ever show up, we have a pretty good clue for how to
926*e4b17023SJohn Marino      fix it.  */
927*e4b17023SJohn Marino 
928*e4b17023SJohn Marino   return bb;
929*e4b17023SJohn Marino }
930*e4b17023SJohn Marino 
931*e4b17023SJohn Marino /* Unroll LOOP for that we are able to count number of iterations in runtime
932*e4b17023SJohn Marino    LOOP->LPT_DECISION.TIMES + 1 times.  The transformation does this (with some
933*e4b17023SJohn Marino    extra care for case n < 0):
934*e4b17023SJohn Marino 
935*e4b17023SJohn Marino    for (i = 0; i < n; i++)
936*e4b17023SJohn Marino      body;
937*e4b17023SJohn Marino 
938*e4b17023SJohn Marino    ==>
939*e4b17023SJohn Marino 
940*e4b17023SJohn Marino    i = 0;
941*e4b17023SJohn Marino    mod = n % 4;
942*e4b17023SJohn Marino 
943*e4b17023SJohn Marino    switch (mod)
944*e4b17023SJohn Marino      {
945*e4b17023SJohn Marino        case 3:
946*e4b17023SJohn Marino          body; i++;
947*e4b17023SJohn Marino        case 2:
948*e4b17023SJohn Marino          body; i++;
949*e4b17023SJohn Marino        case 1:
950*e4b17023SJohn Marino          body; i++;
951*e4b17023SJohn Marino        case 0: ;
952*e4b17023SJohn Marino      }
953*e4b17023SJohn Marino 
954*e4b17023SJohn Marino    while (i < n)
955*e4b17023SJohn Marino      {
956*e4b17023SJohn Marino        body; i++;
957*e4b17023SJohn Marino        body; i++;
958*e4b17023SJohn Marino        body; i++;
959*e4b17023SJohn Marino        body; i++;
960*e4b17023SJohn Marino      }
961*e4b17023SJohn Marino    */
962*e4b17023SJohn Marino static void
unroll_loop_runtime_iterations(struct loop * loop)963*e4b17023SJohn Marino unroll_loop_runtime_iterations (struct loop *loop)
964*e4b17023SJohn Marino {
965*e4b17023SJohn Marino   rtx old_niter, niter, init_code, branch_code, tmp;
966*e4b17023SJohn Marino   unsigned i, j, p;
967*e4b17023SJohn Marino   basic_block preheader, *body, swtch, ezc_swtch;
968*e4b17023SJohn Marino   VEC (basic_block, heap) *dom_bbs;
969*e4b17023SJohn Marino   sbitmap wont_exit;
970*e4b17023SJohn Marino   int may_exit_copy;
971*e4b17023SJohn Marino   unsigned n_peel;
972*e4b17023SJohn Marino   VEC (edge, heap) *remove_edges;
973*e4b17023SJohn Marino   edge e;
974*e4b17023SJohn Marino   bool extra_zero_check, last_may_exit;
975*e4b17023SJohn Marino   unsigned max_unroll = loop->lpt_decision.times;
976*e4b17023SJohn Marino   struct niter_desc *desc = get_simple_loop_desc (loop);
977*e4b17023SJohn Marino   bool exit_at_end = loop_exit_at_end_p (loop);
978*e4b17023SJohn Marino   struct opt_info *opt_info = NULL;
979*e4b17023SJohn Marino   bool ok;
980*e4b17023SJohn Marino 
981*e4b17023SJohn Marino   if (flag_split_ivs_in_unroller
982*e4b17023SJohn Marino       || flag_variable_expansion_in_unroller)
983*e4b17023SJohn Marino     opt_info = analyze_insns_in_loop (loop);
984*e4b17023SJohn Marino 
985*e4b17023SJohn Marino   /* Remember blocks whose dominators will have to be updated.  */
986*e4b17023SJohn Marino   dom_bbs = NULL;
987*e4b17023SJohn Marino 
988*e4b17023SJohn Marino   body = get_loop_body (loop);
989*e4b17023SJohn Marino   for (i = 0; i < loop->num_nodes; i++)
990*e4b17023SJohn Marino     {
991*e4b17023SJohn Marino       VEC (basic_block, heap) *ldom;
992*e4b17023SJohn Marino       basic_block bb;
993*e4b17023SJohn Marino 
994*e4b17023SJohn Marino       ldom = get_dominated_by (CDI_DOMINATORS, body[i]);
995*e4b17023SJohn Marino       FOR_EACH_VEC_ELT (basic_block, ldom, j, bb)
996*e4b17023SJohn Marino 	if (!flow_bb_inside_loop_p (loop, bb))
997*e4b17023SJohn Marino 	  VEC_safe_push (basic_block, heap, dom_bbs, bb);
998*e4b17023SJohn Marino 
999*e4b17023SJohn Marino       VEC_free (basic_block, heap, ldom);
1000*e4b17023SJohn Marino     }
1001*e4b17023SJohn Marino   free (body);
1002*e4b17023SJohn Marino 
1003*e4b17023SJohn Marino   if (!exit_at_end)
1004*e4b17023SJohn Marino     {
1005*e4b17023SJohn Marino       /* Leave exit in first copy (for explanation why see comment in
1006*e4b17023SJohn Marino 	 unroll_loop_constant_iterations).  */
1007*e4b17023SJohn Marino       may_exit_copy = 0;
1008*e4b17023SJohn Marino       n_peel = max_unroll - 1;
1009*e4b17023SJohn Marino       extra_zero_check = true;
1010*e4b17023SJohn Marino       last_may_exit = false;
1011*e4b17023SJohn Marino     }
1012*e4b17023SJohn Marino   else
1013*e4b17023SJohn Marino     {
1014*e4b17023SJohn Marino       /* Leave exit in last copy (for explanation why see comment in
1015*e4b17023SJohn Marino 	 unroll_loop_constant_iterations).  */
1016*e4b17023SJohn Marino       may_exit_copy = max_unroll;
1017*e4b17023SJohn Marino       n_peel = max_unroll;
1018*e4b17023SJohn Marino       extra_zero_check = false;
1019*e4b17023SJohn Marino       last_may_exit = true;
1020*e4b17023SJohn Marino     }
1021*e4b17023SJohn Marino 
1022*e4b17023SJohn Marino   /* Get expression for number of iterations.  */
1023*e4b17023SJohn Marino   start_sequence ();
1024*e4b17023SJohn Marino   old_niter = niter = gen_reg_rtx (desc->mode);
1025*e4b17023SJohn Marino   tmp = force_operand (copy_rtx (desc->niter_expr), niter);
1026*e4b17023SJohn Marino   if (tmp != niter)
1027*e4b17023SJohn Marino     emit_move_insn (niter, tmp);
1028*e4b17023SJohn Marino 
1029*e4b17023SJohn Marino   /* Count modulo by ANDing it with max_unroll; we use the fact that
1030*e4b17023SJohn Marino      the number of unrollings is a power of two, and thus this is correct
1031*e4b17023SJohn Marino      even if there is overflow in the computation.  */
1032*e4b17023SJohn Marino   niter = expand_simple_binop (desc->mode, AND,
1033*e4b17023SJohn Marino 			       niter,
1034*e4b17023SJohn Marino 			       GEN_INT (max_unroll),
1035*e4b17023SJohn Marino 			       NULL_RTX, 0, OPTAB_LIB_WIDEN);
1036*e4b17023SJohn Marino 
1037*e4b17023SJohn Marino   init_code = get_insns ();
1038*e4b17023SJohn Marino   end_sequence ();
1039*e4b17023SJohn Marino   unshare_all_rtl_in_chain (init_code);
1040*e4b17023SJohn Marino 
1041*e4b17023SJohn Marino   /* Precondition the loop.  */
1042*e4b17023SJohn Marino   split_edge_and_insert (loop_preheader_edge (loop), init_code);
1043*e4b17023SJohn Marino 
1044*e4b17023SJohn Marino   remove_edges = NULL;
1045*e4b17023SJohn Marino 
1046*e4b17023SJohn Marino   wont_exit = sbitmap_alloc (max_unroll + 2);
1047*e4b17023SJohn Marino 
1048*e4b17023SJohn Marino   /* Peel the first copy of loop body (almost always we must leave exit test
1049*e4b17023SJohn Marino      here; the only exception is when we have extra zero check and the number
1050*e4b17023SJohn Marino      of iterations is reliable.  Also record the place of (possible) extra
1051*e4b17023SJohn Marino      zero check.  */
1052*e4b17023SJohn Marino   sbitmap_zero (wont_exit);
1053*e4b17023SJohn Marino   if (extra_zero_check
1054*e4b17023SJohn Marino       && !desc->noloop_assumptions)
1055*e4b17023SJohn Marino     SET_BIT (wont_exit, 1);
1056*e4b17023SJohn Marino   ezc_swtch = loop_preheader_edge (loop)->src;
1057*e4b17023SJohn Marino   ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
1058*e4b17023SJohn Marino 				      1, wont_exit, desc->out_edge,
1059*e4b17023SJohn Marino 				      &remove_edges,
1060*e4b17023SJohn Marino 				      DLTHE_FLAG_UPDATE_FREQ);
1061*e4b17023SJohn Marino   gcc_assert (ok);
1062*e4b17023SJohn Marino 
1063*e4b17023SJohn Marino   /* Record the place where switch will be built for preconditioning.  */
1064*e4b17023SJohn Marino   swtch = split_edge (loop_preheader_edge (loop));
1065*e4b17023SJohn Marino 
1066*e4b17023SJohn Marino   for (i = 0; i < n_peel; i++)
1067*e4b17023SJohn Marino     {
1068*e4b17023SJohn Marino       /* Peel the copy.  */
1069*e4b17023SJohn Marino       sbitmap_zero (wont_exit);
1070*e4b17023SJohn Marino       if (i != n_peel - 1 || !last_may_exit)
1071*e4b17023SJohn Marino 	SET_BIT (wont_exit, 1);
1072*e4b17023SJohn Marino       ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
1073*e4b17023SJohn Marino 					  1, wont_exit, desc->out_edge,
1074*e4b17023SJohn Marino 					  &remove_edges,
1075*e4b17023SJohn Marino 					  DLTHE_FLAG_UPDATE_FREQ);
1076*e4b17023SJohn Marino       gcc_assert (ok);
1077*e4b17023SJohn Marino 
1078*e4b17023SJohn Marino       /* Create item for switch.  */
1079*e4b17023SJohn Marino       j = n_peel - i - (extra_zero_check ? 0 : 1);
1080*e4b17023SJohn Marino       p = REG_BR_PROB_BASE / (i + 2);
1081*e4b17023SJohn Marino 
1082*e4b17023SJohn Marino       preheader = split_edge (loop_preheader_edge (loop));
1083*e4b17023SJohn Marino       branch_code = compare_and_jump_seq (copy_rtx (niter), GEN_INT (j), EQ,
1084*e4b17023SJohn Marino 					  block_label (preheader), p,
1085*e4b17023SJohn Marino 					  NULL_RTX);
1086*e4b17023SJohn Marino 
1087*e4b17023SJohn Marino       /* We rely on the fact that the compare and jump cannot be optimized out,
1088*e4b17023SJohn Marino 	 and hence the cfg we create is correct.  */
1089*e4b17023SJohn Marino       gcc_assert (branch_code != NULL_RTX);
1090*e4b17023SJohn Marino 
1091*e4b17023SJohn Marino       swtch = split_edge_and_insert (single_pred_edge (swtch), branch_code);
1092*e4b17023SJohn Marino       set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
1093*e4b17023SJohn Marino       single_pred_edge (swtch)->probability = REG_BR_PROB_BASE - p;
1094*e4b17023SJohn Marino       e = make_edge (swtch, preheader,
1095*e4b17023SJohn Marino 		     single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
1096*e4b17023SJohn Marino       e->probability = p;
1097*e4b17023SJohn Marino     }
1098*e4b17023SJohn Marino 
1099*e4b17023SJohn Marino   if (extra_zero_check)
1100*e4b17023SJohn Marino     {
1101*e4b17023SJohn Marino       /* Add branch for zero iterations.  */
1102*e4b17023SJohn Marino       p = REG_BR_PROB_BASE / (max_unroll + 1);
1103*e4b17023SJohn Marino       swtch = ezc_swtch;
1104*e4b17023SJohn Marino       preheader = split_edge (loop_preheader_edge (loop));
1105*e4b17023SJohn Marino       branch_code = compare_and_jump_seq (copy_rtx (niter), const0_rtx, EQ,
1106*e4b17023SJohn Marino 					  block_label (preheader), p,
1107*e4b17023SJohn Marino 					  NULL_RTX);
1108*e4b17023SJohn Marino       gcc_assert (branch_code != NULL_RTX);
1109*e4b17023SJohn Marino 
1110*e4b17023SJohn Marino       swtch = split_edge_and_insert (single_succ_edge (swtch), branch_code);
1111*e4b17023SJohn Marino       set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
1112*e4b17023SJohn Marino       single_succ_edge (swtch)->probability = REG_BR_PROB_BASE - p;
1113*e4b17023SJohn Marino       e = make_edge (swtch, preheader,
1114*e4b17023SJohn Marino 		     single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
1115*e4b17023SJohn Marino       e->probability = p;
1116*e4b17023SJohn Marino     }
1117*e4b17023SJohn Marino 
1118*e4b17023SJohn Marino   /* Recount dominators for outer blocks.  */
1119*e4b17023SJohn Marino   iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, false);
1120*e4b17023SJohn Marino 
1121*e4b17023SJohn Marino   /* And unroll loop.  */
1122*e4b17023SJohn Marino 
1123*e4b17023SJohn Marino   sbitmap_ones (wont_exit);
1124*e4b17023SJohn Marino   RESET_BIT (wont_exit, may_exit_copy);
1125*e4b17023SJohn Marino   opt_info_start_duplication (opt_info);
1126*e4b17023SJohn Marino 
1127*e4b17023SJohn Marino   ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
1128*e4b17023SJohn Marino 				      max_unroll,
1129*e4b17023SJohn Marino 				      wont_exit, desc->out_edge,
1130*e4b17023SJohn Marino 				      &remove_edges,
1131*e4b17023SJohn Marino 				      DLTHE_FLAG_UPDATE_FREQ
1132*e4b17023SJohn Marino 				      | (opt_info
1133*e4b17023SJohn Marino 					 ? DLTHE_RECORD_COPY_NUMBER
1134*e4b17023SJohn Marino 					   : 0));
1135*e4b17023SJohn Marino   gcc_assert (ok);
1136*e4b17023SJohn Marino 
1137*e4b17023SJohn Marino   if (opt_info)
1138*e4b17023SJohn Marino     {
1139*e4b17023SJohn Marino       apply_opt_in_copies (opt_info, max_unroll, true, true);
1140*e4b17023SJohn Marino       free_opt_info (opt_info);
1141*e4b17023SJohn Marino     }
1142*e4b17023SJohn Marino 
1143*e4b17023SJohn Marino   free (wont_exit);
1144*e4b17023SJohn Marino 
1145*e4b17023SJohn Marino   if (exit_at_end)
1146*e4b17023SJohn Marino     {
1147*e4b17023SJohn Marino       basic_block exit_block = get_bb_copy (desc->in_edge->src);
1148*e4b17023SJohn Marino       /* Find a new in and out edge; they are in the last copy we have
1149*e4b17023SJohn Marino 	 made.  */
1150*e4b17023SJohn Marino 
1151*e4b17023SJohn Marino       if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
1152*e4b17023SJohn Marino 	{
1153*e4b17023SJohn Marino 	  desc->out_edge = EDGE_SUCC (exit_block, 0);
1154*e4b17023SJohn Marino 	  desc->in_edge = EDGE_SUCC (exit_block, 1);
1155*e4b17023SJohn Marino 	}
1156*e4b17023SJohn Marino       else
1157*e4b17023SJohn Marino 	{
1158*e4b17023SJohn Marino 	  desc->out_edge = EDGE_SUCC (exit_block, 1);
1159*e4b17023SJohn Marino 	  desc->in_edge = EDGE_SUCC (exit_block, 0);
1160*e4b17023SJohn Marino 	}
1161*e4b17023SJohn Marino     }
1162*e4b17023SJohn Marino 
1163*e4b17023SJohn Marino   /* Remove the edges.  */
1164*e4b17023SJohn Marino   FOR_EACH_VEC_ELT (edge, remove_edges, i, e)
1165*e4b17023SJohn Marino     remove_path (e);
1166*e4b17023SJohn Marino   VEC_free (edge, heap, remove_edges);
1167*e4b17023SJohn Marino 
1168*e4b17023SJohn Marino   /* We must be careful when updating the number of iterations due to
1169*e4b17023SJohn Marino      preconditioning and the fact that the value must be valid at entry
1170*e4b17023SJohn Marino      of the loop.  After passing through the above code, we see that
1171*e4b17023SJohn Marino      the correct new number of iterations is this:  */
1172*e4b17023SJohn Marino   gcc_assert (!desc->const_iter);
1173*e4b17023SJohn Marino   desc->niter_expr =
1174*e4b17023SJohn Marino     simplify_gen_binary (UDIV, desc->mode, old_niter,
1175*e4b17023SJohn Marino 			 GEN_INT (max_unroll + 1));
1176*e4b17023SJohn Marino   desc->niter_max /= max_unroll + 1;
1177*e4b17023SJohn Marino   if (exit_at_end)
1178*e4b17023SJohn Marino     {
1179*e4b17023SJohn Marino       desc->niter_expr =
1180*e4b17023SJohn Marino 	simplify_gen_binary (MINUS, desc->mode, desc->niter_expr, const1_rtx);
1181*e4b17023SJohn Marino       desc->noloop_assumptions = NULL_RTX;
1182*e4b17023SJohn Marino       desc->niter_max--;
1183*e4b17023SJohn Marino     }
1184*e4b17023SJohn Marino 
1185*e4b17023SJohn Marino   if (dump_file)
1186*e4b17023SJohn Marino     fprintf (dump_file,
1187*e4b17023SJohn Marino 	     ";; Unrolled loop %d times, counting # of iterations "
1188*e4b17023SJohn Marino 	     "in runtime, %i insns\n",
1189*e4b17023SJohn Marino 	     max_unroll, num_loop_insns (loop));
1190*e4b17023SJohn Marino 
1191*e4b17023SJohn Marino   VEC_free (basic_block, heap, dom_bbs);
1192*e4b17023SJohn Marino }
1193*e4b17023SJohn Marino 
1194*e4b17023SJohn Marino /* Decide whether to simply peel LOOP and how much.  */
1195*e4b17023SJohn Marino static void
decide_peel_simple(struct loop * loop,int flags)1196*e4b17023SJohn Marino decide_peel_simple (struct loop *loop, int flags)
1197*e4b17023SJohn Marino {
1198*e4b17023SJohn Marino   unsigned npeel;
1199*e4b17023SJohn Marino   struct niter_desc *desc;
1200*e4b17023SJohn Marino 
1201*e4b17023SJohn Marino   if (!(flags & UAP_PEEL))
1202*e4b17023SJohn Marino     {
1203*e4b17023SJohn Marino       /* We were not asked to, just return back silently.  */
1204*e4b17023SJohn Marino       return;
1205*e4b17023SJohn Marino     }
1206*e4b17023SJohn Marino 
1207*e4b17023SJohn Marino   if (dump_file)
1208*e4b17023SJohn Marino     fprintf (dump_file, "\n;; Considering simply peeling loop\n");
1209*e4b17023SJohn Marino 
1210*e4b17023SJohn Marino   /* npeel = number of iterations to peel.  */
1211*e4b17023SJohn Marino   npeel = PARAM_VALUE (PARAM_MAX_PEELED_INSNS) / loop->ninsns;
1212*e4b17023SJohn Marino   if (npeel > (unsigned) PARAM_VALUE (PARAM_MAX_PEEL_TIMES))
1213*e4b17023SJohn Marino     npeel = PARAM_VALUE (PARAM_MAX_PEEL_TIMES);
1214*e4b17023SJohn Marino 
1215*e4b17023SJohn Marino   /* Skip big loops.  */
1216*e4b17023SJohn Marino   if (!npeel)
1217*e4b17023SJohn Marino     {
1218*e4b17023SJohn Marino       if (dump_file)
1219*e4b17023SJohn Marino 	fprintf (dump_file, ";; Not considering loop, is too big\n");
1220*e4b17023SJohn Marino       return;
1221*e4b17023SJohn Marino     }
1222*e4b17023SJohn Marino 
1223*e4b17023SJohn Marino   /* Check for simple loops.  */
1224*e4b17023SJohn Marino   desc = get_simple_loop_desc (loop);
1225*e4b17023SJohn Marino 
1226*e4b17023SJohn Marino   /* Check number of iterations.  */
1227*e4b17023SJohn Marino   if (desc->simple_p && !desc->assumptions && desc->const_iter)
1228*e4b17023SJohn Marino     {
1229*e4b17023SJohn Marino       if (dump_file)
1230*e4b17023SJohn Marino 	fprintf (dump_file, ";; Loop iterates constant times\n");
1231*e4b17023SJohn Marino       return;
1232*e4b17023SJohn Marino     }
1233*e4b17023SJohn Marino 
1234*e4b17023SJohn Marino   /* Do not simply peel loops with branches inside -- it increases number
1235*e4b17023SJohn Marino      of mispredicts.  */
1236*e4b17023SJohn Marino   if (num_loop_branches (loop) > 1)
1237*e4b17023SJohn Marino     {
1238*e4b17023SJohn Marino       if (dump_file)
1239*e4b17023SJohn Marino 	fprintf (dump_file, ";; Not peeling, contains branches\n");
1240*e4b17023SJohn Marino       return;
1241*e4b17023SJohn Marino     }
1242*e4b17023SJohn Marino 
1243*e4b17023SJohn Marino   if (loop->header->count)
1244*e4b17023SJohn Marino     {
1245*e4b17023SJohn Marino       unsigned niter = expected_loop_iterations (loop);
1246*e4b17023SJohn Marino       if (niter + 1 > npeel)
1247*e4b17023SJohn Marino 	{
1248*e4b17023SJohn Marino 	  if (dump_file)
1249*e4b17023SJohn Marino 	    {
1250*e4b17023SJohn Marino 	      fprintf (dump_file, ";; Not peeling loop, rolls too much (");
1251*e4b17023SJohn Marino 	      fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
1252*e4b17023SJohn Marino 		       (HOST_WIDEST_INT) (niter + 1));
1253*e4b17023SJohn Marino 	      fprintf (dump_file, " iterations > %d [maximum peelings])\n",
1254*e4b17023SJohn Marino 		       npeel);
1255*e4b17023SJohn Marino 	    }
1256*e4b17023SJohn Marino 	  return;
1257*e4b17023SJohn Marino 	}
1258*e4b17023SJohn Marino       npeel = niter + 1;
1259*e4b17023SJohn Marino     }
1260*e4b17023SJohn Marino   else
1261*e4b17023SJohn Marino     {
1262*e4b17023SJohn Marino       /* For now we have no good heuristics to decide whether loop peeling
1263*e4b17023SJohn Marino          will be effective, so disable it.  */
1264*e4b17023SJohn Marino       if (dump_file)
1265*e4b17023SJohn Marino 	fprintf (dump_file,
1266*e4b17023SJohn Marino 		 ";; Not peeling loop, no evidence it will be profitable\n");
1267*e4b17023SJohn Marino       return;
1268*e4b17023SJohn Marino     }
1269*e4b17023SJohn Marino 
1270*e4b17023SJohn Marino   /* Success.  */
1271*e4b17023SJohn Marino   loop->lpt_decision.decision = LPT_PEEL_SIMPLE;
1272*e4b17023SJohn Marino   loop->lpt_decision.times = npeel;
1273*e4b17023SJohn Marino 
1274*e4b17023SJohn Marino   if (dump_file)
1275*e4b17023SJohn Marino     fprintf (dump_file, ";; Decided to simply peel the loop, %d times.\n",
1276*e4b17023SJohn Marino 	     loop->lpt_decision.times);
1277*e4b17023SJohn Marino }
1278*e4b17023SJohn Marino 
1279*e4b17023SJohn Marino /* Peel a LOOP LOOP->LPT_DECISION.TIMES times.  The transformation:
1280*e4b17023SJohn Marino    while (cond)
1281*e4b17023SJohn Marino      body;
1282*e4b17023SJohn Marino 
1283*e4b17023SJohn Marino    ==>
1284*e4b17023SJohn Marino 
1285*e4b17023SJohn Marino    if (!cond) goto end;
1286*e4b17023SJohn Marino    body;
1287*e4b17023SJohn Marino    if (!cond) goto end;
1288*e4b17023SJohn Marino    body;
1289*e4b17023SJohn Marino    while (cond)
1290*e4b17023SJohn Marino      body;
1291*e4b17023SJohn Marino    end: ;
1292*e4b17023SJohn Marino    */
1293*e4b17023SJohn Marino static void
peel_loop_simple(struct loop * loop)1294*e4b17023SJohn Marino peel_loop_simple (struct loop *loop)
1295*e4b17023SJohn Marino {
1296*e4b17023SJohn Marino   sbitmap wont_exit;
1297*e4b17023SJohn Marino   unsigned npeel = loop->lpt_decision.times;
1298*e4b17023SJohn Marino   struct niter_desc *desc = get_simple_loop_desc (loop);
1299*e4b17023SJohn Marino   struct opt_info *opt_info = NULL;
1300*e4b17023SJohn Marino   bool ok;
1301*e4b17023SJohn Marino 
1302*e4b17023SJohn Marino   if (flag_split_ivs_in_unroller && npeel > 1)
1303*e4b17023SJohn Marino     opt_info = analyze_insns_in_loop (loop);
1304*e4b17023SJohn Marino 
1305*e4b17023SJohn Marino   wont_exit = sbitmap_alloc (npeel + 1);
1306*e4b17023SJohn Marino   sbitmap_zero (wont_exit);
1307*e4b17023SJohn Marino 
1308*e4b17023SJohn Marino   opt_info_start_duplication (opt_info);
1309*e4b17023SJohn Marino 
1310*e4b17023SJohn Marino   ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
1311*e4b17023SJohn Marino 				      npeel, wont_exit, NULL,
1312*e4b17023SJohn Marino 				      NULL, DLTHE_FLAG_UPDATE_FREQ
1313*e4b17023SJohn Marino 				      | (opt_info
1314*e4b17023SJohn Marino 					 ? DLTHE_RECORD_COPY_NUMBER
1315*e4b17023SJohn Marino 					   : 0));
1316*e4b17023SJohn Marino   gcc_assert (ok);
1317*e4b17023SJohn Marino 
1318*e4b17023SJohn Marino   free (wont_exit);
1319*e4b17023SJohn Marino 
1320*e4b17023SJohn Marino   if (opt_info)
1321*e4b17023SJohn Marino     {
1322*e4b17023SJohn Marino       apply_opt_in_copies (opt_info, npeel, false, false);
1323*e4b17023SJohn Marino       free_opt_info (opt_info);
1324*e4b17023SJohn Marino     }
1325*e4b17023SJohn Marino 
1326*e4b17023SJohn Marino   if (desc->simple_p)
1327*e4b17023SJohn Marino     {
1328*e4b17023SJohn Marino       if (desc->const_iter)
1329*e4b17023SJohn Marino 	{
1330*e4b17023SJohn Marino 	  desc->niter -= npeel;
1331*e4b17023SJohn Marino 	  desc->niter_expr = GEN_INT (desc->niter);
1332*e4b17023SJohn Marino 	  desc->noloop_assumptions = NULL_RTX;
1333*e4b17023SJohn Marino 	}
1334*e4b17023SJohn Marino       else
1335*e4b17023SJohn Marino 	{
1336*e4b17023SJohn Marino 	  /* We cannot just update niter_expr, as its value might be clobbered
1337*e4b17023SJohn Marino 	     inside loop.  We could handle this by counting the number into
1338*e4b17023SJohn Marino 	     temporary just like we do in runtime unrolling, but it does not
1339*e4b17023SJohn Marino 	     seem worthwhile.  */
1340*e4b17023SJohn Marino 	  free_simple_loop_desc (loop);
1341*e4b17023SJohn Marino 	}
1342*e4b17023SJohn Marino     }
1343*e4b17023SJohn Marino   if (dump_file)
1344*e4b17023SJohn Marino     fprintf (dump_file, ";; Peeling loop %d times\n", npeel);
1345*e4b17023SJohn Marino }
1346*e4b17023SJohn Marino 
1347*e4b17023SJohn Marino /* Decide whether to unroll LOOP stupidly and how much.  */
1348*e4b17023SJohn Marino static void
decide_unroll_stupid(struct loop * loop,int flags)1349*e4b17023SJohn Marino decide_unroll_stupid (struct loop *loop, int flags)
1350*e4b17023SJohn Marino {
1351*e4b17023SJohn Marino   unsigned nunroll, nunroll_by_av, i;
1352*e4b17023SJohn Marino   struct niter_desc *desc;
1353*e4b17023SJohn Marino 
1354*e4b17023SJohn Marino   if (!(flags & UAP_UNROLL_ALL))
1355*e4b17023SJohn Marino     {
1356*e4b17023SJohn Marino       /* We were not asked to, just return back silently.  */
1357*e4b17023SJohn Marino       return;
1358*e4b17023SJohn Marino     }
1359*e4b17023SJohn Marino 
1360*e4b17023SJohn Marino   if (dump_file)
1361*e4b17023SJohn Marino     fprintf (dump_file, "\n;; Considering unrolling loop stupidly\n");
1362*e4b17023SJohn Marino 
1363*e4b17023SJohn Marino   /* nunroll = total number of copies of the original loop body in
1364*e4b17023SJohn Marino      unrolled loop (i.e. if it is 2, we have to duplicate loop body once.  */
1365*e4b17023SJohn Marino   nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
1366*e4b17023SJohn Marino   nunroll_by_av
1367*e4b17023SJohn Marino     = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
1368*e4b17023SJohn Marino   if (nunroll > nunroll_by_av)
1369*e4b17023SJohn Marino     nunroll = nunroll_by_av;
1370*e4b17023SJohn Marino   if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
1371*e4b17023SJohn Marino     nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
1372*e4b17023SJohn Marino 
1373*e4b17023SJohn Marino   if (targetm.loop_unroll_adjust)
1374*e4b17023SJohn Marino     nunroll = targetm.loop_unroll_adjust (nunroll, loop);
1375*e4b17023SJohn Marino 
1376*e4b17023SJohn Marino   /* Skip big loops.  */
1377*e4b17023SJohn Marino   if (nunroll <= 1)
1378*e4b17023SJohn Marino     {
1379*e4b17023SJohn Marino       if (dump_file)
1380*e4b17023SJohn Marino 	fprintf (dump_file, ";; Not considering loop, is too big\n");
1381*e4b17023SJohn Marino       return;
1382*e4b17023SJohn Marino     }
1383*e4b17023SJohn Marino 
1384*e4b17023SJohn Marino   /* Check for simple loops.  */
1385*e4b17023SJohn Marino   desc = get_simple_loop_desc (loop);
1386*e4b17023SJohn Marino 
1387*e4b17023SJohn Marino   /* Check simpleness.  */
1388*e4b17023SJohn Marino   if (desc->simple_p && !desc->assumptions)
1389*e4b17023SJohn Marino     {
1390*e4b17023SJohn Marino       if (dump_file)
1391*e4b17023SJohn Marino 	fprintf (dump_file, ";; The loop is simple\n");
1392*e4b17023SJohn Marino       return;
1393*e4b17023SJohn Marino     }
1394*e4b17023SJohn Marino 
1395*e4b17023SJohn Marino   /* Do not unroll loops with branches inside -- it increases number
1396*e4b17023SJohn Marino      of mispredicts.  */
1397*e4b17023SJohn Marino   if (num_loop_branches (loop) > 1)
1398*e4b17023SJohn Marino     {
1399*e4b17023SJohn Marino       if (dump_file)
1400*e4b17023SJohn Marino 	fprintf (dump_file, ";; Not unrolling, contains branches\n");
1401*e4b17023SJohn Marino       return;
1402*e4b17023SJohn Marino     }
1403*e4b17023SJohn Marino 
1404*e4b17023SJohn Marino   /* If we have profile feedback, check whether the loop rolls.  */
1405*e4b17023SJohn Marino   if (loop->header->count
1406*e4b17023SJohn Marino       && expected_loop_iterations (loop) < 2 * nunroll)
1407*e4b17023SJohn Marino     {
1408*e4b17023SJohn Marino       if (dump_file)
1409*e4b17023SJohn Marino 	fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
1410*e4b17023SJohn Marino       return;
1411*e4b17023SJohn Marino     }
1412*e4b17023SJohn Marino 
1413*e4b17023SJohn Marino   /* Success.  Now force nunroll to be power of 2, as it seems that this
1414*e4b17023SJohn Marino      improves results (partially because of better alignments, partially
1415*e4b17023SJohn Marino      because of some dark magic).  */
1416*e4b17023SJohn Marino   for (i = 1; 2 * i <= nunroll; i *= 2)
1417*e4b17023SJohn Marino     continue;
1418*e4b17023SJohn Marino 
1419*e4b17023SJohn Marino   loop->lpt_decision.decision = LPT_UNROLL_STUPID;
1420*e4b17023SJohn Marino   loop->lpt_decision.times = i - 1;
1421*e4b17023SJohn Marino 
1422*e4b17023SJohn Marino   if (dump_file)
1423*e4b17023SJohn Marino     fprintf (dump_file,
1424*e4b17023SJohn Marino 	     ";; Decided to unroll the loop stupidly, %d times.\n",
1425*e4b17023SJohn Marino 	     loop->lpt_decision.times);
1426*e4b17023SJohn Marino }
1427*e4b17023SJohn Marino 
1428*e4b17023SJohn Marino /* Unroll a LOOP LOOP->LPT_DECISION.TIMES times.  The transformation:
1429*e4b17023SJohn Marino    while (cond)
1430*e4b17023SJohn Marino      body;
1431*e4b17023SJohn Marino 
1432*e4b17023SJohn Marino    ==>
1433*e4b17023SJohn Marino 
1434*e4b17023SJohn Marino    while (cond)
1435*e4b17023SJohn Marino      {
1436*e4b17023SJohn Marino        body;
1437*e4b17023SJohn Marino        if (!cond) break;
1438*e4b17023SJohn Marino        body;
1439*e4b17023SJohn Marino        if (!cond) break;
1440*e4b17023SJohn Marino        body;
1441*e4b17023SJohn Marino        if (!cond) break;
1442*e4b17023SJohn Marino        body;
1443*e4b17023SJohn Marino      }
1444*e4b17023SJohn Marino    */
1445*e4b17023SJohn Marino static void
unroll_loop_stupid(struct loop * loop)1446*e4b17023SJohn Marino unroll_loop_stupid (struct loop *loop)
1447*e4b17023SJohn Marino {
1448*e4b17023SJohn Marino   sbitmap wont_exit;
1449*e4b17023SJohn Marino   unsigned nunroll = loop->lpt_decision.times;
1450*e4b17023SJohn Marino   struct niter_desc *desc = get_simple_loop_desc (loop);
1451*e4b17023SJohn Marino   struct opt_info *opt_info = NULL;
1452*e4b17023SJohn Marino   bool ok;
1453*e4b17023SJohn Marino 
1454*e4b17023SJohn Marino   if (flag_split_ivs_in_unroller
1455*e4b17023SJohn Marino       || flag_variable_expansion_in_unroller)
1456*e4b17023SJohn Marino     opt_info = analyze_insns_in_loop (loop);
1457*e4b17023SJohn Marino 
1458*e4b17023SJohn Marino 
1459*e4b17023SJohn Marino   wont_exit = sbitmap_alloc (nunroll + 1);
1460*e4b17023SJohn Marino   sbitmap_zero (wont_exit);
1461*e4b17023SJohn Marino   opt_info_start_duplication (opt_info);
1462*e4b17023SJohn Marino 
1463*e4b17023SJohn Marino   ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
1464*e4b17023SJohn Marino 				      nunroll, wont_exit,
1465*e4b17023SJohn Marino 				      NULL, NULL,
1466*e4b17023SJohn Marino 				      DLTHE_FLAG_UPDATE_FREQ
1467*e4b17023SJohn Marino 				      | (opt_info
1468*e4b17023SJohn Marino 					 ? DLTHE_RECORD_COPY_NUMBER
1469*e4b17023SJohn Marino 					   : 0));
1470*e4b17023SJohn Marino   gcc_assert (ok);
1471*e4b17023SJohn Marino 
1472*e4b17023SJohn Marino   if (opt_info)
1473*e4b17023SJohn Marino     {
1474*e4b17023SJohn Marino       apply_opt_in_copies (opt_info, nunroll, true, true);
1475*e4b17023SJohn Marino       free_opt_info (opt_info);
1476*e4b17023SJohn Marino     }
1477*e4b17023SJohn Marino 
1478*e4b17023SJohn Marino   free (wont_exit);
1479*e4b17023SJohn Marino 
1480*e4b17023SJohn Marino   if (desc->simple_p)
1481*e4b17023SJohn Marino     {
1482*e4b17023SJohn Marino       /* We indeed may get here provided that there are nontrivial assumptions
1483*e4b17023SJohn Marino 	 for a loop to be really simple.  We could update the counts, but the
1484*e4b17023SJohn Marino 	 problem is that we are unable to decide which exit will be taken
1485*e4b17023SJohn Marino 	 (not really true in case the number of iterations is constant,
1486*e4b17023SJohn Marino 	 but noone will do anything with this information, so we do not
1487*e4b17023SJohn Marino 	 worry about it).  */
1488*e4b17023SJohn Marino       desc->simple_p = false;
1489*e4b17023SJohn Marino     }
1490*e4b17023SJohn Marino 
1491*e4b17023SJohn Marino   if (dump_file)
1492*e4b17023SJohn Marino     fprintf (dump_file, ";; Unrolled loop %d times, %i insns\n",
1493*e4b17023SJohn Marino 	     nunroll, num_loop_insns (loop));
1494*e4b17023SJohn Marino }
1495*e4b17023SJohn Marino 
1496*e4b17023SJohn Marino /* A hash function for information about insns to split.  */
1497*e4b17023SJohn Marino 
1498*e4b17023SJohn Marino static hashval_t
si_info_hash(const void * ivts)1499*e4b17023SJohn Marino si_info_hash (const void *ivts)
1500*e4b17023SJohn Marino {
1501*e4b17023SJohn Marino   return (hashval_t) INSN_UID (((const struct iv_to_split *) ivts)->insn);
1502*e4b17023SJohn Marino }
1503*e4b17023SJohn Marino 
1504*e4b17023SJohn Marino /* An equality functions for information about insns to split.  */
1505*e4b17023SJohn Marino 
1506*e4b17023SJohn Marino static int
si_info_eq(const void * ivts1,const void * ivts2)1507*e4b17023SJohn Marino si_info_eq (const void *ivts1, const void *ivts2)
1508*e4b17023SJohn Marino {
1509*e4b17023SJohn Marino   const struct iv_to_split *const i1 = (const struct iv_to_split *) ivts1;
1510*e4b17023SJohn Marino   const struct iv_to_split *const i2 = (const struct iv_to_split *) ivts2;
1511*e4b17023SJohn Marino 
1512*e4b17023SJohn Marino   return i1->insn == i2->insn;
1513*e4b17023SJohn Marino }
1514*e4b17023SJohn Marino 
1515*e4b17023SJohn Marino /* Return a hash for VES, which is really a "var_to_expand *".  */
1516*e4b17023SJohn Marino 
1517*e4b17023SJohn Marino static hashval_t
ve_info_hash(const void * ves)1518*e4b17023SJohn Marino ve_info_hash (const void *ves)
1519*e4b17023SJohn Marino {
1520*e4b17023SJohn Marino   return (hashval_t) INSN_UID (((const struct var_to_expand *) ves)->insn);
1521*e4b17023SJohn Marino }
1522*e4b17023SJohn Marino 
1523*e4b17023SJohn Marino /* Return true if IVTS1 and IVTS2 (which are really both of type
1524*e4b17023SJohn Marino    "var_to_expand *") refer to the same instruction.  */
1525*e4b17023SJohn Marino 
1526*e4b17023SJohn Marino static int
ve_info_eq(const void * ivts1,const void * ivts2)1527*e4b17023SJohn Marino ve_info_eq (const void *ivts1, const void *ivts2)
1528*e4b17023SJohn Marino {
1529*e4b17023SJohn Marino   const struct var_to_expand *const i1 = (const struct var_to_expand *) ivts1;
1530*e4b17023SJohn Marino   const struct var_to_expand *const i2 = (const struct var_to_expand *) ivts2;
1531*e4b17023SJohn Marino 
1532*e4b17023SJohn Marino   return i1->insn == i2->insn;
1533*e4b17023SJohn Marino }
1534*e4b17023SJohn Marino 
1535*e4b17023SJohn Marino /* Returns true if REG is referenced in one nondebug insn in LOOP.
1536*e4b17023SJohn Marino    Set *DEBUG_USES to the number of debug insns that reference the
1537*e4b17023SJohn Marino    variable.  */
1538*e4b17023SJohn Marino 
1539*e4b17023SJohn Marino bool
referenced_in_one_insn_in_loop_p(struct loop * loop,rtx reg,int * debug_uses)1540*e4b17023SJohn Marino referenced_in_one_insn_in_loop_p (struct loop *loop, rtx reg,
1541*e4b17023SJohn Marino 				  int *debug_uses)
1542*e4b17023SJohn Marino {
1543*e4b17023SJohn Marino   basic_block *body, bb;
1544*e4b17023SJohn Marino   unsigned i;
1545*e4b17023SJohn Marino   int count_ref = 0;
1546*e4b17023SJohn Marino   rtx insn;
1547*e4b17023SJohn Marino 
1548*e4b17023SJohn Marino   body = get_loop_body (loop);
1549*e4b17023SJohn Marino   for (i = 0; i < loop->num_nodes; i++)
1550*e4b17023SJohn Marino     {
1551*e4b17023SJohn Marino       bb = body[i];
1552*e4b17023SJohn Marino 
1553*e4b17023SJohn Marino       FOR_BB_INSNS (bb, insn)
1554*e4b17023SJohn Marino 	if (!rtx_referenced_p (reg, insn))
1555*e4b17023SJohn Marino 	  continue;
1556*e4b17023SJohn Marino 	else if (DEBUG_INSN_P (insn))
1557*e4b17023SJohn Marino 	  ++*debug_uses;
1558*e4b17023SJohn Marino 	else if (++count_ref > 1)
1559*e4b17023SJohn Marino 	  break;
1560*e4b17023SJohn Marino     }
1561*e4b17023SJohn Marino   free (body);
1562*e4b17023SJohn Marino   return (count_ref  == 1);
1563*e4b17023SJohn Marino }
1564*e4b17023SJohn Marino 
1565*e4b17023SJohn Marino /* Reset the DEBUG_USES debug insns in LOOP that reference REG.  */
1566*e4b17023SJohn Marino 
1567*e4b17023SJohn Marino static void
reset_debug_uses_in_loop(struct loop * loop,rtx reg,int debug_uses)1568*e4b17023SJohn Marino reset_debug_uses_in_loop (struct loop *loop, rtx reg, int debug_uses)
1569*e4b17023SJohn Marino {
1570*e4b17023SJohn Marino   basic_block *body, bb;
1571*e4b17023SJohn Marino   unsigned i;
1572*e4b17023SJohn Marino   rtx insn;
1573*e4b17023SJohn Marino 
1574*e4b17023SJohn Marino   body = get_loop_body (loop);
1575*e4b17023SJohn Marino   for (i = 0; debug_uses && i < loop->num_nodes; i++)
1576*e4b17023SJohn Marino     {
1577*e4b17023SJohn Marino       bb = body[i];
1578*e4b17023SJohn Marino 
1579*e4b17023SJohn Marino       FOR_BB_INSNS (bb, insn)
1580*e4b17023SJohn Marino 	if (!DEBUG_INSN_P (insn) || !rtx_referenced_p (reg, insn))
1581*e4b17023SJohn Marino 	  continue;
1582*e4b17023SJohn Marino 	else
1583*e4b17023SJohn Marino 	  {
1584*e4b17023SJohn Marino 	    validate_change (insn, &INSN_VAR_LOCATION_LOC (insn),
1585*e4b17023SJohn Marino 			     gen_rtx_UNKNOWN_VAR_LOC (), 0);
1586*e4b17023SJohn Marino 	    if (!--debug_uses)
1587*e4b17023SJohn Marino 	      break;
1588*e4b17023SJohn Marino 	  }
1589*e4b17023SJohn Marino     }
1590*e4b17023SJohn Marino   free (body);
1591*e4b17023SJohn Marino }
1592*e4b17023SJohn Marino 
1593*e4b17023SJohn Marino /* Determine whether INSN contains an accumulator
1594*e4b17023SJohn Marino    which can be expanded into separate copies,
1595*e4b17023SJohn Marino    one for each copy of the LOOP body.
1596*e4b17023SJohn Marino 
1597*e4b17023SJohn Marino    for (i = 0 ; i < n; i++)
1598*e4b17023SJohn Marino      sum += a[i];
1599*e4b17023SJohn Marino 
1600*e4b17023SJohn Marino    ==>
1601*e4b17023SJohn Marino 
1602*e4b17023SJohn Marino    sum += a[i]
1603*e4b17023SJohn Marino    ....
1604*e4b17023SJohn Marino    i = i+1;
1605*e4b17023SJohn Marino    sum1 += a[i]
1606*e4b17023SJohn Marino    ....
1607*e4b17023SJohn Marino    i = i+1
1608*e4b17023SJohn Marino    sum2 += a[i];
1609*e4b17023SJohn Marino    ....
1610*e4b17023SJohn Marino 
1611*e4b17023SJohn Marino    Return NULL if INSN contains no opportunity for expansion of accumulator.
1612*e4b17023SJohn Marino    Otherwise, allocate a VAR_TO_EXPAND structure, fill it with the relevant
1613*e4b17023SJohn Marino    information and return a pointer to it.
1614*e4b17023SJohn Marino */
1615*e4b17023SJohn Marino 
1616*e4b17023SJohn Marino static struct var_to_expand *
analyze_insn_to_expand_var(struct loop * loop,rtx insn)1617*e4b17023SJohn Marino analyze_insn_to_expand_var (struct loop *loop, rtx insn)
1618*e4b17023SJohn Marino {
1619*e4b17023SJohn Marino   rtx set, dest, src;
1620*e4b17023SJohn Marino   struct var_to_expand *ves;
1621*e4b17023SJohn Marino   unsigned accum_pos;
1622*e4b17023SJohn Marino   enum rtx_code code;
1623*e4b17023SJohn Marino   int debug_uses = 0;
1624*e4b17023SJohn Marino 
1625*e4b17023SJohn Marino   set = single_set (insn);
1626*e4b17023SJohn Marino   if (!set)
1627*e4b17023SJohn Marino     return NULL;
1628*e4b17023SJohn Marino 
1629*e4b17023SJohn Marino   dest = SET_DEST (set);
1630*e4b17023SJohn Marino   src = SET_SRC (set);
1631*e4b17023SJohn Marino   code = GET_CODE (src);
1632*e4b17023SJohn Marino 
1633*e4b17023SJohn Marino   if (code != PLUS && code != MINUS && code != MULT && code != FMA)
1634*e4b17023SJohn Marino     return NULL;
1635*e4b17023SJohn Marino 
1636*e4b17023SJohn Marino   if (FLOAT_MODE_P (GET_MODE (dest)))
1637*e4b17023SJohn Marino     {
1638*e4b17023SJohn Marino       if (!flag_associative_math)
1639*e4b17023SJohn Marino         return NULL;
1640*e4b17023SJohn Marino       /* In the case of FMA, we're also changing the rounding.  */
1641*e4b17023SJohn Marino       if (code == FMA && !flag_unsafe_math_optimizations)
1642*e4b17023SJohn Marino 	return NULL;
1643*e4b17023SJohn Marino     }
1644*e4b17023SJohn Marino 
1645*e4b17023SJohn Marino   /* Hmm, this is a bit paradoxical.  We know that INSN is a valid insn
1646*e4b17023SJohn Marino      in MD.  But if there is no optab to generate the insn, we can not
1647*e4b17023SJohn Marino      perform the variable expansion.  This can happen if an MD provides
1648*e4b17023SJohn Marino      an insn but not a named pattern to generate it, for example to avoid
1649*e4b17023SJohn Marino      producing code that needs additional mode switches like for x87/mmx.
1650*e4b17023SJohn Marino 
1651*e4b17023SJohn Marino      So we check have_insn_for which looks for an optab for the operation
1652*e4b17023SJohn Marino      in SRC.  If it doesn't exist, we can't perform the expansion even
1653*e4b17023SJohn Marino      though INSN is valid.  */
1654*e4b17023SJohn Marino   if (!have_insn_for (code, GET_MODE (src)))
1655*e4b17023SJohn Marino     return NULL;
1656*e4b17023SJohn Marino 
1657*e4b17023SJohn Marino   if (!REG_P (dest)
1658*e4b17023SJohn Marino       && !(GET_CODE (dest) == SUBREG
1659*e4b17023SJohn Marino            && REG_P (SUBREG_REG (dest))))
1660*e4b17023SJohn Marino     return NULL;
1661*e4b17023SJohn Marino 
1662*e4b17023SJohn Marino   /* Find the accumulator use within the operation.  */
1663*e4b17023SJohn Marino   if (code == FMA)
1664*e4b17023SJohn Marino     {
1665*e4b17023SJohn Marino       /* We only support accumulation via FMA in the ADD position.  */
1666*e4b17023SJohn Marino       if (!rtx_equal_p  (dest, XEXP (src, 2)))
1667*e4b17023SJohn Marino 	return NULL;
1668*e4b17023SJohn Marino       accum_pos = 2;
1669*e4b17023SJohn Marino     }
1670*e4b17023SJohn Marino   else if (rtx_equal_p (dest, XEXP (src, 0)))
1671*e4b17023SJohn Marino     accum_pos = 0;
1672*e4b17023SJohn Marino   else if (rtx_equal_p (dest, XEXP (src, 1)))
1673*e4b17023SJohn Marino     {
1674*e4b17023SJohn Marino       /* The method of expansion that we are using; which includes the
1675*e4b17023SJohn Marino 	 initialization of the expansions with zero and the summation of
1676*e4b17023SJohn Marino          the expansions at the end of the computation will yield wrong
1677*e4b17023SJohn Marino 	 results for (x = something - x) thus avoid using it in that case.  */
1678*e4b17023SJohn Marino       if (code == MINUS)
1679*e4b17023SJohn Marino 	return NULL;
1680*e4b17023SJohn Marino       accum_pos = 1;
1681*e4b17023SJohn Marino     }
1682*e4b17023SJohn Marino   else
1683*e4b17023SJohn Marino     return NULL;
1684*e4b17023SJohn Marino 
1685*e4b17023SJohn Marino   /* It must not otherwise be used.  */
1686*e4b17023SJohn Marino   if (code == FMA)
1687*e4b17023SJohn Marino     {
1688*e4b17023SJohn Marino       if (rtx_referenced_p (dest, XEXP (src, 0))
1689*e4b17023SJohn Marino 	  || rtx_referenced_p (dest, XEXP (src, 1)))
1690*e4b17023SJohn Marino 	return NULL;
1691*e4b17023SJohn Marino     }
1692*e4b17023SJohn Marino   else if (rtx_referenced_p (dest, XEXP (src, 1 - accum_pos)))
1693*e4b17023SJohn Marino     return NULL;
1694*e4b17023SJohn Marino 
1695*e4b17023SJohn Marino   /* It must be used in exactly one insn.  */
1696*e4b17023SJohn Marino   if (!referenced_in_one_insn_in_loop_p (loop, dest, &debug_uses))
1697*e4b17023SJohn Marino     return NULL;
1698*e4b17023SJohn Marino 
1699*e4b17023SJohn Marino   if (dump_file)
1700*e4b17023SJohn Marino     {
1701*e4b17023SJohn Marino       fprintf (dump_file, "\n;; Expanding Accumulator ");
1702*e4b17023SJohn Marino       print_rtl (dump_file, dest);
1703*e4b17023SJohn Marino       fprintf (dump_file, "\n");
1704*e4b17023SJohn Marino     }
1705*e4b17023SJohn Marino 
1706*e4b17023SJohn Marino   if (debug_uses)
1707*e4b17023SJohn Marino     /* Instead of resetting the debug insns, we could replace each
1708*e4b17023SJohn Marino        debug use in the loop with the sum or product of all expanded
1709*e4b17023SJohn Marino        accummulators.  Since we'll only know of all expansions at the
1710*e4b17023SJohn Marino        end, we'd have to keep track of which vars_to_expand a debug
1711*e4b17023SJohn Marino        insn in the loop references, take note of each copy of the
1712*e4b17023SJohn Marino        debug insn during unrolling, and when it's all done, compute
1713*e4b17023SJohn Marino        the sum or product of each variable and adjust the original
1714*e4b17023SJohn Marino        debug insn and each copy thereof.  What a pain!  */
1715*e4b17023SJohn Marino     reset_debug_uses_in_loop (loop, dest, debug_uses);
1716*e4b17023SJohn Marino 
1717*e4b17023SJohn Marino   /* Record the accumulator to expand.  */
1718*e4b17023SJohn Marino   ves = XNEW (struct var_to_expand);
1719*e4b17023SJohn Marino   ves->insn = insn;
1720*e4b17023SJohn Marino   ves->reg = copy_rtx (dest);
1721*e4b17023SJohn Marino   ves->var_expansions = VEC_alloc (rtx, heap, 1);
1722*e4b17023SJohn Marino   ves->next = NULL;
1723*e4b17023SJohn Marino   ves->op = GET_CODE (src);
1724*e4b17023SJohn Marino   ves->expansion_count = 0;
1725*e4b17023SJohn Marino   ves->reuse_expansion = 0;
1726*e4b17023SJohn Marino   ves->accum_pos = accum_pos;
1727*e4b17023SJohn Marino   return ves;
1728*e4b17023SJohn Marino }
1729*e4b17023SJohn Marino 
1730*e4b17023SJohn Marino /* Determine whether there is an induction variable in INSN that
1731*e4b17023SJohn Marino    we would like to split during unrolling.
1732*e4b17023SJohn Marino 
1733*e4b17023SJohn Marino    I.e. replace
1734*e4b17023SJohn Marino 
1735*e4b17023SJohn Marino    i = i + 1;
1736*e4b17023SJohn Marino    ...
1737*e4b17023SJohn Marino    i = i + 1;
1738*e4b17023SJohn Marino    ...
1739*e4b17023SJohn Marino    i = i + 1;
1740*e4b17023SJohn Marino    ...
1741*e4b17023SJohn Marino 
1742*e4b17023SJohn Marino    type chains by
1743*e4b17023SJohn Marino 
1744*e4b17023SJohn Marino    i0 = i + 1
1745*e4b17023SJohn Marino    ...
1746*e4b17023SJohn Marino    i = i0 + 1
1747*e4b17023SJohn Marino    ...
1748*e4b17023SJohn Marino    i = i0 + 2
1749*e4b17023SJohn Marino    ...
1750*e4b17023SJohn Marino 
1751*e4b17023SJohn Marino    Return NULL if INSN contains no interesting IVs.  Otherwise, allocate
1752*e4b17023SJohn Marino    an IV_TO_SPLIT structure, fill it with the relevant information and return a
1753*e4b17023SJohn Marino    pointer to it.  */
1754*e4b17023SJohn Marino 
1755*e4b17023SJohn Marino static struct iv_to_split *
analyze_iv_to_split_insn(rtx insn)1756*e4b17023SJohn Marino analyze_iv_to_split_insn (rtx insn)
1757*e4b17023SJohn Marino {
1758*e4b17023SJohn Marino   rtx set, dest;
1759*e4b17023SJohn Marino   struct rtx_iv iv;
1760*e4b17023SJohn Marino   struct iv_to_split *ivts;
1761*e4b17023SJohn Marino   bool ok;
1762*e4b17023SJohn Marino 
1763*e4b17023SJohn Marino   /* For now we just split the basic induction variables.  Later this may be
1764*e4b17023SJohn Marino      extended for example by selecting also addresses of memory references.  */
1765*e4b17023SJohn Marino   set = single_set (insn);
1766*e4b17023SJohn Marino   if (!set)
1767*e4b17023SJohn Marino     return NULL;
1768*e4b17023SJohn Marino 
1769*e4b17023SJohn Marino   dest = SET_DEST (set);
1770*e4b17023SJohn Marino   if (!REG_P (dest))
1771*e4b17023SJohn Marino     return NULL;
1772*e4b17023SJohn Marino 
1773*e4b17023SJohn Marino   if (!biv_p (insn, dest))
1774*e4b17023SJohn Marino     return NULL;
1775*e4b17023SJohn Marino 
1776*e4b17023SJohn Marino   ok = iv_analyze_result (insn, dest, &iv);
1777*e4b17023SJohn Marino 
1778*e4b17023SJohn Marino   /* This used to be an assert under the assumption that if biv_p returns
1779*e4b17023SJohn Marino      true that iv_analyze_result must also return true.  However, that
1780*e4b17023SJohn Marino      assumption is not strictly correct as evidenced by pr25569.
1781*e4b17023SJohn Marino 
1782*e4b17023SJohn Marino      Returning NULL when iv_analyze_result returns false is safe and
1783*e4b17023SJohn Marino      avoids the problems in pr25569 until the iv_analyze_* routines
1784*e4b17023SJohn Marino      can be fixed, which is apparently hard and time consuming
1785*e4b17023SJohn Marino      according to their author.  */
1786*e4b17023SJohn Marino   if (! ok)
1787*e4b17023SJohn Marino     return NULL;
1788*e4b17023SJohn Marino 
1789*e4b17023SJohn Marino   if (iv.step == const0_rtx
1790*e4b17023SJohn Marino       || iv.mode != iv.extend_mode)
1791*e4b17023SJohn Marino     return NULL;
1792*e4b17023SJohn Marino 
1793*e4b17023SJohn Marino   /* Record the insn to split.  */
1794*e4b17023SJohn Marino   ivts = XNEW (struct iv_to_split);
1795*e4b17023SJohn Marino   ivts->insn = insn;
1796*e4b17023SJohn Marino   ivts->base_var = NULL_RTX;
1797*e4b17023SJohn Marino   ivts->step = iv.step;
1798*e4b17023SJohn Marino   ivts->next = NULL;
1799*e4b17023SJohn Marino   ivts->n_loc = 1;
1800*e4b17023SJohn Marino   ivts->loc[0] = 1;
1801*e4b17023SJohn Marino 
1802*e4b17023SJohn Marino   return ivts;
1803*e4b17023SJohn Marino }
1804*e4b17023SJohn Marino 
1805*e4b17023SJohn Marino /* Determines which of insns in LOOP can be optimized.
1806*e4b17023SJohn Marino    Return a OPT_INFO struct with the relevant hash tables filled
1807*e4b17023SJohn Marino    with all insns to be optimized.  The FIRST_NEW_BLOCK field
1808*e4b17023SJohn Marino    is undefined for the return value.  */
1809*e4b17023SJohn Marino 
1810*e4b17023SJohn Marino static struct opt_info *
analyze_insns_in_loop(struct loop * loop)1811*e4b17023SJohn Marino analyze_insns_in_loop (struct loop *loop)
1812*e4b17023SJohn Marino {
1813*e4b17023SJohn Marino   basic_block *body, bb;
1814*e4b17023SJohn Marino   unsigned i;
1815*e4b17023SJohn Marino   struct opt_info *opt_info = XCNEW (struct opt_info);
1816*e4b17023SJohn Marino   rtx insn;
1817*e4b17023SJohn Marino   struct iv_to_split *ivts = NULL;
1818*e4b17023SJohn Marino   struct var_to_expand *ves = NULL;
1819*e4b17023SJohn Marino   PTR *slot1;
1820*e4b17023SJohn Marino   PTR *slot2;
1821*e4b17023SJohn Marino   VEC (edge, heap) *edges = get_loop_exit_edges (loop);
1822*e4b17023SJohn Marino   edge exit;
1823*e4b17023SJohn Marino   bool can_apply = false;
1824*e4b17023SJohn Marino 
1825*e4b17023SJohn Marino   iv_analysis_loop_init (loop);
1826*e4b17023SJohn Marino 
1827*e4b17023SJohn Marino   body = get_loop_body (loop);
1828*e4b17023SJohn Marino 
1829*e4b17023SJohn Marino   if (flag_split_ivs_in_unroller)
1830*e4b17023SJohn Marino     {
1831*e4b17023SJohn Marino       opt_info->insns_to_split = htab_create (5 * loop->num_nodes,
1832*e4b17023SJohn Marino 					      si_info_hash, si_info_eq, free);
1833*e4b17023SJohn Marino       opt_info->iv_to_split_head = NULL;
1834*e4b17023SJohn Marino       opt_info->iv_to_split_tail = &opt_info->iv_to_split_head;
1835*e4b17023SJohn Marino     }
1836*e4b17023SJohn Marino 
1837*e4b17023SJohn Marino   /* Record the loop exit bb and loop preheader before the unrolling.  */
1838*e4b17023SJohn Marino   opt_info->loop_preheader = loop_preheader_edge (loop)->src;
1839*e4b17023SJohn Marino 
1840*e4b17023SJohn Marino   if (VEC_length (edge, edges) == 1)
1841*e4b17023SJohn Marino     {
1842*e4b17023SJohn Marino       exit = VEC_index (edge, edges, 0);
1843*e4b17023SJohn Marino       if (!(exit->flags & EDGE_COMPLEX))
1844*e4b17023SJohn Marino 	{
1845*e4b17023SJohn Marino 	  opt_info->loop_exit = split_edge (exit);
1846*e4b17023SJohn Marino 	  can_apply = true;
1847*e4b17023SJohn Marino 	}
1848*e4b17023SJohn Marino     }
1849*e4b17023SJohn Marino 
1850*e4b17023SJohn Marino   if (flag_variable_expansion_in_unroller
1851*e4b17023SJohn Marino       && can_apply)
1852*e4b17023SJohn Marino     {
1853*e4b17023SJohn Marino       opt_info->insns_with_var_to_expand = htab_create (5 * loop->num_nodes,
1854*e4b17023SJohn Marino 							ve_info_hash,
1855*e4b17023SJohn Marino 							ve_info_eq, free);
1856*e4b17023SJohn Marino       opt_info->var_to_expand_head = NULL;
1857*e4b17023SJohn Marino       opt_info->var_to_expand_tail = &opt_info->var_to_expand_head;
1858*e4b17023SJohn Marino     }
1859*e4b17023SJohn Marino 
1860*e4b17023SJohn Marino   for (i = 0; i < loop->num_nodes; i++)
1861*e4b17023SJohn Marino     {
1862*e4b17023SJohn Marino       bb = body[i];
1863*e4b17023SJohn Marino       if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
1864*e4b17023SJohn Marino 	continue;
1865*e4b17023SJohn Marino 
1866*e4b17023SJohn Marino       FOR_BB_INSNS (bb, insn)
1867*e4b17023SJohn Marino       {
1868*e4b17023SJohn Marino         if (!INSN_P (insn))
1869*e4b17023SJohn Marino           continue;
1870*e4b17023SJohn Marino 
1871*e4b17023SJohn Marino         if (opt_info->insns_to_split)
1872*e4b17023SJohn Marino           ivts = analyze_iv_to_split_insn (insn);
1873*e4b17023SJohn Marino 
1874*e4b17023SJohn Marino         if (ivts)
1875*e4b17023SJohn Marino           {
1876*e4b17023SJohn Marino             slot1 = htab_find_slot (opt_info->insns_to_split, ivts, INSERT);
1877*e4b17023SJohn Marino 	    gcc_assert (*slot1 == NULL);
1878*e4b17023SJohn Marino             *slot1 = ivts;
1879*e4b17023SJohn Marino 	    *opt_info->iv_to_split_tail = ivts;
1880*e4b17023SJohn Marino 	    opt_info->iv_to_split_tail = &ivts->next;
1881*e4b17023SJohn Marino             continue;
1882*e4b17023SJohn Marino           }
1883*e4b17023SJohn Marino 
1884*e4b17023SJohn Marino         if (opt_info->insns_with_var_to_expand)
1885*e4b17023SJohn Marino           ves = analyze_insn_to_expand_var (loop, insn);
1886*e4b17023SJohn Marino 
1887*e4b17023SJohn Marino         if (ves)
1888*e4b17023SJohn Marino           {
1889*e4b17023SJohn Marino             slot2 = htab_find_slot (opt_info->insns_with_var_to_expand, ves, INSERT);
1890*e4b17023SJohn Marino 	    gcc_assert (*slot2 == NULL);
1891*e4b17023SJohn Marino             *slot2 = ves;
1892*e4b17023SJohn Marino 	    *opt_info->var_to_expand_tail = ves;
1893*e4b17023SJohn Marino 	    opt_info->var_to_expand_tail = &ves->next;
1894*e4b17023SJohn Marino           }
1895*e4b17023SJohn Marino       }
1896*e4b17023SJohn Marino     }
1897*e4b17023SJohn Marino 
1898*e4b17023SJohn Marino   VEC_free (edge, heap, edges);
1899*e4b17023SJohn Marino   free (body);
1900*e4b17023SJohn Marino   return opt_info;
1901*e4b17023SJohn Marino }
1902*e4b17023SJohn Marino 
1903*e4b17023SJohn Marino /* Called just before loop duplication.  Records start of duplicated area
1904*e4b17023SJohn Marino    to OPT_INFO.  */
1905*e4b17023SJohn Marino 
1906*e4b17023SJohn Marino static void
opt_info_start_duplication(struct opt_info * opt_info)1907*e4b17023SJohn Marino opt_info_start_duplication (struct opt_info *opt_info)
1908*e4b17023SJohn Marino {
1909*e4b17023SJohn Marino   if (opt_info)
1910*e4b17023SJohn Marino     opt_info->first_new_block = last_basic_block;
1911*e4b17023SJohn Marino }
1912*e4b17023SJohn Marino 
1913*e4b17023SJohn Marino /* Determine the number of iterations between initialization of the base
1914*e4b17023SJohn Marino    variable and the current copy (N_COPY).  N_COPIES is the total number
1915*e4b17023SJohn Marino    of newly created copies.  UNROLLING is true if we are unrolling
1916*e4b17023SJohn Marino    (not peeling) the loop.  */
1917*e4b17023SJohn Marino 
1918*e4b17023SJohn Marino static unsigned
determine_split_iv_delta(unsigned n_copy,unsigned n_copies,bool unrolling)1919*e4b17023SJohn Marino determine_split_iv_delta (unsigned n_copy, unsigned n_copies, bool unrolling)
1920*e4b17023SJohn Marino {
1921*e4b17023SJohn Marino   if (unrolling)
1922*e4b17023SJohn Marino     {
1923*e4b17023SJohn Marino       /* If we are unrolling, initialization is done in the original loop
1924*e4b17023SJohn Marino 	 body (number 0).  */
1925*e4b17023SJohn Marino       return n_copy;
1926*e4b17023SJohn Marino     }
1927*e4b17023SJohn Marino   else
1928*e4b17023SJohn Marino     {
1929*e4b17023SJohn Marino       /* If we are peeling, the copy in that the initialization occurs has
1930*e4b17023SJohn Marino 	 number 1.  The original loop (number 0) is the last.  */
1931*e4b17023SJohn Marino       if (n_copy)
1932*e4b17023SJohn Marino 	return n_copy - 1;
1933*e4b17023SJohn Marino       else
1934*e4b17023SJohn Marino 	return n_copies;
1935*e4b17023SJohn Marino     }
1936*e4b17023SJohn Marino }
1937*e4b17023SJohn Marino 
1938*e4b17023SJohn Marino /* Locate in EXPR the expression corresponding to the location recorded
1939*e4b17023SJohn Marino    in IVTS, and return a pointer to the RTX for this location.  */
1940*e4b17023SJohn Marino 
1941*e4b17023SJohn Marino static rtx *
get_ivts_expr(rtx expr,struct iv_to_split * ivts)1942*e4b17023SJohn Marino get_ivts_expr (rtx expr, struct iv_to_split *ivts)
1943*e4b17023SJohn Marino {
1944*e4b17023SJohn Marino   unsigned i;
1945*e4b17023SJohn Marino   rtx *ret = &expr;
1946*e4b17023SJohn Marino 
1947*e4b17023SJohn Marino   for (i = 0; i < ivts->n_loc; i++)
1948*e4b17023SJohn Marino     ret = &XEXP (*ret, ivts->loc[i]);
1949*e4b17023SJohn Marino 
1950*e4b17023SJohn Marino   return ret;
1951*e4b17023SJohn Marino }
1952*e4b17023SJohn Marino 
1953*e4b17023SJohn Marino /* Allocate basic variable for the induction variable chain.  */
1954*e4b17023SJohn Marino 
1955*e4b17023SJohn Marino static void
allocate_basic_variable(struct iv_to_split * ivts)1956*e4b17023SJohn Marino allocate_basic_variable (struct iv_to_split *ivts)
1957*e4b17023SJohn Marino {
1958*e4b17023SJohn Marino   rtx expr = *get_ivts_expr (single_set (ivts->insn), ivts);
1959*e4b17023SJohn Marino 
1960*e4b17023SJohn Marino   ivts->base_var = gen_reg_rtx (GET_MODE (expr));
1961*e4b17023SJohn Marino }
1962*e4b17023SJohn Marino 
1963*e4b17023SJohn Marino /* Insert initialization of basic variable of IVTS before INSN, taking
1964*e4b17023SJohn Marino    the initial value from INSN.  */
1965*e4b17023SJohn Marino 
1966*e4b17023SJohn Marino static void
insert_base_initialization(struct iv_to_split * ivts,rtx insn)1967*e4b17023SJohn Marino insert_base_initialization (struct iv_to_split *ivts, rtx insn)
1968*e4b17023SJohn Marino {
1969*e4b17023SJohn Marino   rtx expr = copy_rtx (*get_ivts_expr (single_set (insn), ivts));
1970*e4b17023SJohn Marino   rtx seq;
1971*e4b17023SJohn Marino 
1972*e4b17023SJohn Marino   start_sequence ();
1973*e4b17023SJohn Marino   expr = force_operand (expr, ivts->base_var);
1974*e4b17023SJohn Marino   if (expr != ivts->base_var)
1975*e4b17023SJohn Marino     emit_move_insn (ivts->base_var, expr);
1976*e4b17023SJohn Marino   seq = get_insns ();
1977*e4b17023SJohn Marino   end_sequence ();
1978*e4b17023SJohn Marino 
1979*e4b17023SJohn Marino   emit_insn_before (seq, insn);
1980*e4b17023SJohn Marino }
1981*e4b17023SJohn Marino 
1982*e4b17023SJohn Marino /* Replace the use of induction variable described in IVTS in INSN
1983*e4b17023SJohn Marino    by base variable + DELTA * step.  */
1984*e4b17023SJohn Marino 
1985*e4b17023SJohn Marino static void
split_iv(struct iv_to_split * ivts,rtx insn,unsigned delta)1986*e4b17023SJohn Marino split_iv (struct iv_to_split *ivts, rtx insn, unsigned delta)
1987*e4b17023SJohn Marino {
1988*e4b17023SJohn Marino   rtx expr, *loc, seq, incr, var;
1989*e4b17023SJohn Marino   enum machine_mode mode = GET_MODE (ivts->base_var);
1990*e4b17023SJohn Marino   rtx src, dest, set;
1991*e4b17023SJohn Marino 
1992*e4b17023SJohn Marino   /* Construct base + DELTA * step.  */
1993*e4b17023SJohn Marino   if (!delta)
1994*e4b17023SJohn Marino     expr = ivts->base_var;
1995*e4b17023SJohn Marino   else
1996*e4b17023SJohn Marino     {
1997*e4b17023SJohn Marino       incr = simplify_gen_binary (MULT, mode,
1998*e4b17023SJohn Marino 				  ivts->step, gen_int_mode (delta, mode));
1999*e4b17023SJohn Marino       expr = simplify_gen_binary (PLUS, GET_MODE (ivts->base_var),
2000*e4b17023SJohn Marino 				  ivts->base_var, incr);
2001*e4b17023SJohn Marino     }
2002*e4b17023SJohn Marino 
2003*e4b17023SJohn Marino   /* Figure out where to do the replacement.  */
2004*e4b17023SJohn Marino   loc = get_ivts_expr (single_set (insn), ivts);
2005*e4b17023SJohn Marino 
2006*e4b17023SJohn Marino   /* If we can make the replacement right away, we're done.  */
2007*e4b17023SJohn Marino   if (validate_change (insn, loc, expr, 0))
2008*e4b17023SJohn Marino     return;
2009*e4b17023SJohn Marino 
2010*e4b17023SJohn Marino   /* Otherwise, force EXPR into a register and try again.  */
2011*e4b17023SJohn Marino   start_sequence ();
2012*e4b17023SJohn Marino   var = gen_reg_rtx (mode);
2013*e4b17023SJohn Marino   expr = force_operand (expr, var);
2014*e4b17023SJohn Marino   if (expr != var)
2015*e4b17023SJohn Marino     emit_move_insn (var, expr);
2016*e4b17023SJohn Marino   seq = get_insns ();
2017*e4b17023SJohn Marino   end_sequence ();
2018*e4b17023SJohn Marino   emit_insn_before (seq, insn);
2019*e4b17023SJohn Marino 
2020*e4b17023SJohn Marino   if (validate_change (insn, loc, var, 0))
2021*e4b17023SJohn Marino     return;
2022*e4b17023SJohn Marino 
2023*e4b17023SJohn Marino   /* The last chance.  Try recreating the assignment in insn
2024*e4b17023SJohn Marino      completely from scratch.  */
2025*e4b17023SJohn Marino   set = single_set (insn);
2026*e4b17023SJohn Marino   gcc_assert (set);
2027*e4b17023SJohn Marino 
2028*e4b17023SJohn Marino   start_sequence ();
2029*e4b17023SJohn Marino   *loc = var;
2030*e4b17023SJohn Marino   src = copy_rtx (SET_SRC (set));
2031*e4b17023SJohn Marino   dest = copy_rtx (SET_DEST (set));
2032*e4b17023SJohn Marino   src = force_operand (src, dest);
2033*e4b17023SJohn Marino   if (src != dest)
2034*e4b17023SJohn Marino     emit_move_insn (dest, src);
2035*e4b17023SJohn Marino   seq = get_insns ();
2036*e4b17023SJohn Marino   end_sequence ();
2037*e4b17023SJohn Marino 
2038*e4b17023SJohn Marino   emit_insn_before (seq, insn);
2039*e4b17023SJohn Marino   delete_insn (insn);
2040*e4b17023SJohn Marino }
2041*e4b17023SJohn Marino 
2042*e4b17023SJohn Marino 
2043*e4b17023SJohn Marino /* Return one expansion of the accumulator recorded in struct VE.  */
2044*e4b17023SJohn Marino 
2045*e4b17023SJohn Marino static rtx
get_expansion(struct var_to_expand * ve)2046*e4b17023SJohn Marino get_expansion (struct var_to_expand *ve)
2047*e4b17023SJohn Marino {
2048*e4b17023SJohn Marino   rtx reg;
2049*e4b17023SJohn Marino 
2050*e4b17023SJohn Marino   if (ve->reuse_expansion == 0)
2051*e4b17023SJohn Marino     reg = ve->reg;
2052*e4b17023SJohn Marino   else
2053*e4b17023SJohn Marino     reg = VEC_index (rtx, ve->var_expansions, ve->reuse_expansion - 1);
2054*e4b17023SJohn Marino 
2055*e4b17023SJohn Marino   if (VEC_length (rtx, ve->var_expansions) == (unsigned) ve->reuse_expansion)
2056*e4b17023SJohn Marino     ve->reuse_expansion = 0;
2057*e4b17023SJohn Marino   else
2058*e4b17023SJohn Marino     ve->reuse_expansion++;
2059*e4b17023SJohn Marino 
2060*e4b17023SJohn Marino   return reg;
2061*e4b17023SJohn Marino }
2062*e4b17023SJohn Marino 
2063*e4b17023SJohn Marino 
2064*e4b17023SJohn Marino /* Given INSN replace the uses of the accumulator recorded in VE
2065*e4b17023SJohn Marino    with a new register.  */
2066*e4b17023SJohn Marino 
2067*e4b17023SJohn Marino static void
expand_var_during_unrolling(struct var_to_expand * ve,rtx insn)2068*e4b17023SJohn Marino expand_var_during_unrolling (struct var_to_expand *ve, rtx insn)
2069*e4b17023SJohn Marino {
2070*e4b17023SJohn Marino   rtx new_reg, set;
2071*e4b17023SJohn Marino   bool really_new_expansion = false;
2072*e4b17023SJohn Marino 
2073*e4b17023SJohn Marino   set = single_set (insn);
2074*e4b17023SJohn Marino   gcc_assert (set);
2075*e4b17023SJohn Marino 
2076*e4b17023SJohn Marino   /* Generate a new register only if the expansion limit has not been
2077*e4b17023SJohn Marino      reached.  Else reuse an already existing expansion.  */
2078*e4b17023SJohn Marino   if (PARAM_VALUE (PARAM_MAX_VARIABLE_EXPANSIONS) > ve->expansion_count)
2079*e4b17023SJohn Marino     {
2080*e4b17023SJohn Marino       really_new_expansion = true;
2081*e4b17023SJohn Marino       new_reg = gen_reg_rtx (GET_MODE (ve->reg));
2082*e4b17023SJohn Marino     }
2083*e4b17023SJohn Marino   else
2084*e4b17023SJohn Marino     new_reg = get_expansion (ve);
2085*e4b17023SJohn Marino 
2086*e4b17023SJohn Marino   validate_change (insn, &SET_DEST (set), new_reg, 1);
2087*e4b17023SJohn Marino   validate_change (insn, &XEXP (SET_SRC (set), ve->accum_pos), new_reg, 1);
2088*e4b17023SJohn Marino 
2089*e4b17023SJohn Marino   if (apply_change_group ())
2090*e4b17023SJohn Marino     if (really_new_expansion)
2091*e4b17023SJohn Marino       {
2092*e4b17023SJohn Marino         VEC_safe_push (rtx, heap, ve->var_expansions, new_reg);
2093*e4b17023SJohn Marino         ve->expansion_count++;
2094*e4b17023SJohn Marino       }
2095*e4b17023SJohn Marino }
2096*e4b17023SJohn Marino 
2097*e4b17023SJohn Marino /* Initialize the variable expansions in loop preheader.  PLACE is the
2098*e4b17023SJohn Marino    loop-preheader basic block where the initialization of the
2099*e4b17023SJohn Marino    expansions should take place.  The expansions are initialized with
2100*e4b17023SJohn Marino    (-0) when the operation is plus or minus to honor sign zero.  This
2101*e4b17023SJohn Marino    way we can prevent cases where the sign of the final result is
2102*e4b17023SJohn Marino    effected by the sign of the expansion.  Here is an example to
2103*e4b17023SJohn Marino    demonstrate this:
2104*e4b17023SJohn Marino 
2105*e4b17023SJohn Marino    for (i = 0 ; i < n; i++)
2106*e4b17023SJohn Marino      sum += something;
2107*e4b17023SJohn Marino 
2108*e4b17023SJohn Marino    ==>
2109*e4b17023SJohn Marino 
2110*e4b17023SJohn Marino    sum += something
2111*e4b17023SJohn Marino    ....
2112*e4b17023SJohn Marino    i = i+1;
2113*e4b17023SJohn Marino    sum1 += something
2114*e4b17023SJohn Marino    ....
2115*e4b17023SJohn Marino    i = i+1
2116*e4b17023SJohn Marino    sum2 += something;
2117*e4b17023SJohn Marino    ....
2118*e4b17023SJohn Marino 
2119*e4b17023SJohn Marino    When SUM is initialized with -zero and SOMETHING is also -zero; the
2120*e4b17023SJohn Marino    final result of sum should be -zero thus the expansions sum1 and sum2
2121*e4b17023SJohn Marino    should be initialized with -zero as well (otherwise we will get +zero
2122*e4b17023SJohn Marino    as the final result).  */
2123*e4b17023SJohn Marino 
2124*e4b17023SJohn Marino static void
insert_var_expansion_initialization(struct var_to_expand * ve,basic_block place)2125*e4b17023SJohn Marino insert_var_expansion_initialization (struct var_to_expand *ve,
2126*e4b17023SJohn Marino 				     basic_block place)
2127*e4b17023SJohn Marino {
2128*e4b17023SJohn Marino   rtx seq, var, zero_init, insn;
2129*e4b17023SJohn Marino   unsigned i;
2130*e4b17023SJohn Marino   enum machine_mode mode = GET_MODE (ve->reg);
2131*e4b17023SJohn Marino   bool honor_signed_zero_p = HONOR_SIGNED_ZEROS (mode);
2132*e4b17023SJohn Marino 
2133*e4b17023SJohn Marino   if (VEC_length (rtx, ve->var_expansions) == 0)
2134*e4b17023SJohn Marino     return;
2135*e4b17023SJohn Marino 
2136*e4b17023SJohn Marino   start_sequence ();
2137*e4b17023SJohn Marino   switch (ve->op)
2138*e4b17023SJohn Marino     {
2139*e4b17023SJohn Marino     case FMA:
2140*e4b17023SJohn Marino       /* Note that we only accumulate FMA via the ADD operand.  */
2141*e4b17023SJohn Marino     case PLUS:
2142*e4b17023SJohn Marino     case MINUS:
2143*e4b17023SJohn Marino       FOR_EACH_VEC_ELT (rtx, ve->var_expansions, i, var)
2144*e4b17023SJohn Marino         {
2145*e4b17023SJohn Marino 	  if (honor_signed_zero_p)
2146*e4b17023SJohn Marino 	    zero_init = simplify_gen_unary (NEG, mode, CONST0_RTX (mode), mode);
2147*e4b17023SJohn Marino 	  else
2148*e4b17023SJohn Marino 	    zero_init = CONST0_RTX (mode);
2149*e4b17023SJohn Marino           emit_move_insn (var, zero_init);
2150*e4b17023SJohn Marino         }
2151*e4b17023SJohn Marino       break;
2152*e4b17023SJohn Marino 
2153*e4b17023SJohn Marino     case MULT:
2154*e4b17023SJohn Marino       FOR_EACH_VEC_ELT (rtx, ve->var_expansions, i, var)
2155*e4b17023SJohn Marino         {
2156*e4b17023SJohn Marino           zero_init = CONST1_RTX (GET_MODE (var));
2157*e4b17023SJohn Marino           emit_move_insn (var, zero_init);
2158*e4b17023SJohn Marino         }
2159*e4b17023SJohn Marino       break;
2160*e4b17023SJohn Marino 
2161*e4b17023SJohn Marino     default:
2162*e4b17023SJohn Marino       gcc_unreachable ();
2163*e4b17023SJohn Marino     }
2164*e4b17023SJohn Marino 
2165*e4b17023SJohn Marino   seq = get_insns ();
2166*e4b17023SJohn Marino   end_sequence ();
2167*e4b17023SJohn Marino 
2168*e4b17023SJohn Marino   insn = BB_HEAD (place);
2169*e4b17023SJohn Marino   while (!NOTE_INSN_BASIC_BLOCK_P (insn))
2170*e4b17023SJohn Marino     insn = NEXT_INSN (insn);
2171*e4b17023SJohn Marino 
2172*e4b17023SJohn Marino   emit_insn_after (seq, insn);
2173*e4b17023SJohn Marino }
2174*e4b17023SJohn Marino 
2175*e4b17023SJohn Marino /* Combine the variable expansions at the loop exit.  PLACE is the
2176*e4b17023SJohn Marino    loop exit basic block where the summation of the expansions should
2177*e4b17023SJohn Marino    take place.  */
2178*e4b17023SJohn Marino 
2179*e4b17023SJohn Marino static void
combine_var_copies_in_loop_exit(struct var_to_expand * ve,basic_block place)2180*e4b17023SJohn Marino combine_var_copies_in_loop_exit (struct var_to_expand *ve, basic_block place)
2181*e4b17023SJohn Marino {
2182*e4b17023SJohn Marino   rtx sum = ve->reg;
2183*e4b17023SJohn Marino   rtx expr, seq, var, insn;
2184*e4b17023SJohn Marino   unsigned i;
2185*e4b17023SJohn Marino 
2186*e4b17023SJohn Marino   if (VEC_length (rtx, ve->var_expansions) == 0)
2187*e4b17023SJohn Marino     return;
2188*e4b17023SJohn Marino 
2189*e4b17023SJohn Marino   start_sequence ();
2190*e4b17023SJohn Marino   switch (ve->op)
2191*e4b17023SJohn Marino     {
2192*e4b17023SJohn Marino     case FMA:
2193*e4b17023SJohn Marino       /* Note that we only accumulate FMA via the ADD operand.  */
2194*e4b17023SJohn Marino     case PLUS:
2195*e4b17023SJohn Marino     case MINUS:
2196*e4b17023SJohn Marino       FOR_EACH_VEC_ELT (rtx, ve->var_expansions, i, var)
2197*e4b17023SJohn Marino 	sum = simplify_gen_binary (PLUS, GET_MODE (ve->reg), var, sum);
2198*e4b17023SJohn Marino       break;
2199*e4b17023SJohn Marino 
2200*e4b17023SJohn Marino     case MULT:
2201*e4b17023SJohn Marino       FOR_EACH_VEC_ELT (rtx, ve->var_expansions, i, var)
2202*e4b17023SJohn Marino 	sum = simplify_gen_binary (MULT, GET_MODE (ve->reg), var, sum);
2203*e4b17023SJohn Marino       break;
2204*e4b17023SJohn Marino 
2205*e4b17023SJohn Marino     default:
2206*e4b17023SJohn Marino       gcc_unreachable ();
2207*e4b17023SJohn Marino     }
2208*e4b17023SJohn Marino 
2209*e4b17023SJohn Marino   expr = force_operand (sum, ve->reg);
2210*e4b17023SJohn Marino   if (expr != ve->reg)
2211*e4b17023SJohn Marino     emit_move_insn (ve->reg, expr);
2212*e4b17023SJohn Marino   seq = get_insns ();
2213*e4b17023SJohn Marino   end_sequence ();
2214*e4b17023SJohn Marino 
2215*e4b17023SJohn Marino   insn = BB_HEAD (place);
2216*e4b17023SJohn Marino   while (!NOTE_INSN_BASIC_BLOCK_P (insn))
2217*e4b17023SJohn Marino     insn = NEXT_INSN (insn);
2218*e4b17023SJohn Marino 
2219*e4b17023SJohn Marino   emit_insn_after (seq, insn);
2220*e4b17023SJohn Marino }
2221*e4b17023SJohn Marino 
2222*e4b17023SJohn Marino /* Apply loop optimizations in loop copies using the
2223*e4b17023SJohn Marino    data which gathered during the unrolling.  Structure
2224*e4b17023SJohn Marino    OPT_INFO record that data.
2225*e4b17023SJohn Marino 
2226*e4b17023SJohn Marino    UNROLLING is true if we unrolled (not peeled) the loop.
2227*e4b17023SJohn Marino    REWRITE_ORIGINAL_BODY is true if we should also rewrite the original body of
2228*e4b17023SJohn Marino    the loop (as it should happen in complete unrolling, but not in ordinary
2229*e4b17023SJohn Marino    peeling of the loop).  */
2230*e4b17023SJohn Marino 
2231*e4b17023SJohn Marino static void
apply_opt_in_copies(struct opt_info * opt_info,unsigned n_copies,bool unrolling,bool rewrite_original_loop)2232*e4b17023SJohn Marino apply_opt_in_copies (struct opt_info *opt_info,
2233*e4b17023SJohn Marino                      unsigned n_copies, bool unrolling,
2234*e4b17023SJohn Marino                      bool rewrite_original_loop)
2235*e4b17023SJohn Marino {
2236*e4b17023SJohn Marino   unsigned i, delta;
2237*e4b17023SJohn Marino   basic_block bb, orig_bb;
2238*e4b17023SJohn Marino   rtx insn, orig_insn, next;
2239*e4b17023SJohn Marino   struct iv_to_split ivts_templ, *ivts;
2240*e4b17023SJohn Marino   struct var_to_expand ve_templ, *ves;
2241*e4b17023SJohn Marino 
2242*e4b17023SJohn Marino   /* Sanity check -- we need to put initialization in the original loop
2243*e4b17023SJohn Marino      body.  */
2244*e4b17023SJohn Marino   gcc_assert (!unrolling || rewrite_original_loop);
2245*e4b17023SJohn Marino 
2246*e4b17023SJohn Marino   /* Allocate the basic variables (i0).  */
2247*e4b17023SJohn Marino   if (opt_info->insns_to_split)
2248*e4b17023SJohn Marino     for (ivts = opt_info->iv_to_split_head; ivts; ivts = ivts->next)
2249*e4b17023SJohn Marino       allocate_basic_variable (ivts);
2250*e4b17023SJohn Marino 
2251*e4b17023SJohn Marino   for (i = opt_info->first_new_block; i < (unsigned) last_basic_block; i++)
2252*e4b17023SJohn Marino     {
2253*e4b17023SJohn Marino       bb = BASIC_BLOCK (i);
2254*e4b17023SJohn Marino       orig_bb = get_bb_original (bb);
2255*e4b17023SJohn Marino 
2256*e4b17023SJohn Marino       /* bb->aux holds position in copy sequence initialized by
2257*e4b17023SJohn Marino 	 duplicate_loop_to_header_edge.  */
2258*e4b17023SJohn Marino       delta = determine_split_iv_delta ((size_t)bb->aux, n_copies,
2259*e4b17023SJohn Marino 					unrolling);
2260*e4b17023SJohn Marino       bb->aux = 0;
2261*e4b17023SJohn Marino       orig_insn = BB_HEAD (orig_bb);
2262*e4b17023SJohn Marino       for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb)); insn = next)
2263*e4b17023SJohn Marino         {
2264*e4b17023SJohn Marino           next = NEXT_INSN (insn);
2265*e4b17023SJohn Marino 	  if (!INSN_P (insn)
2266*e4b17023SJohn Marino 	      || (DEBUG_INSN_P (insn)
2267*e4b17023SJohn Marino 		  && TREE_CODE (INSN_VAR_LOCATION_DECL (insn)) == LABEL_DECL))
2268*e4b17023SJohn Marino             continue;
2269*e4b17023SJohn Marino 
2270*e4b17023SJohn Marino 	  while (!INSN_P (orig_insn)
2271*e4b17023SJohn Marino 		 || (DEBUG_INSN_P (orig_insn)
2272*e4b17023SJohn Marino 		     && (TREE_CODE (INSN_VAR_LOCATION_DECL (orig_insn))
2273*e4b17023SJohn Marino 			 == LABEL_DECL)))
2274*e4b17023SJohn Marino             orig_insn = NEXT_INSN (orig_insn);
2275*e4b17023SJohn Marino 
2276*e4b17023SJohn Marino           ivts_templ.insn = orig_insn;
2277*e4b17023SJohn Marino           ve_templ.insn = orig_insn;
2278*e4b17023SJohn Marino 
2279*e4b17023SJohn Marino           /* Apply splitting iv optimization.  */
2280*e4b17023SJohn Marino           if (opt_info->insns_to_split)
2281*e4b17023SJohn Marino             {
2282*e4b17023SJohn Marino               ivts = (struct iv_to_split *)
2283*e4b17023SJohn Marino 		htab_find (opt_info->insns_to_split, &ivts_templ);
2284*e4b17023SJohn Marino 
2285*e4b17023SJohn Marino               if (ivts)
2286*e4b17023SJohn Marino                 {
2287*e4b17023SJohn Marino 		  gcc_assert (GET_CODE (PATTERN (insn))
2288*e4b17023SJohn Marino 			      == GET_CODE (PATTERN (orig_insn)));
2289*e4b17023SJohn Marino 
2290*e4b17023SJohn Marino                   if (!delta)
2291*e4b17023SJohn Marino                     insert_base_initialization (ivts, insn);
2292*e4b17023SJohn Marino                   split_iv (ivts, insn, delta);
2293*e4b17023SJohn Marino                 }
2294*e4b17023SJohn Marino             }
2295*e4b17023SJohn Marino           /* Apply variable expansion optimization.  */
2296*e4b17023SJohn Marino           if (unrolling && opt_info->insns_with_var_to_expand)
2297*e4b17023SJohn Marino             {
2298*e4b17023SJohn Marino               ves = (struct var_to_expand *)
2299*e4b17023SJohn Marino 		htab_find (opt_info->insns_with_var_to_expand, &ve_templ);
2300*e4b17023SJohn Marino               if (ves)
2301*e4b17023SJohn Marino                 {
2302*e4b17023SJohn Marino 		  gcc_assert (GET_CODE (PATTERN (insn))
2303*e4b17023SJohn Marino 			      == GET_CODE (PATTERN (orig_insn)));
2304*e4b17023SJohn Marino                   expand_var_during_unrolling (ves, insn);
2305*e4b17023SJohn Marino                 }
2306*e4b17023SJohn Marino             }
2307*e4b17023SJohn Marino           orig_insn = NEXT_INSN (orig_insn);
2308*e4b17023SJohn Marino         }
2309*e4b17023SJohn Marino     }
2310*e4b17023SJohn Marino 
2311*e4b17023SJohn Marino   if (!rewrite_original_loop)
2312*e4b17023SJohn Marino     return;
2313*e4b17023SJohn Marino 
2314*e4b17023SJohn Marino   /* Initialize the variable expansions in the loop preheader
2315*e4b17023SJohn Marino      and take care of combining them at the loop exit.  */
2316*e4b17023SJohn Marino   if (opt_info->insns_with_var_to_expand)
2317*e4b17023SJohn Marino     {
2318*e4b17023SJohn Marino       for (ves = opt_info->var_to_expand_head; ves; ves = ves->next)
2319*e4b17023SJohn Marino 	insert_var_expansion_initialization (ves, opt_info->loop_preheader);
2320*e4b17023SJohn Marino       for (ves = opt_info->var_to_expand_head; ves; ves = ves->next)
2321*e4b17023SJohn Marino 	combine_var_copies_in_loop_exit (ves, opt_info->loop_exit);
2322*e4b17023SJohn Marino     }
2323*e4b17023SJohn Marino 
2324*e4b17023SJohn Marino   /* Rewrite also the original loop body.  Find them as originals of the blocks
2325*e4b17023SJohn Marino      in the last copied iteration, i.e. those that have
2326*e4b17023SJohn Marino      get_bb_copy (get_bb_original (bb)) == bb.  */
2327*e4b17023SJohn Marino   for (i = opt_info->first_new_block; i < (unsigned) last_basic_block; i++)
2328*e4b17023SJohn Marino     {
2329*e4b17023SJohn Marino       bb = BASIC_BLOCK (i);
2330*e4b17023SJohn Marino       orig_bb = get_bb_original (bb);
2331*e4b17023SJohn Marino       if (get_bb_copy (orig_bb) != bb)
2332*e4b17023SJohn Marino 	continue;
2333*e4b17023SJohn Marino 
2334*e4b17023SJohn Marino       delta = determine_split_iv_delta (0, n_copies, unrolling);
2335*e4b17023SJohn Marino       for (orig_insn = BB_HEAD (orig_bb);
2336*e4b17023SJohn Marino            orig_insn != NEXT_INSN (BB_END (bb));
2337*e4b17023SJohn Marino            orig_insn = next)
2338*e4b17023SJohn Marino         {
2339*e4b17023SJohn Marino           next = NEXT_INSN (orig_insn);
2340*e4b17023SJohn Marino 
2341*e4b17023SJohn Marino           if (!INSN_P (orig_insn))
2342*e4b17023SJohn Marino  	    continue;
2343*e4b17023SJohn Marino 
2344*e4b17023SJohn Marino           ivts_templ.insn = orig_insn;
2345*e4b17023SJohn Marino           if (opt_info->insns_to_split)
2346*e4b17023SJohn Marino             {
2347*e4b17023SJohn Marino               ivts = (struct iv_to_split *)
2348*e4b17023SJohn Marino 		htab_find (opt_info->insns_to_split, &ivts_templ);
2349*e4b17023SJohn Marino               if (ivts)
2350*e4b17023SJohn Marino                 {
2351*e4b17023SJohn Marino                   if (!delta)
2352*e4b17023SJohn Marino                     insert_base_initialization (ivts, orig_insn);
2353*e4b17023SJohn Marino                   split_iv (ivts, orig_insn, delta);
2354*e4b17023SJohn Marino                   continue;
2355*e4b17023SJohn Marino                 }
2356*e4b17023SJohn Marino             }
2357*e4b17023SJohn Marino 
2358*e4b17023SJohn Marino         }
2359*e4b17023SJohn Marino     }
2360*e4b17023SJohn Marino }
2361*e4b17023SJohn Marino 
2362*e4b17023SJohn Marino /* Release OPT_INFO.  */
2363*e4b17023SJohn Marino 
2364*e4b17023SJohn Marino static void
free_opt_info(struct opt_info * opt_info)2365*e4b17023SJohn Marino free_opt_info (struct opt_info *opt_info)
2366*e4b17023SJohn Marino {
2367*e4b17023SJohn Marino   if (opt_info->insns_to_split)
2368*e4b17023SJohn Marino     htab_delete (opt_info->insns_to_split);
2369*e4b17023SJohn Marino   if (opt_info->insns_with_var_to_expand)
2370*e4b17023SJohn Marino     {
2371*e4b17023SJohn Marino       struct var_to_expand *ves;
2372*e4b17023SJohn Marino 
2373*e4b17023SJohn Marino       for (ves = opt_info->var_to_expand_head; ves; ves = ves->next)
2374*e4b17023SJohn Marino 	VEC_free (rtx, heap, ves->var_expansions);
2375*e4b17023SJohn Marino       htab_delete (opt_info->insns_with_var_to_expand);
2376*e4b17023SJohn Marino     }
2377*e4b17023SJohn Marino   free (opt_info);
2378*e4b17023SJohn Marino }
2379