1*e4b17023SJohn Marino /* Loop unrolling and peeling.
2*e4b17023SJohn Marino Copyright (C) 2002, 2003, 2004, 2005, 2007, 2008, 2010, 2011
3*e4b17023SJohn Marino Free Software Foundation, Inc.
4*e4b17023SJohn Marino
5*e4b17023SJohn Marino This file is part of GCC.
6*e4b17023SJohn Marino
7*e4b17023SJohn Marino GCC is free software; you can redistribute it and/or modify it under
8*e4b17023SJohn Marino the terms of the GNU General Public License as published by the Free
9*e4b17023SJohn Marino Software Foundation; either version 3, or (at your option) any later
10*e4b17023SJohn Marino version.
11*e4b17023SJohn Marino
12*e4b17023SJohn Marino GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13*e4b17023SJohn Marino WARRANTY; without even the implied warranty of MERCHANTABILITY or
14*e4b17023SJohn Marino FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15*e4b17023SJohn Marino for more details.
16*e4b17023SJohn Marino
17*e4b17023SJohn Marino You should have received a copy of the GNU General Public License
18*e4b17023SJohn Marino along with GCC; see the file COPYING3. If not see
19*e4b17023SJohn Marino <http://www.gnu.org/licenses/>. */
20*e4b17023SJohn Marino
21*e4b17023SJohn Marino #include "config.h"
22*e4b17023SJohn Marino #include "system.h"
23*e4b17023SJohn Marino #include "coretypes.h"
24*e4b17023SJohn Marino #include "tm.h"
25*e4b17023SJohn Marino #include "rtl.h"
26*e4b17023SJohn Marino #include "hard-reg-set.h"
27*e4b17023SJohn Marino #include "obstack.h"
28*e4b17023SJohn Marino #include "basic-block.h"
29*e4b17023SJohn Marino #include "cfgloop.h"
30*e4b17023SJohn Marino #include "cfglayout.h"
31*e4b17023SJohn Marino #include "params.h"
32*e4b17023SJohn Marino #include "output.h"
33*e4b17023SJohn Marino #include "expr.h"
34*e4b17023SJohn Marino #include "hashtab.h"
35*e4b17023SJohn Marino #include "recog.h"
36*e4b17023SJohn Marino #include "target.h"
37*e4b17023SJohn Marino
38*e4b17023SJohn Marino /* This pass performs loop unrolling and peeling. We only perform these
39*e4b17023SJohn Marino optimizations on innermost loops (with single exception) because
40*e4b17023SJohn Marino the impact on performance is greatest here, and we want to avoid
41*e4b17023SJohn Marino unnecessary code size growth. The gain is caused by greater sequentiality
42*e4b17023SJohn Marino of code, better code to optimize for further passes and in some cases
43*e4b17023SJohn Marino by fewer testings of exit conditions. The main problem is code growth,
44*e4b17023SJohn Marino that impacts performance negatively due to effect of caches.
45*e4b17023SJohn Marino
46*e4b17023SJohn Marino What we do:
47*e4b17023SJohn Marino
48*e4b17023SJohn Marino -- complete peeling of once-rolling loops; this is the above mentioned
49*e4b17023SJohn Marino exception, as this causes loop to be cancelled completely and
50*e4b17023SJohn Marino does not cause code growth
51*e4b17023SJohn Marino -- complete peeling of loops that roll (small) constant times.
52*e4b17023SJohn Marino -- simple peeling of first iterations of loops that do not roll much
53*e4b17023SJohn Marino (according to profile feedback)
54*e4b17023SJohn Marino -- unrolling of loops that roll constant times; this is almost always
55*e4b17023SJohn Marino win, as we get rid of exit condition tests.
56*e4b17023SJohn Marino -- unrolling of loops that roll number of times that we can compute
57*e4b17023SJohn Marino in runtime; we also get rid of exit condition tests here, but there
58*e4b17023SJohn Marino is the extra expense for calculating the number of iterations
59*e4b17023SJohn Marino -- simple unrolling of remaining loops; this is performed only if we
60*e4b17023SJohn Marino are asked to, as the gain is questionable in this case and often
61*e4b17023SJohn Marino it may even slow down the code
62*e4b17023SJohn Marino For more detailed descriptions of each of those, see comments at
63*e4b17023SJohn Marino appropriate function below.
64*e4b17023SJohn Marino
65*e4b17023SJohn Marino There is a lot of parameters (defined and described in params.def) that
66*e4b17023SJohn Marino control how much we unroll/peel.
67*e4b17023SJohn Marino
68*e4b17023SJohn Marino ??? A great problem is that we don't have a good way how to determine
69*e4b17023SJohn Marino how many times we should unroll the loop; the experiments I have made
70*e4b17023SJohn Marino showed that this choice may affect performance in order of several %.
71*e4b17023SJohn Marino */
72*e4b17023SJohn Marino
73*e4b17023SJohn Marino /* Information about induction variables to split. */
74*e4b17023SJohn Marino
75*e4b17023SJohn Marino struct iv_to_split
76*e4b17023SJohn Marino {
77*e4b17023SJohn Marino rtx insn; /* The insn in that the induction variable occurs. */
78*e4b17023SJohn Marino rtx base_var; /* The variable on that the values in the further
79*e4b17023SJohn Marino iterations are based. */
80*e4b17023SJohn Marino rtx step; /* Step of the induction variable. */
81*e4b17023SJohn Marino struct iv_to_split *next; /* Next entry in walking order. */
82*e4b17023SJohn Marino unsigned n_loc;
83*e4b17023SJohn Marino unsigned loc[3]; /* Location where the definition of the induction
84*e4b17023SJohn Marino variable occurs in the insn. For example if
85*e4b17023SJohn Marino N_LOC is 2, the expression is located at
86*e4b17023SJohn Marino XEXP (XEXP (single_set, loc[0]), loc[1]). */
87*e4b17023SJohn Marino };
88*e4b17023SJohn Marino
89*e4b17023SJohn Marino /* Information about accumulators to expand. */
90*e4b17023SJohn Marino
91*e4b17023SJohn Marino struct var_to_expand
92*e4b17023SJohn Marino {
93*e4b17023SJohn Marino rtx insn; /* The insn in that the variable expansion occurs. */
94*e4b17023SJohn Marino rtx reg; /* The accumulator which is expanded. */
95*e4b17023SJohn Marino VEC(rtx,heap) *var_expansions; /* The copies of the accumulator which is expanded. */
96*e4b17023SJohn Marino struct var_to_expand *next; /* Next entry in walking order. */
97*e4b17023SJohn Marino enum rtx_code op; /* The type of the accumulation - addition, subtraction
98*e4b17023SJohn Marino or multiplication. */
99*e4b17023SJohn Marino int expansion_count; /* Count the number of expansions generated so far. */
100*e4b17023SJohn Marino int reuse_expansion; /* The expansion we intend to reuse to expand
101*e4b17023SJohn Marino the accumulator. If REUSE_EXPANSION is 0 reuse
102*e4b17023SJohn Marino the original accumulator. Else use
103*e4b17023SJohn Marino var_expansions[REUSE_EXPANSION - 1]. */
104*e4b17023SJohn Marino unsigned accum_pos; /* The position in which the accumulator is placed in
105*e4b17023SJohn Marino the insn src. For example in x = x + something
106*e4b17023SJohn Marino accum_pos is 0 while in x = something + x accum_pos
107*e4b17023SJohn Marino is 1. */
108*e4b17023SJohn Marino };
109*e4b17023SJohn Marino
110*e4b17023SJohn Marino /* Information about optimization applied in
111*e4b17023SJohn Marino the unrolled loop. */
112*e4b17023SJohn Marino
113*e4b17023SJohn Marino struct opt_info
114*e4b17023SJohn Marino {
115*e4b17023SJohn Marino htab_t insns_to_split; /* A hashtable of insns to split. */
116*e4b17023SJohn Marino struct iv_to_split *iv_to_split_head; /* The first iv to split. */
117*e4b17023SJohn Marino struct iv_to_split **iv_to_split_tail; /* Pointer to the tail of the list. */
118*e4b17023SJohn Marino htab_t insns_with_var_to_expand; /* A hashtable of insns with accumulators
119*e4b17023SJohn Marino to expand. */
120*e4b17023SJohn Marino struct var_to_expand *var_to_expand_head; /* The first var to expand. */
121*e4b17023SJohn Marino struct var_to_expand **var_to_expand_tail; /* Pointer to the tail of the list. */
122*e4b17023SJohn Marino unsigned first_new_block; /* The first basic block that was
123*e4b17023SJohn Marino duplicated. */
124*e4b17023SJohn Marino basic_block loop_exit; /* The loop exit basic block. */
125*e4b17023SJohn Marino basic_block loop_preheader; /* The loop preheader basic block. */
126*e4b17023SJohn Marino };
127*e4b17023SJohn Marino
128*e4b17023SJohn Marino static void decide_unrolling_and_peeling (int);
129*e4b17023SJohn Marino static void peel_loops_completely (int);
130*e4b17023SJohn Marino static void decide_peel_simple (struct loop *, int);
131*e4b17023SJohn Marino static void decide_peel_once_rolling (struct loop *, int);
132*e4b17023SJohn Marino static void decide_peel_completely (struct loop *, int);
133*e4b17023SJohn Marino static void decide_unroll_stupid (struct loop *, int);
134*e4b17023SJohn Marino static void decide_unroll_constant_iterations (struct loop *, int);
135*e4b17023SJohn Marino static void decide_unroll_runtime_iterations (struct loop *, int);
136*e4b17023SJohn Marino static void peel_loop_simple (struct loop *);
137*e4b17023SJohn Marino static void peel_loop_completely (struct loop *);
138*e4b17023SJohn Marino static void unroll_loop_stupid (struct loop *);
139*e4b17023SJohn Marino static void unroll_loop_constant_iterations (struct loop *);
140*e4b17023SJohn Marino static void unroll_loop_runtime_iterations (struct loop *);
141*e4b17023SJohn Marino static struct opt_info *analyze_insns_in_loop (struct loop *);
142*e4b17023SJohn Marino static void opt_info_start_duplication (struct opt_info *);
143*e4b17023SJohn Marino static void apply_opt_in_copies (struct opt_info *, unsigned, bool, bool);
144*e4b17023SJohn Marino static void free_opt_info (struct opt_info *);
145*e4b17023SJohn Marino static struct var_to_expand *analyze_insn_to_expand_var (struct loop*, rtx);
146*e4b17023SJohn Marino static bool referenced_in_one_insn_in_loop_p (struct loop *, rtx, int *);
147*e4b17023SJohn Marino static struct iv_to_split *analyze_iv_to_split_insn (rtx);
148*e4b17023SJohn Marino static void expand_var_during_unrolling (struct var_to_expand *, rtx);
149*e4b17023SJohn Marino static void insert_var_expansion_initialization (struct var_to_expand *,
150*e4b17023SJohn Marino basic_block);
151*e4b17023SJohn Marino static void combine_var_copies_in_loop_exit (struct var_to_expand *,
152*e4b17023SJohn Marino basic_block);
153*e4b17023SJohn Marino static rtx get_expansion (struct var_to_expand *);
154*e4b17023SJohn Marino
155*e4b17023SJohn Marino /* Unroll and/or peel (depending on FLAGS) LOOPS. */
156*e4b17023SJohn Marino void
unroll_and_peel_loops(int flags)157*e4b17023SJohn Marino unroll_and_peel_loops (int flags)
158*e4b17023SJohn Marino {
159*e4b17023SJohn Marino struct loop *loop;
160*e4b17023SJohn Marino bool check;
161*e4b17023SJohn Marino loop_iterator li;
162*e4b17023SJohn Marino
163*e4b17023SJohn Marino /* First perform complete loop peeling (it is almost surely a win,
164*e4b17023SJohn Marino and affects parameters for further decision a lot). */
165*e4b17023SJohn Marino peel_loops_completely (flags);
166*e4b17023SJohn Marino
167*e4b17023SJohn Marino /* Now decide rest of unrolling and peeling. */
168*e4b17023SJohn Marino decide_unrolling_and_peeling (flags);
169*e4b17023SJohn Marino
170*e4b17023SJohn Marino /* Scan the loops, inner ones first. */
171*e4b17023SJohn Marino FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
172*e4b17023SJohn Marino {
173*e4b17023SJohn Marino check = true;
174*e4b17023SJohn Marino /* And perform the appropriate transformations. */
175*e4b17023SJohn Marino switch (loop->lpt_decision.decision)
176*e4b17023SJohn Marino {
177*e4b17023SJohn Marino case LPT_PEEL_COMPLETELY:
178*e4b17023SJohn Marino /* Already done. */
179*e4b17023SJohn Marino gcc_unreachable ();
180*e4b17023SJohn Marino case LPT_PEEL_SIMPLE:
181*e4b17023SJohn Marino peel_loop_simple (loop);
182*e4b17023SJohn Marino break;
183*e4b17023SJohn Marino case LPT_UNROLL_CONSTANT:
184*e4b17023SJohn Marino unroll_loop_constant_iterations (loop);
185*e4b17023SJohn Marino break;
186*e4b17023SJohn Marino case LPT_UNROLL_RUNTIME:
187*e4b17023SJohn Marino unroll_loop_runtime_iterations (loop);
188*e4b17023SJohn Marino break;
189*e4b17023SJohn Marino case LPT_UNROLL_STUPID:
190*e4b17023SJohn Marino unroll_loop_stupid (loop);
191*e4b17023SJohn Marino break;
192*e4b17023SJohn Marino case LPT_NONE:
193*e4b17023SJohn Marino check = false;
194*e4b17023SJohn Marino break;
195*e4b17023SJohn Marino default:
196*e4b17023SJohn Marino gcc_unreachable ();
197*e4b17023SJohn Marino }
198*e4b17023SJohn Marino if (check)
199*e4b17023SJohn Marino {
200*e4b17023SJohn Marino #ifdef ENABLE_CHECKING
201*e4b17023SJohn Marino verify_dominators (CDI_DOMINATORS);
202*e4b17023SJohn Marino verify_loop_structure ();
203*e4b17023SJohn Marino #endif
204*e4b17023SJohn Marino }
205*e4b17023SJohn Marino }
206*e4b17023SJohn Marino
207*e4b17023SJohn Marino iv_analysis_done ();
208*e4b17023SJohn Marino }
209*e4b17023SJohn Marino
210*e4b17023SJohn Marino /* Check whether exit of the LOOP is at the end of loop body. */
211*e4b17023SJohn Marino
212*e4b17023SJohn Marino static bool
loop_exit_at_end_p(struct loop * loop)213*e4b17023SJohn Marino loop_exit_at_end_p (struct loop *loop)
214*e4b17023SJohn Marino {
215*e4b17023SJohn Marino struct niter_desc *desc = get_simple_loop_desc (loop);
216*e4b17023SJohn Marino rtx insn;
217*e4b17023SJohn Marino
218*e4b17023SJohn Marino if (desc->in_edge->dest != loop->latch)
219*e4b17023SJohn Marino return false;
220*e4b17023SJohn Marino
221*e4b17023SJohn Marino /* Check that the latch is empty. */
222*e4b17023SJohn Marino FOR_BB_INSNS (loop->latch, insn)
223*e4b17023SJohn Marino {
224*e4b17023SJohn Marino if (INSN_P (insn))
225*e4b17023SJohn Marino return false;
226*e4b17023SJohn Marino }
227*e4b17023SJohn Marino
228*e4b17023SJohn Marino return true;
229*e4b17023SJohn Marino }
230*e4b17023SJohn Marino
231*e4b17023SJohn Marino /* Depending on FLAGS, check whether to peel loops completely and do so. */
232*e4b17023SJohn Marino static void
peel_loops_completely(int flags)233*e4b17023SJohn Marino peel_loops_completely (int flags)
234*e4b17023SJohn Marino {
235*e4b17023SJohn Marino struct loop *loop;
236*e4b17023SJohn Marino loop_iterator li;
237*e4b17023SJohn Marino
238*e4b17023SJohn Marino /* Scan the loops, the inner ones first. */
239*e4b17023SJohn Marino FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
240*e4b17023SJohn Marino {
241*e4b17023SJohn Marino loop->lpt_decision.decision = LPT_NONE;
242*e4b17023SJohn Marino
243*e4b17023SJohn Marino if (dump_file)
244*e4b17023SJohn Marino fprintf (dump_file,
245*e4b17023SJohn Marino "\n;; *** Considering loop %d for complete peeling ***\n",
246*e4b17023SJohn Marino loop->num);
247*e4b17023SJohn Marino
248*e4b17023SJohn Marino loop->ninsns = num_loop_insns (loop);
249*e4b17023SJohn Marino
250*e4b17023SJohn Marino decide_peel_once_rolling (loop, flags);
251*e4b17023SJohn Marino if (loop->lpt_decision.decision == LPT_NONE)
252*e4b17023SJohn Marino decide_peel_completely (loop, flags);
253*e4b17023SJohn Marino
254*e4b17023SJohn Marino if (loop->lpt_decision.decision == LPT_PEEL_COMPLETELY)
255*e4b17023SJohn Marino {
256*e4b17023SJohn Marino peel_loop_completely (loop);
257*e4b17023SJohn Marino #ifdef ENABLE_CHECKING
258*e4b17023SJohn Marino verify_dominators (CDI_DOMINATORS);
259*e4b17023SJohn Marino verify_loop_structure ();
260*e4b17023SJohn Marino #endif
261*e4b17023SJohn Marino }
262*e4b17023SJohn Marino }
263*e4b17023SJohn Marino }
264*e4b17023SJohn Marino
265*e4b17023SJohn Marino /* Decide whether unroll or peel loops (depending on FLAGS) and how much. */
266*e4b17023SJohn Marino static void
decide_unrolling_and_peeling(int flags)267*e4b17023SJohn Marino decide_unrolling_and_peeling (int flags)
268*e4b17023SJohn Marino {
269*e4b17023SJohn Marino struct loop *loop;
270*e4b17023SJohn Marino loop_iterator li;
271*e4b17023SJohn Marino
272*e4b17023SJohn Marino /* Scan the loops, inner ones first. */
273*e4b17023SJohn Marino FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
274*e4b17023SJohn Marino {
275*e4b17023SJohn Marino loop->lpt_decision.decision = LPT_NONE;
276*e4b17023SJohn Marino
277*e4b17023SJohn Marino if (dump_file)
278*e4b17023SJohn Marino fprintf (dump_file, "\n;; *** Considering loop %d ***\n", loop->num);
279*e4b17023SJohn Marino
280*e4b17023SJohn Marino /* Do not peel cold areas. */
281*e4b17023SJohn Marino if (optimize_loop_for_size_p (loop))
282*e4b17023SJohn Marino {
283*e4b17023SJohn Marino if (dump_file)
284*e4b17023SJohn Marino fprintf (dump_file, ";; Not considering loop, cold area\n");
285*e4b17023SJohn Marino continue;
286*e4b17023SJohn Marino }
287*e4b17023SJohn Marino
288*e4b17023SJohn Marino /* Can the loop be manipulated? */
289*e4b17023SJohn Marino if (!can_duplicate_loop_p (loop))
290*e4b17023SJohn Marino {
291*e4b17023SJohn Marino if (dump_file)
292*e4b17023SJohn Marino fprintf (dump_file,
293*e4b17023SJohn Marino ";; Not considering loop, cannot duplicate\n");
294*e4b17023SJohn Marino continue;
295*e4b17023SJohn Marino }
296*e4b17023SJohn Marino
297*e4b17023SJohn Marino /* Skip non-innermost loops. */
298*e4b17023SJohn Marino if (loop->inner)
299*e4b17023SJohn Marino {
300*e4b17023SJohn Marino if (dump_file)
301*e4b17023SJohn Marino fprintf (dump_file, ";; Not considering loop, is not innermost\n");
302*e4b17023SJohn Marino continue;
303*e4b17023SJohn Marino }
304*e4b17023SJohn Marino
305*e4b17023SJohn Marino loop->ninsns = num_loop_insns (loop);
306*e4b17023SJohn Marino loop->av_ninsns = average_num_loop_insns (loop);
307*e4b17023SJohn Marino
308*e4b17023SJohn Marino /* Try transformations one by one in decreasing order of
309*e4b17023SJohn Marino priority. */
310*e4b17023SJohn Marino
311*e4b17023SJohn Marino decide_unroll_constant_iterations (loop, flags);
312*e4b17023SJohn Marino if (loop->lpt_decision.decision == LPT_NONE)
313*e4b17023SJohn Marino decide_unroll_runtime_iterations (loop, flags);
314*e4b17023SJohn Marino if (loop->lpt_decision.decision == LPT_NONE)
315*e4b17023SJohn Marino decide_unroll_stupid (loop, flags);
316*e4b17023SJohn Marino if (loop->lpt_decision.decision == LPT_NONE)
317*e4b17023SJohn Marino decide_peel_simple (loop, flags);
318*e4b17023SJohn Marino }
319*e4b17023SJohn Marino }
320*e4b17023SJohn Marino
321*e4b17023SJohn Marino /* Decide whether the LOOP is once rolling and suitable for complete
322*e4b17023SJohn Marino peeling. */
323*e4b17023SJohn Marino static void
decide_peel_once_rolling(struct loop * loop,int flags ATTRIBUTE_UNUSED)324*e4b17023SJohn Marino decide_peel_once_rolling (struct loop *loop, int flags ATTRIBUTE_UNUSED)
325*e4b17023SJohn Marino {
326*e4b17023SJohn Marino struct niter_desc *desc;
327*e4b17023SJohn Marino
328*e4b17023SJohn Marino if (dump_file)
329*e4b17023SJohn Marino fprintf (dump_file, "\n;; Considering peeling once rolling loop\n");
330*e4b17023SJohn Marino
331*e4b17023SJohn Marino /* Is the loop small enough? */
332*e4b17023SJohn Marino if ((unsigned) PARAM_VALUE (PARAM_MAX_ONCE_PEELED_INSNS) < loop->ninsns)
333*e4b17023SJohn Marino {
334*e4b17023SJohn Marino if (dump_file)
335*e4b17023SJohn Marino fprintf (dump_file, ";; Not considering loop, is too big\n");
336*e4b17023SJohn Marino return;
337*e4b17023SJohn Marino }
338*e4b17023SJohn Marino
339*e4b17023SJohn Marino /* Check for simple loops. */
340*e4b17023SJohn Marino desc = get_simple_loop_desc (loop);
341*e4b17023SJohn Marino
342*e4b17023SJohn Marino /* Check number of iterations. */
343*e4b17023SJohn Marino if (!desc->simple_p
344*e4b17023SJohn Marino || desc->assumptions
345*e4b17023SJohn Marino || desc->infinite
346*e4b17023SJohn Marino || !desc->const_iter
347*e4b17023SJohn Marino || desc->niter != 0)
348*e4b17023SJohn Marino {
349*e4b17023SJohn Marino if (dump_file)
350*e4b17023SJohn Marino fprintf (dump_file,
351*e4b17023SJohn Marino ";; Unable to prove that the loop rolls exactly once\n");
352*e4b17023SJohn Marino return;
353*e4b17023SJohn Marino }
354*e4b17023SJohn Marino
355*e4b17023SJohn Marino /* Success. */
356*e4b17023SJohn Marino if (dump_file)
357*e4b17023SJohn Marino fprintf (dump_file, ";; Decided to peel exactly once rolling loop\n");
358*e4b17023SJohn Marino loop->lpt_decision.decision = LPT_PEEL_COMPLETELY;
359*e4b17023SJohn Marino }
360*e4b17023SJohn Marino
361*e4b17023SJohn Marino /* Decide whether the LOOP is suitable for complete peeling. */
362*e4b17023SJohn Marino static void
decide_peel_completely(struct loop * loop,int flags ATTRIBUTE_UNUSED)363*e4b17023SJohn Marino decide_peel_completely (struct loop *loop, int flags ATTRIBUTE_UNUSED)
364*e4b17023SJohn Marino {
365*e4b17023SJohn Marino unsigned npeel;
366*e4b17023SJohn Marino struct niter_desc *desc;
367*e4b17023SJohn Marino
368*e4b17023SJohn Marino if (dump_file)
369*e4b17023SJohn Marino fprintf (dump_file, "\n;; Considering peeling completely\n");
370*e4b17023SJohn Marino
371*e4b17023SJohn Marino /* Skip non-innermost loops. */
372*e4b17023SJohn Marino if (loop->inner)
373*e4b17023SJohn Marino {
374*e4b17023SJohn Marino if (dump_file)
375*e4b17023SJohn Marino fprintf (dump_file, ";; Not considering loop, is not innermost\n");
376*e4b17023SJohn Marino return;
377*e4b17023SJohn Marino }
378*e4b17023SJohn Marino
379*e4b17023SJohn Marino /* Do not peel cold areas. */
380*e4b17023SJohn Marino if (optimize_loop_for_size_p (loop))
381*e4b17023SJohn Marino {
382*e4b17023SJohn Marino if (dump_file)
383*e4b17023SJohn Marino fprintf (dump_file, ";; Not considering loop, cold area\n");
384*e4b17023SJohn Marino return;
385*e4b17023SJohn Marino }
386*e4b17023SJohn Marino
387*e4b17023SJohn Marino /* Can the loop be manipulated? */
388*e4b17023SJohn Marino if (!can_duplicate_loop_p (loop))
389*e4b17023SJohn Marino {
390*e4b17023SJohn Marino if (dump_file)
391*e4b17023SJohn Marino fprintf (dump_file,
392*e4b17023SJohn Marino ";; Not considering loop, cannot duplicate\n");
393*e4b17023SJohn Marino return;
394*e4b17023SJohn Marino }
395*e4b17023SJohn Marino
396*e4b17023SJohn Marino /* npeel = number of iterations to peel. */
397*e4b17023SJohn Marino npeel = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS) / loop->ninsns;
398*e4b17023SJohn Marino if (npeel > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES))
399*e4b17023SJohn Marino npeel = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES);
400*e4b17023SJohn Marino
401*e4b17023SJohn Marino /* Is the loop small enough? */
402*e4b17023SJohn Marino if (!npeel)
403*e4b17023SJohn Marino {
404*e4b17023SJohn Marino if (dump_file)
405*e4b17023SJohn Marino fprintf (dump_file, ";; Not considering loop, is too big\n");
406*e4b17023SJohn Marino return;
407*e4b17023SJohn Marino }
408*e4b17023SJohn Marino
409*e4b17023SJohn Marino /* Check for simple loops. */
410*e4b17023SJohn Marino desc = get_simple_loop_desc (loop);
411*e4b17023SJohn Marino
412*e4b17023SJohn Marino /* Check number of iterations. */
413*e4b17023SJohn Marino if (!desc->simple_p
414*e4b17023SJohn Marino || desc->assumptions
415*e4b17023SJohn Marino || !desc->const_iter
416*e4b17023SJohn Marino || desc->infinite)
417*e4b17023SJohn Marino {
418*e4b17023SJohn Marino if (dump_file)
419*e4b17023SJohn Marino fprintf (dump_file,
420*e4b17023SJohn Marino ";; Unable to prove that the loop iterates constant times\n");
421*e4b17023SJohn Marino return;
422*e4b17023SJohn Marino }
423*e4b17023SJohn Marino
424*e4b17023SJohn Marino if (desc->niter > npeel - 1)
425*e4b17023SJohn Marino {
426*e4b17023SJohn Marino if (dump_file)
427*e4b17023SJohn Marino {
428*e4b17023SJohn Marino fprintf (dump_file,
429*e4b17023SJohn Marino ";; Not peeling loop completely, rolls too much (");
430*e4b17023SJohn Marino fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, desc->niter);
431*e4b17023SJohn Marino fprintf (dump_file, " iterations > %d [maximum peelings])\n", npeel);
432*e4b17023SJohn Marino }
433*e4b17023SJohn Marino return;
434*e4b17023SJohn Marino }
435*e4b17023SJohn Marino
436*e4b17023SJohn Marino /* Success. */
437*e4b17023SJohn Marino if (dump_file)
438*e4b17023SJohn Marino fprintf (dump_file, ";; Decided to peel loop completely\n");
439*e4b17023SJohn Marino loop->lpt_decision.decision = LPT_PEEL_COMPLETELY;
440*e4b17023SJohn Marino }
441*e4b17023SJohn Marino
442*e4b17023SJohn Marino /* Peel all iterations of LOOP, remove exit edges and cancel the loop
443*e4b17023SJohn Marino completely. The transformation done:
444*e4b17023SJohn Marino
445*e4b17023SJohn Marino for (i = 0; i < 4; i++)
446*e4b17023SJohn Marino body;
447*e4b17023SJohn Marino
448*e4b17023SJohn Marino ==>
449*e4b17023SJohn Marino
450*e4b17023SJohn Marino i = 0;
451*e4b17023SJohn Marino body; i++;
452*e4b17023SJohn Marino body; i++;
453*e4b17023SJohn Marino body; i++;
454*e4b17023SJohn Marino body; i++;
455*e4b17023SJohn Marino */
456*e4b17023SJohn Marino static void
peel_loop_completely(struct loop * loop)457*e4b17023SJohn Marino peel_loop_completely (struct loop *loop)
458*e4b17023SJohn Marino {
459*e4b17023SJohn Marino sbitmap wont_exit;
460*e4b17023SJohn Marino unsigned HOST_WIDE_INT npeel;
461*e4b17023SJohn Marino unsigned i;
462*e4b17023SJohn Marino VEC (edge, heap) *remove_edges;
463*e4b17023SJohn Marino edge ein;
464*e4b17023SJohn Marino struct niter_desc *desc = get_simple_loop_desc (loop);
465*e4b17023SJohn Marino struct opt_info *opt_info = NULL;
466*e4b17023SJohn Marino
467*e4b17023SJohn Marino npeel = desc->niter;
468*e4b17023SJohn Marino
469*e4b17023SJohn Marino if (npeel)
470*e4b17023SJohn Marino {
471*e4b17023SJohn Marino bool ok;
472*e4b17023SJohn Marino
473*e4b17023SJohn Marino wont_exit = sbitmap_alloc (npeel + 1);
474*e4b17023SJohn Marino sbitmap_ones (wont_exit);
475*e4b17023SJohn Marino RESET_BIT (wont_exit, 0);
476*e4b17023SJohn Marino if (desc->noloop_assumptions)
477*e4b17023SJohn Marino RESET_BIT (wont_exit, 1);
478*e4b17023SJohn Marino
479*e4b17023SJohn Marino remove_edges = NULL;
480*e4b17023SJohn Marino
481*e4b17023SJohn Marino if (flag_split_ivs_in_unroller)
482*e4b17023SJohn Marino opt_info = analyze_insns_in_loop (loop);
483*e4b17023SJohn Marino
484*e4b17023SJohn Marino opt_info_start_duplication (opt_info);
485*e4b17023SJohn Marino ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
486*e4b17023SJohn Marino npeel,
487*e4b17023SJohn Marino wont_exit, desc->out_edge,
488*e4b17023SJohn Marino &remove_edges,
489*e4b17023SJohn Marino DLTHE_FLAG_UPDATE_FREQ
490*e4b17023SJohn Marino | DLTHE_FLAG_COMPLETTE_PEEL
491*e4b17023SJohn Marino | (opt_info
492*e4b17023SJohn Marino ? DLTHE_RECORD_COPY_NUMBER : 0));
493*e4b17023SJohn Marino gcc_assert (ok);
494*e4b17023SJohn Marino
495*e4b17023SJohn Marino free (wont_exit);
496*e4b17023SJohn Marino
497*e4b17023SJohn Marino if (opt_info)
498*e4b17023SJohn Marino {
499*e4b17023SJohn Marino apply_opt_in_copies (opt_info, npeel, false, true);
500*e4b17023SJohn Marino free_opt_info (opt_info);
501*e4b17023SJohn Marino }
502*e4b17023SJohn Marino
503*e4b17023SJohn Marino /* Remove the exit edges. */
504*e4b17023SJohn Marino FOR_EACH_VEC_ELT (edge, remove_edges, i, ein)
505*e4b17023SJohn Marino remove_path (ein);
506*e4b17023SJohn Marino VEC_free (edge, heap, remove_edges);
507*e4b17023SJohn Marino }
508*e4b17023SJohn Marino
509*e4b17023SJohn Marino ein = desc->in_edge;
510*e4b17023SJohn Marino free_simple_loop_desc (loop);
511*e4b17023SJohn Marino
512*e4b17023SJohn Marino /* Now remove the unreachable part of the last iteration and cancel
513*e4b17023SJohn Marino the loop. */
514*e4b17023SJohn Marino remove_path (ein);
515*e4b17023SJohn Marino
516*e4b17023SJohn Marino if (dump_file)
517*e4b17023SJohn Marino fprintf (dump_file, ";; Peeled loop completely, %d times\n", (int) npeel);
518*e4b17023SJohn Marino }
519*e4b17023SJohn Marino
520*e4b17023SJohn Marino /* Decide whether to unroll LOOP iterating constant number of times
521*e4b17023SJohn Marino and how much. */
522*e4b17023SJohn Marino
523*e4b17023SJohn Marino static void
decide_unroll_constant_iterations(struct loop * loop,int flags)524*e4b17023SJohn Marino decide_unroll_constant_iterations (struct loop *loop, int flags)
525*e4b17023SJohn Marino {
526*e4b17023SJohn Marino unsigned nunroll, nunroll_by_av, best_copies, best_unroll = 0, n_copies, i;
527*e4b17023SJohn Marino struct niter_desc *desc;
528*e4b17023SJohn Marino
529*e4b17023SJohn Marino if (!(flags & UAP_UNROLL))
530*e4b17023SJohn Marino {
531*e4b17023SJohn Marino /* We were not asked to, just return back silently. */
532*e4b17023SJohn Marino return;
533*e4b17023SJohn Marino }
534*e4b17023SJohn Marino
535*e4b17023SJohn Marino if (dump_file)
536*e4b17023SJohn Marino fprintf (dump_file,
537*e4b17023SJohn Marino "\n;; Considering unrolling loop with constant "
538*e4b17023SJohn Marino "number of iterations\n");
539*e4b17023SJohn Marino
540*e4b17023SJohn Marino /* nunroll = total number of copies of the original loop body in
541*e4b17023SJohn Marino unrolled loop (i.e. if it is 2, we have to duplicate loop body once. */
542*e4b17023SJohn Marino nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
543*e4b17023SJohn Marino nunroll_by_av
544*e4b17023SJohn Marino = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
545*e4b17023SJohn Marino if (nunroll > nunroll_by_av)
546*e4b17023SJohn Marino nunroll = nunroll_by_av;
547*e4b17023SJohn Marino if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
548*e4b17023SJohn Marino nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
549*e4b17023SJohn Marino
550*e4b17023SJohn Marino /* Skip big loops. */
551*e4b17023SJohn Marino if (nunroll <= 1)
552*e4b17023SJohn Marino {
553*e4b17023SJohn Marino if (dump_file)
554*e4b17023SJohn Marino fprintf (dump_file, ";; Not considering loop, is too big\n");
555*e4b17023SJohn Marino return;
556*e4b17023SJohn Marino }
557*e4b17023SJohn Marino
558*e4b17023SJohn Marino /* Check for simple loops. */
559*e4b17023SJohn Marino desc = get_simple_loop_desc (loop);
560*e4b17023SJohn Marino
561*e4b17023SJohn Marino /* Check number of iterations. */
562*e4b17023SJohn Marino if (!desc->simple_p || !desc->const_iter || desc->assumptions)
563*e4b17023SJohn Marino {
564*e4b17023SJohn Marino if (dump_file)
565*e4b17023SJohn Marino fprintf (dump_file,
566*e4b17023SJohn Marino ";; Unable to prove that the loop iterates constant times\n");
567*e4b17023SJohn Marino return;
568*e4b17023SJohn Marino }
569*e4b17023SJohn Marino
570*e4b17023SJohn Marino /* Check whether the loop rolls enough to consider. */
571*e4b17023SJohn Marino if (desc->niter < 2 * nunroll)
572*e4b17023SJohn Marino {
573*e4b17023SJohn Marino if (dump_file)
574*e4b17023SJohn Marino fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
575*e4b17023SJohn Marino return;
576*e4b17023SJohn Marino }
577*e4b17023SJohn Marino
578*e4b17023SJohn Marino /* Success; now compute number of iterations to unroll. We alter
579*e4b17023SJohn Marino nunroll so that as few as possible copies of loop body are
580*e4b17023SJohn Marino necessary, while still not decreasing the number of unrollings
581*e4b17023SJohn Marino too much (at most by 1). */
582*e4b17023SJohn Marino best_copies = 2 * nunroll + 10;
583*e4b17023SJohn Marino
584*e4b17023SJohn Marino i = 2 * nunroll + 2;
585*e4b17023SJohn Marino if (i - 1 >= desc->niter)
586*e4b17023SJohn Marino i = desc->niter - 2;
587*e4b17023SJohn Marino
588*e4b17023SJohn Marino for (; i >= nunroll - 1; i--)
589*e4b17023SJohn Marino {
590*e4b17023SJohn Marino unsigned exit_mod = desc->niter % (i + 1);
591*e4b17023SJohn Marino
592*e4b17023SJohn Marino if (!loop_exit_at_end_p (loop))
593*e4b17023SJohn Marino n_copies = exit_mod + i + 1;
594*e4b17023SJohn Marino else if (exit_mod != (unsigned) i
595*e4b17023SJohn Marino || desc->noloop_assumptions != NULL_RTX)
596*e4b17023SJohn Marino n_copies = exit_mod + i + 2;
597*e4b17023SJohn Marino else
598*e4b17023SJohn Marino n_copies = i + 1;
599*e4b17023SJohn Marino
600*e4b17023SJohn Marino if (n_copies < best_copies)
601*e4b17023SJohn Marino {
602*e4b17023SJohn Marino best_copies = n_copies;
603*e4b17023SJohn Marino best_unroll = i;
604*e4b17023SJohn Marino }
605*e4b17023SJohn Marino }
606*e4b17023SJohn Marino
607*e4b17023SJohn Marino if (dump_file)
608*e4b17023SJohn Marino fprintf (dump_file, ";; max_unroll %d (%d copies, initial %d).\n",
609*e4b17023SJohn Marino best_unroll + 1, best_copies, nunroll);
610*e4b17023SJohn Marino
611*e4b17023SJohn Marino loop->lpt_decision.decision = LPT_UNROLL_CONSTANT;
612*e4b17023SJohn Marino loop->lpt_decision.times = best_unroll;
613*e4b17023SJohn Marino
614*e4b17023SJohn Marino if (dump_file)
615*e4b17023SJohn Marino fprintf (dump_file,
616*e4b17023SJohn Marino ";; Decided to unroll the constant times rolling loop, %d times.\n",
617*e4b17023SJohn Marino loop->lpt_decision.times);
618*e4b17023SJohn Marino }
619*e4b17023SJohn Marino
620*e4b17023SJohn Marino /* Unroll LOOP with constant number of iterations LOOP->LPT_DECISION.TIMES + 1
621*e4b17023SJohn Marino times. The transformation does this:
622*e4b17023SJohn Marino
623*e4b17023SJohn Marino for (i = 0; i < 102; i++)
624*e4b17023SJohn Marino body;
625*e4b17023SJohn Marino
626*e4b17023SJohn Marino ==>
627*e4b17023SJohn Marino
628*e4b17023SJohn Marino i = 0;
629*e4b17023SJohn Marino body; i++;
630*e4b17023SJohn Marino body; i++;
631*e4b17023SJohn Marino while (i < 102)
632*e4b17023SJohn Marino {
633*e4b17023SJohn Marino body; i++;
634*e4b17023SJohn Marino body; i++;
635*e4b17023SJohn Marino body; i++;
636*e4b17023SJohn Marino body; i++;
637*e4b17023SJohn Marino }
638*e4b17023SJohn Marino */
639*e4b17023SJohn Marino static void
unroll_loop_constant_iterations(struct loop * loop)640*e4b17023SJohn Marino unroll_loop_constant_iterations (struct loop *loop)
641*e4b17023SJohn Marino {
642*e4b17023SJohn Marino unsigned HOST_WIDE_INT niter;
643*e4b17023SJohn Marino unsigned exit_mod;
644*e4b17023SJohn Marino sbitmap wont_exit;
645*e4b17023SJohn Marino unsigned i;
646*e4b17023SJohn Marino VEC (edge, heap) *remove_edges;
647*e4b17023SJohn Marino edge e;
648*e4b17023SJohn Marino unsigned max_unroll = loop->lpt_decision.times;
649*e4b17023SJohn Marino struct niter_desc *desc = get_simple_loop_desc (loop);
650*e4b17023SJohn Marino bool exit_at_end = loop_exit_at_end_p (loop);
651*e4b17023SJohn Marino struct opt_info *opt_info = NULL;
652*e4b17023SJohn Marino bool ok;
653*e4b17023SJohn Marino
654*e4b17023SJohn Marino niter = desc->niter;
655*e4b17023SJohn Marino
656*e4b17023SJohn Marino /* Should not get here (such loop should be peeled instead). */
657*e4b17023SJohn Marino gcc_assert (niter > max_unroll + 1);
658*e4b17023SJohn Marino
659*e4b17023SJohn Marino exit_mod = niter % (max_unroll + 1);
660*e4b17023SJohn Marino
661*e4b17023SJohn Marino wont_exit = sbitmap_alloc (max_unroll + 1);
662*e4b17023SJohn Marino sbitmap_ones (wont_exit);
663*e4b17023SJohn Marino
664*e4b17023SJohn Marino remove_edges = NULL;
665*e4b17023SJohn Marino if (flag_split_ivs_in_unroller
666*e4b17023SJohn Marino || flag_variable_expansion_in_unroller)
667*e4b17023SJohn Marino opt_info = analyze_insns_in_loop (loop);
668*e4b17023SJohn Marino
669*e4b17023SJohn Marino if (!exit_at_end)
670*e4b17023SJohn Marino {
671*e4b17023SJohn Marino /* The exit is not at the end of the loop; leave exit test
672*e4b17023SJohn Marino in the first copy, so that the loops that start with test
673*e4b17023SJohn Marino of exit condition have continuous body after unrolling. */
674*e4b17023SJohn Marino
675*e4b17023SJohn Marino if (dump_file)
676*e4b17023SJohn Marino fprintf (dump_file, ";; Condition on beginning of loop.\n");
677*e4b17023SJohn Marino
678*e4b17023SJohn Marino /* Peel exit_mod iterations. */
679*e4b17023SJohn Marino RESET_BIT (wont_exit, 0);
680*e4b17023SJohn Marino if (desc->noloop_assumptions)
681*e4b17023SJohn Marino RESET_BIT (wont_exit, 1);
682*e4b17023SJohn Marino
683*e4b17023SJohn Marino if (exit_mod)
684*e4b17023SJohn Marino {
685*e4b17023SJohn Marino opt_info_start_duplication (opt_info);
686*e4b17023SJohn Marino ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
687*e4b17023SJohn Marino exit_mod,
688*e4b17023SJohn Marino wont_exit, desc->out_edge,
689*e4b17023SJohn Marino &remove_edges,
690*e4b17023SJohn Marino DLTHE_FLAG_UPDATE_FREQ
691*e4b17023SJohn Marino | (opt_info && exit_mod > 1
692*e4b17023SJohn Marino ? DLTHE_RECORD_COPY_NUMBER
693*e4b17023SJohn Marino : 0));
694*e4b17023SJohn Marino gcc_assert (ok);
695*e4b17023SJohn Marino
696*e4b17023SJohn Marino if (opt_info && exit_mod > 1)
697*e4b17023SJohn Marino apply_opt_in_copies (opt_info, exit_mod, false, false);
698*e4b17023SJohn Marino
699*e4b17023SJohn Marino desc->noloop_assumptions = NULL_RTX;
700*e4b17023SJohn Marino desc->niter -= exit_mod;
701*e4b17023SJohn Marino desc->niter_max -= exit_mod;
702*e4b17023SJohn Marino }
703*e4b17023SJohn Marino
704*e4b17023SJohn Marino SET_BIT (wont_exit, 1);
705*e4b17023SJohn Marino }
706*e4b17023SJohn Marino else
707*e4b17023SJohn Marino {
708*e4b17023SJohn Marino /* Leave exit test in last copy, for the same reason as above if
709*e4b17023SJohn Marino the loop tests the condition at the end of loop body. */
710*e4b17023SJohn Marino
711*e4b17023SJohn Marino if (dump_file)
712*e4b17023SJohn Marino fprintf (dump_file, ";; Condition on end of loop.\n");
713*e4b17023SJohn Marino
714*e4b17023SJohn Marino /* We know that niter >= max_unroll + 2; so we do not need to care of
715*e4b17023SJohn Marino case when we would exit before reaching the loop. So just peel
716*e4b17023SJohn Marino exit_mod + 1 iterations. */
717*e4b17023SJohn Marino if (exit_mod != max_unroll
718*e4b17023SJohn Marino || desc->noloop_assumptions)
719*e4b17023SJohn Marino {
720*e4b17023SJohn Marino RESET_BIT (wont_exit, 0);
721*e4b17023SJohn Marino if (desc->noloop_assumptions)
722*e4b17023SJohn Marino RESET_BIT (wont_exit, 1);
723*e4b17023SJohn Marino
724*e4b17023SJohn Marino opt_info_start_duplication (opt_info);
725*e4b17023SJohn Marino ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
726*e4b17023SJohn Marino exit_mod + 1,
727*e4b17023SJohn Marino wont_exit, desc->out_edge,
728*e4b17023SJohn Marino &remove_edges,
729*e4b17023SJohn Marino DLTHE_FLAG_UPDATE_FREQ
730*e4b17023SJohn Marino | (opt_info && exit_mod > 0
731*e4b17023SJohn Marino ? DLTHE_RECORD_COPY_NUMBER
732*e4b17023SJohn Marino : 0));
733*e4b17023SJohn Marino gcc_assert (ok);
734*e4b17023SJohn Marino
735*e4b17023SJohn Marino if (opt_info && exit_mod > 0)
736*e4b17023SJohn Marino apply_opt_in_copies (opt_info, exit_mod + 1, false, false);
737*e4b17023SJohn Marino
738*e4b17023SJohn Marino desc->niter -= exit_mod + 1;
739*e4b17023SJohn Marino desc->niter_max -= exit_mod + 1;
740*e4b17023SJohn Marino desc->noloop_assumptions = NULL_RTX;
741*e4b17023SJohn Marino
742*e4b17023SJohn Marino SET_BIT (wont_exit, 0);
743*e4b17023SJohn Marino SET_BIT (wont_exit, 1);
744*e4b17023SJohn Marino }
745*e4b17023SJohn Marino
746*e4b17023SJohn Marino RESET_BIT (wont_exit, max_unroll);
747*e4b17023SJohn Marino }
748*e4b17023SJohn Marino
749*e4b17023SJohn Marino /* Now unroll the loop. */
750*e4b17023SJohn Marino
751*e4b17023SJohn Marino opt_info_start_duplication (opt_info);
752*e4b17023SJohn Marino ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
753*e4b17023SJohn Marino max_unroll,
754*e4b17023SJohn Marino wont_exit, desc->out_edge,
755*e4b17023SJohn Marino &remove_edges,
756*e4b17023SJohn Marino DLTHE_FLAG_UPDATE_FREQ
757*e4b17023SJohn Marino | (opt_info
758*e4b17023SJohn Marino ? DLTHE_RECORD_COPY_NUMBER
759*e4b17023SJohn Marino : 0));
760*e4b17023SJohn Marino gcc_assert (ok);
761*e4b17023SJohn Marino
762*e4b17023SJohn Marino if (opt_info)
763*e4b17023SJohn Marino {
764*e4b17023SJohn Marino apply_opt_in_copies (opt_info, max_unroll, true, true);
765*e4b17023SJohn Marino free_opt_info (opt_info);
766*e4b17023SJohn Marino }
767*e4b17023SJohn Marino
768*e4b17023SJohn Marino free (wont_exit);
769*e4b17023SJohn Marino
770*e4b17023SJohn Marino if (exit_at_end)
771*e4b17023SJohn Marino {
772*e4b17023SJohn Marino basic_block exit_block = get_bb_copy (desc->in_edge->src);
773*e4b17023SJohn Marino /* Find a new in and out edge; they are in the last copy we have made. */
774*e4b17023SJohn Marino
775*e4b17023SJohn Marino if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
776*e4b17023SJohn Marino {
777*e4b17023SJohn Marino desc->out_edge = EDGE_SUCC (exit_block, 0);
778*e4b17023SJohn Marino desc->in_edge = EDGE_SUCC (exit_block, 1);
779*e4b17023SJohn Marino }
780*e4b17023SJohn Marino else
781*e4b17023SJohn Marino {
782*e4b17023SJohn Marino desc->out_edge = EDGE_SUCC (exit_block, 1);
783*e4b17023SJohn Marino desc->in_edge = EDGE_SUCC (exit_block, 0);
784*e4b17023SJohn Marino }
785*e4b17023SJohn Marino }
786*e4b17023SJohn Marino
787*e4b17023SJohn Marino desc->niter /= max_unroll + 1;
788*e4b17023SJohn Marino desc->niter_max /= max_unroll + 1;
789*e4b17023SJohn Marino desc->niter_expr = GEN_INT (desc->niter);
790*e4b17023SJohn Marino
791*e4b17023SJohn Marino /* Remove the edges. */
792*e4b17023SJohn Marino FOR_EACH_VEC_ELT (edge, remove_edges, i, e)
793*e4b17023SJohn Marino remove_path (e);
794*e4b17023SJohn Marino VEC_free (edge, heap, remove_edges);
795*e4b17023SJohn Marino
796*e4b17023SJohn Marino if (dump_file)
797*e4b17023SJohn Marino fprintf (dump_file,
798*e4b17023SJohn Marino ";; Unrolled loop %d times, constant # of iterations %i insns\n",
799*e4b17023SJohn Marino max_unroll, num_loop_insns (loop));
800*e4b17023SJohn Marino }
801*e4b17023SJohn Marino
802*e4b17023SJohn Marino /* Decide whether to unroll LOOP iterating runtime computable number of times
803*e4b17023SJohn Marino and how much. */
804*e4b17023SJohn Marino static void
decide_unroll_runtime_iterations(struct loop * loop,int flags)805*e4b17023SJohn Marino decide_unroll_runtime_iterations (struct loop *loop, int flags)
806*e4b17023SJohn Marino {
807*e4b17023SJohn Marino unsigned nunroll, nunroll_by_av, i;
808*e4b17023SJohn Marino struct niter_desc *desc;
809*e4b17023SJohn Marino
810*e4b17023SJohn Marino if (!(flags & UAP_UNROLL))
811*e4b17023SJohn Marino {
812*e4b17023SJohn Marino /* We were not asked to, just return back silently. */
813*e4b17023SJohn Marino return;
814*e4b17023SJohn Marino }
815*e4b17023SJohn Marino
816*e4b17023SJohn Marino if (dump_file)
817*e4b17023SJohn Marino fprintf (dump_file,
818*e4b17023SJohn Marino "\n;; Considering unrolling loop with runtime "
819*e4b17023SJohn Marino "computable number of iterations\n");
820*e4b17023SJohn Marino
821*e4b17023SJohn Marino /* nunroll = total number of copies of the original loop body in
822*e4b17023SJohn Marino unrolled loop (i.e. if it is 2, we have to duplicate loop body once. */
823*e4b17023SJohn Marino nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
824*e4b17023SJohn Marino nunroll_by_av = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
825*e4b17023SJohn Marino if (nunroll > nunroll_by_av)
826*e4b17023SJohn Marino nunroll = nunroll_by_av;
827*e4b17023SJohn Marino if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
828*e4b17023SJohn Marino nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
829*e4b17023SJohn Marino
830*e4b17023SJohn Marino if (targetm.loop_unroll_adjust)
831*e4b17023SJohn Marino nunroll = targetm.loop_unroll_adjust (nunroll, loop);
832*e4b17023SJohn Marino
833*e4b17023SJohn Marino /* Skip big loops. */
834*e4b17023SJohn Marino if (nunroll <= 1)
835*e4b17023SJohn Marino {
836*e4b17023SJohn Marino if (dump_file)
837*e4b17023SJohn Marino fprintf (dump_file, ";; Not considering loop, is too big\n");
838*e4b17023SJohn Marino return;
839*e4b17023SJohn Marino }
840*e4b17023SJohn Marino
841*e4b17023SJohn Marino /* Check for simple loops. */
842*e4b17023SJohn Marino desc = get_simple_loop_desc (loop);
843*e4b17023SJohn Marino
844*e4b17023SJohn Marino /* Check simpleness. */
845*e4b17023SJohn Marino if (!desc->simple_p || desc->assumptions)
846*e4b17023SJohn Marino {
847*e4b17023SJohn Marino if (dump_file)
848*e4b17023SJohn Marino fprintf (dump_file,
849*e4b17023SJohn Marino ";; Unable to prove that the number of iterations "
850*e4b17023SJohn Marino "can be counted in runtime\n");
851*e4b17023SJohn Marino return;
852*e4b17023SJohn Marino }
853*e4b17023SJohn Marino
854*e4b17023SJohn Marino if (desc->const_iter)
855*e4b17023SJohn Marino {
856*e4b17023SJohn Marino if (dump_file)
857*e4b17023SJohn Marino fprintf (dump_file, ";; Loop iterates constant times\n");
858*e4b17023SJohn Marino return;
859*e4b17023SJohn Marino }
860*e4b17023SJohn Marino
861*e4b17023SJohn Marino /* If we have profile feedback, check whether the loop rolls. */
862*e4b17023SJohn Marino if (loop->header->count && expected_loop_iterations (loop) < 2 * nunroll)
863*e4b17023SJohn Marino {
864*e4b17023SJohn Marino if (dump_file)
865*e4b17023SJohn Marino fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
866*e4b17023SJohn Marino return;
867*e4b17023SJohn Marino }
868*e4b17023SJohn Marino
869*e4b17023SJohn Marino /* Success; now force nunroll to be power of 2, as we are unable to
870*e4b17023SJohn Marino cope with overflows in computation of number of iterations. */
871*e4b17023SJohn Marino for (i = 1; 2 * i <= nunroll; i *= 2)
872*e4b17023SJohn Marino continue;
873*e4b17023SJohn Marino
874*e4b17023SJohn Marino loop->lpt_decision.decision = LPT_UNROLL_RUNTIME;
875*e4b17023SJohn Marino loop->lpt_decision.times = i - 1;
876*e4b17023SJohn Marino
877*e4b17023SJohn Marino if (dump_file)
878*e4b17023SJohn Marino fprintf (dump_file,
879*e4b17023SJohn Marino ";; Decided to unroll the runtime computable "
880*e4b17023SJohn Marino "times rolling loop, %d times.\n",
881*e4b17023SJohn Marino loop->lpt_decision.times);
882*e4b17023SJohn Marino }
883*e4b17023SJohn Marino
884*e4b17023SJohn Marino /* Splits edge E and inserts the sequence of instructions INSNS on it, and
885*e4b17023SJohn Marino returns the newly created block. If INSNS is NULL_RTX, nothing is changed
886*e4b17023SJohn Marino and NULL is returned instead. */
887*e4b17023SJohn Marino
888*e4b17023SJohn Marino basic_block
split_edge_and_insert(edge e,rtx insns)889*e4b17023SJohn Marino split_edge_and_insert (edge e, rtx insns)
890*e4b17023SJohn Marino {
891*e4b17023SJohn Marino basic_block bb;
892*e4b17023SJohn Marino
893*e4b17023SJohn Marino if (!insns)
894*e4b17023SJohn Marino return NULL;
895*e4b17023SJohn Marino bb = split_edge (e);
896*e4b17023SJohn Marino emit_insn_after (insns, BB_END (bb));
897*e4b17023SJohn Marino
898*e4b17023SJohn Marino /* ??? We used to assume that INSNS can contain control flow insns, and
899*e4b17023SJohn Marino that we had to try to find sub basic blocks in BB to maintain a valid
900*e4b17023SJohn Marino CFG. For this purpose we used to set the BB_SUPERBLOCK flag on BB
901*e4b17023SJohn Marino and call break_superblocks when going out of cfglayout mode. But it
902*e4b17023SJohn Marino turns out that this never happens; and that if it does ever happen,
903*e4b17023SJohn Marino the TODO_verify_flow at the end of the RTL loop passes would fail.
904*e4b17023SJohn Marino
905*e4b17023SJohn Marino There are two reasons why we expected we could have control flow insns
906*e4b17023SJohn Marino in INSNS. The first is when a comparison has to be done in parts, and
907*e4b17023SJohn Marino the second is when the number of iterations is computed for loops with
908*e4b17023SJohn Marino the number of iterations known at runtime. In both cases, test cases
909*e4b17023SJohn Marino to get control flow in INSNS appear to be impossible to construct:
910*e4b17023SJohn Marino
911*e4b17023SJohn Marino * If do_compare_rtx_and_jump needs several branches to do comparison
912*e4b17023SJohn Marino in a mode that needs comparison by parts, we cannot analyze the
913*e4b17023SJohn Marino number of iterations of the loop, and we never get to unrolling it.
914*e4b17023SJohn Marino
915*e4b17023SJohn Marino * The code in expand_divmod that was suspected to cause creation of
916*e4b17023SJohn Marino branching code seems to be only accessed for signed division. The
917*e4b17023SJohn Marino divisions used by # of iterations analysis are always unsigned.
918*e4b17023SJohn Marino Problems might arise on architectures that emits branching code
919*e4b17023SJohn Marino for some operations that may appear in the unroller (especially
920*e4b17023SJohn Marino for division), but we have no such architectures.
921*e4b17023SJohn Marino
922*e4b17023SJohn Marino Considering all this, it was decided that we should for now assume
923*e4b17023SJohn Marino that INSNS can in theory contain control flow insns, but in practice
924*e4b17023SJohn Marino it never does. So we don't handle the theoretical case, and should
925*e4b17023SJohn Marino a real failure ever show up, we have a pretty good clue for how to
926*e4b17023SJohn Marino fix it. */
927*e4b17023SJohn Marino
928*e4b17023SJohn Marino return bb;
929*e4b17023SJohn Marino }
930*e4b17023SJohn Marino
931*e4b17023SJohn Marino /* Unroll LOOP for that we are able to count number of iterations in runtime
932*e4b17023SJohn Marino LOOP->LPT_DECISION.TIMES + 1 times. The transformation does this (with some
933*e4b17023SJohn Marino extra care for case n < 0):
934*e4b17023SJohn Marino
935*e4b17023SJohn Marino for (i = 0; i < n; i++)
936*e4b17023SJohn Marino body;
937*e4b17023SJohn Marino
938*e4b17023SJohn Marino ==>
939*e4b17023SJohn Marino
940*e4b17023SJohn Marino i = 0;
941*e4b17023SJohn Marino mod = n % 4;
942*e4b17023SJohn Marino
943*e4b17023SJohn Marino switch (mod)
944*e4b17023SJohn Marino {
945*e4b17023SJohn Marino case 3:
946*e4b17023SJohn Marino body; i++;
947*e4b17023SJohn Marino case 2:
948*e4b17023SJohn Marino body; i++;
949*e4b17023SJohn Marino case 1:
950*e4b17023SJohn Marino body; i++;
951*e4b17023SJohn Marino case 0: ;
952*e4b17023SJohn Marino }
953*e4b17023SJohn Marino
954*e4b17023SJohn Marino while (i < n)
955*e4b17023SJohn Marino {
956*e4b17023SJohn Marino body; i++;
957*e4b17023SJohn Marino body; i++;
958*e4b17023SJohn Marino body; i++;
959*e4b17023SJohn Marino body; i++;
960*e4b17023SJohn Marino }
961*e4b17023SJohn Marino */
962*e4b17023SJohn Marino static void
unroll_loop_runtime_iterations(struct loop * loop)963*e4b17023SJohn Marino unroll_loop_runtime_iterations (struct loop *loop)
964*e4b17023SJohn Marino {
965*e4b17023SJohn Marino rtx old_niter, niter, init_code, branch_code, tmp;
966*e4b17023SJohn Marino unsigned i, j, p;
967*e4b17023SJohn Marino basic_block preheader, *body, swtch, ezc_swtch;
968*e4b17023SJohn Marino VEC (basic_block, heap) *dom_bbs;
969*e4b17023SJohn Marino sbitmap wont_exit;
970*e4b17023SJohn Marino int may_exit_copy;
971*e4b17023SJohn Marino unsigned n_peel;
972*e4b17023SJohn Marino VEC (edge, heap) *remove_edges;
973*e4b17023SJohn Marino edge e;
974*e4b17023SJohn Marino bool extra_zero_check, last_may_exit;
975*e4b17023SJohn Marino unsigned max_unroll = loop->lpt_decision.times;
976*e4b17023SJohn Marino struct niter_desc *desc = get_simple_loop_desc (loop);
977*e4b17023SJohn Marino bool exit_at_end = loop_exit_at_end_p (loop);
978*e4b17023SJohn Marino struct opt_info *opt_info = NULL;
979*e4b17023SJohn Marino bool ok;
980*e4b17023SJohn Marino
981*e4b17023SJohn Marino if (flag_split_ivs_in_unroller
982*e4b17023SJohn Marino || flag_variable_expansion_in_unroller)
983*e4b17023SJohn Marino opt_info = analyze_insns_in_loop (loop);
984*e4b17023SJohn Marino
985*e4b17023SJohn Marino /* Remember blocks whose dominators will have to be updated. */
986*e4b17023SJohn Marino dom_bbs = NULL;
987*e4b17023SJohn Marino
988*e4b17023SJohn Marino body = get_loop_body (loop);
989*e4b17023SJohn Marino for (i = 0; i < loop->num_nodes; i++)
990*e4b17023SJohn Marino {
991*e4b17023SJohn Marino VEC (basic_block, heap) *ldom;
992*e4b17023SJohn Marino basic_block bb;
993*e4b17023SJohn Marino
994*e4b17023SJohn Marino ldom = get_dominated_by (CDI_DOMINATORS, body[i]);
995*e4b17023SJohn Marino FOR_EACH_VEC_ELT (basic_block, ldom, j, bb)
996*e4b17023SJohn Marino if (!flow_bb_inside_loop_p (loop, bb))
997*e4b17023SJohn Marino VEC_safe_push (basic_block, heap, dom_bbs, bb);
998*e4b17023SJohn Marino
999*e4b17023SJohn Marino VEC_free (basic_block, heap, ldom);
1000*e4b17023SJohn Marino }
1001*e4b17023SJohn Marino free (body);
1002*e4b17023SJohn Marino
1003*e4b17023SJohn Marino if (!exit_at_end)
1004*e4b17023SJohn Marino {
1005*e4b17023SJohn Marino /* Leave exit in first copy (for explanation why see comment in
1006*e4b17023SJohn Marino unroll_loop_constant_iterations). */
1007*e4b17023SJohn Marino may_exit_copy = 0;
1008*e4b17023SJohn Marino n_peel = max_unroll - 1;
1009*e4b17023SJohn Marino extra_zero_check = true;
1010*e4b17023SJohn Marino last_may_exit = false;
1011*e4b17023SJohn Marino }
1012*e4b17023SJohn Marino else
1013*e4b17023SJohn Marino {
1014*e4b17023SJohn Marino /* Leave exit in last copy (for explanation why see comment in
1015*e4b17023SJohn Marino unroll_loop_constant_iterations). */
1016*e4b17023SJohn Marino may_exit_copy = max_unroll;
1017*e4b17023SJohn Marino n_peel = max_unroll;
1018*e4b17023SJohn Marino extra_zero_check = false;
1019*e4b17023SJohn Marino last_may_exit = true;
1020*e4b17023SJohn Marino }
1021*e4b17023SJohn Marino
1022*e4b17023SJohn Marino /* Get expression for number of iterations. */
1023*e4b17023SJohn Marino start_sequence ();
1024*e4b17023SJohn Marino old_niter = niter = gen_reg_rtx (desc->mode);
1025*e4b17023SJohn Marino tmp = force_operand (copy_rtx (desc->niter_expr), niter);
1026*e4b17023SJohn Marino if (tmp != niter)
1027*e4b17023SJohn Marino emit_move_insn (niter, tmp);
1028*e4b17023SJohn Marino
1029*e4b17023SJohn Marino /* Count modulo by ANDing it with max_unroll; we use the fact that
1030*e4b17023SJohn Marino the number of unrollings is a power of two, and thus this is correct
1031*e4b17023SJohn Marino even if there is overflow in the computation. */
1032*e4b17023SJohn Marino niter = expand_simple_binop (desc->mode, AND,
1033*e4b17023SJohn Marino niter,
1034*e4b17023SJohn Marino GEN_INT (max_unroll),
1035*e4b17023SJohn Marino NULL_RTX, 0, OPTAB_LIB_WIDEN);
1036*e4b17023SJohn Marino
1037*e4b17023SJohn Marino init_code = get_insns ();
1038*e4b17023SJohn Marino end_sequence ();
1039*e4b17023SJohn Marino unshare_all_rtl_in_chain (init_code);
1040*e4b17023SJohn Marino
1041*e4b17023SJohn Marino /* Precondition the loop. */
1042*e4b17023SJohn Marino split_edge_and_insert (loop_preheader_edge (loop), init_code);
1043*e4b17023SJohn Marino
1044*e4b17023SJohn Marino remove_edges = NULL;
1045*e4b17023SJohn Marino
1046*e4b17023SJohn Marino wont_exit = sbitmap_alloc (max_unroll + 2);
1047*e4b17023SJohn Marino
1048*e4b17023SJohn Marino /* Peel the first copy of loop body (almost always we must leave exit test
1049*e4b17023SJohn Marino here; the only exception is when we have extra zero check and the number
1050*e4b17023SJohn Marino of iterations is reliable. Also record the place of (possible) extra
1051*e4b17023SJohn Marino zero check. */
1052*e4b17023SJohn Marino sbitmap_zero (wont_exit);
1053*e4b17023SJohn Marino if (extra_zero_check
1054*e4b17023SJohn Marino && !desc->noloop_assumptions)
1055*e4b17023SJohn Marino SET_BIT (wont_exit, 1);
1056*e4b17023SJohn Marino ezc_swtch = loop_preheader_edge (loop)->src;
1057*e4b17023SJohn Marino ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
1058*e4b17023SJohn Marino 1, wont_exit, desc->out_edge,
1059*e4b17023SJohn Marino &remove_edges,
1060*e4b17023SJohn Marino DLTHE_FLAG_UPDATE_FREQ);
1061*e4b17023SJohn Marino gcc_assert (ok);
1062*e4b17023SJohn Marino
1063*e4b17023SJohn Marino /* Record the place where switch will be built for preconditioning. */
1064*e4b17023SJohn Marino swtch = split_edge (loop_preheader_edge (loop));
1065*e4b17023SJohn Marino
1066*e4b17023SJohn Marino for (i = 0; i < n_peel; i++)
1067*e4b17023SJohn Marino {
1068*e4b17023SJohn Marino /* Peel the copy. */
1069*e4b17023SJohn Marino sbitmap_zero (wont_exit);
1070*e4b17023SJohn Marino if (i != n_peel - 1 || !last_may_exit)
1071*e4b17023SJohn Marino SET_BIT (wont_exit, 1);
1072*e4b17023SJohn Marino ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
1073*e4b17023SJohn Marino 1, wont_exit, desc->out_edge,
1074*e4b17023SJohn Marino &remove_edges,
1075*e4b17023SJohn Marino DLTHE_FLAG_UPDATE_FREQ);
1076*e4b17023SJohn Marino gcc_assert (ok);
1077*e4b17023SJohn Marino
1078*e4b17023SJohn Marino /* Create item for switch. */
1079*e4b17023SJohn Marino j = n_peel - i - (extra_zero_check ? 0 : 1);
1080*e4b17023SJohn Marino p = REG_BR_PROB_BASE / (i + 2);
1081*e4b17023SJohn Marino
1082*e4b17023SJohn Marino preheader = split_edge (loop_preheader_edge (loop));
1083*e4b17023SJohn Marino branch_code = compare_and_jump_seq (copy_rtx (niter), GEN_INT (j), EQ,
1084*e4b17023SJohn Marino block_label (preheader), p,
1085*e4b17023SJohn Marino NULL_RTX);
1086*e4b17023SJohn Marino
1087*e4b17023SJohn Marino /* We rely on the fact that the compare and jump cannot be optimized out,
1088*e4b17023SJohn Marino and hence the cfg we create is correct. */
1089*e4b17023SJohn Marino gcc_assert (branch_code != NULL_RTX);
1090*e4b17023SJohn Marino
1091*e4b17023SJohn Marino swtch = split_edge_and_insert (single_pred_edge (swtch), branch_code);
1092*e4b17023SJohn Marino set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
1093*e4b17023SJohn Marino single_pred_edge (swtch)->probability = REG_BR_PROB_BASE - p;
1094*e4b17023SJohn Marino e = make_edge (swtch, preheader,
1095*e4b17023SJohn Marino single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
1096*e4b17023SJohn Marino e->probability = p;
1097*e4b17023SJohn Marino }
1098*e4b17023SJohn Marino
1099*e4b17023SJohn Marino if (extra_zero_check)
1100*e4b17023SJohn Marino {
1101*e4b17023SJohn Marino /* Add branch for zero iterations. */
1102*e4b17023SJohn Marino p = REG_BR_PROB_BASE / (max_unroll + 1);
1103*e4b17023SJohn Marino swtch = ezc_swtch;
1104*e4b17023SJohn Marino preheader = split_edge (loop_preheader_edge (loop));
1105*e4b17023SJohn Marino branch_code = compare_and_jump_seq (copy_rtx (niter), const0_rtx, EQ,
1106*e4b17023SJohn Marino block_label (preheader), p,
1107*e4b17023SJohn Marino NULL_RTX);
1108*e4b17023SJohn Marino gcc_assert (branch_code != NULL_RTX);
1109*e4b17023SJohn Marino
1110*e4b17023SJohn Marino swtch = split_edge_and_insert (single_succ_edge (swtch), branch_code);
1111*e4b17023SJohn Marino set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
1112*e4b17023SJohn Marino single_succ_edge (swtch)->probability = REG_BR_PROB_BASE - p;
1113*e4b17023SJohn Marino e = make_edge (swtch, preheader,
1114*e4b17023SJohn Marino single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
1115*e4b17023SJohn Marino e->probability = p;
1116*e4b17023SJohn Marino }
1117*e4b17023SJohn Marino
1118*e4b17023SJohn Marino /* Recount dominators for outer blocks. */
1119*e4b17023SJohn Marino iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, false);
1120*e4b17023SJohn Marino
1121*e4b17023SJohn Marino /* And unroll loop. */
1122*e4b17023SJohn Marino
1123*e4b17023SJohn Marino sbitmap_ones (wont_exit);
1124*e4b17023SJohn Marino RESET_BIT (wont_exit, may_exit_copy);
1125*e4b17023SJohn Marino opt_info_start_duplication (opt_info);
1126*e4b17023SJohn Marino
1127*e4b17023SJohn Marino ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
1128*e4b17023SJohn Marino max_unroll,
1129*e4b17023SJohn Marino wont_exit, desc->out_edge,
1130*e4b17023SJohn Marino &remove_edges,
1131*e4b17023SJohn Marino DLTHE_FLAG_UPDATE_FREQ
1132*e4b17023SJohn Marino | (opt_info
1133*e4b17023SJohn Marino ? DLTHE_RECORD_COPY_NUMBER
1134*e4b17023SJohn Marino : 0));
1135*e4b17023SJohn Marino gcc_assert (ok);
1136*e4b17023SJohn Marino
1137*e4b17023SJohn Marino if (opt_info)
1138*e4b17023SJohn Marino {
1139*e4b17023SJohn Marino apply_opt_in_copies (opt_info, max_unroll, true, true);
1140*e4b17023SJohn Marino free_opt_info (opt_info);
1141*e4b17023SJohn Marino }
1142*e4b17023SJohn Marino
1143*e4b17023SJohn Marino free (wont_exit);
1144*e4b17023SJohn Marino
1145*e4b17023SJohn Marino if (exit_at_end)
1146*e4b17023SJohn Marino {
1147*e4b17023SJohn Marino basic_block exit_block = get_bb_copy (desc->in_edge->src);
1148*e4b17023SJohn Marino /* Find a new in and out edge; they are in the last copy we have
1149*e4b17023SJohn Marino made. */
1150*e4b17023SJohn Marino
1151*e4b17023SJohn Marino if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
1152*e4b17023SJohn Marino {
1153*e4b17023SJohn Marino desc->out_edge = EDGE_SUCC (exit_block, 0);
1154*e4b17023SJohn Marino desc->in_edge = EDGE_SUCC (exit_block, 1);
1155*e4b17023SJohn Marino }
1156*e4b17023SJohn Marino else
1157*e4b17023SJohn Marino {
1158*e4b17023SJohn Marino desc->out_edge = EDGE_SUCC (exit_block, 1);
1159*e4b17023SJohn Marino desc->in_edge = EDGE_SUCC (exit_block, 0);
1160*e4b17023SJohn Marino }
1161*e4b17023SJohn Marino }
1162*e4b17023SJohn Marino
1163*e4b17023SJohn Marino /* Remove the edges. */
1164*e4b17023SJohn Marino FOR_EACH_VEC_ELT (edge, remove_edges, i, e)
1165*e4b17023SJohn Marino remove_path (e);
1166*e4b17023SJohn Marino VEC_free (edge, heap, remove_edges);
1167*e4b17023SJohn Marino
1168*e4b17023SJohn Marino /* We must be careful when updating the number of iterations due to
1169*e4b17023SJohn Marino preconditioning and the fact that the value must be valid at entry
1170*e4b17023SJohn Marino of the loop. After passing through the above code, we see that
1171*e4b17023SJohn Marino the correct new number of iterations is this: */
1172*e4b17023SJohn Marino gcc_assert (!desc->const_iter);
1173*e4b17023SJohn Marino desc->niter_expr =
1174*e4b17023SJohn Marino simplify_gen_binary (UDIV, desc->mode, old_niter,
1175*e4b17023SJohn Marino GEN_INT (max_unroll + 1));
1176*e4b17023SJohn Marino desc->niter_max /= max_unroll + 1;
1177*e4b17023SJohn Marino if (exit_at_end)
1178*e4b17023SJohn Marino {
1179*e4b17023SJohn Marino desc->niter_expr =
1180*e4b17023SJohn Marino simplify_gen_binary (MINUS, desc->mode, desc->niter_expr, const1_rtx);
1181*e4b17023SJohn Marino desc->noloop_assumptions = NULL_RTX;
1182*e4b17023SJohn Marino desc->niter_max--;
1183*e4b17023SJohn Marino }
1184*e4b17023SJohn Marino
1185*e4b17023SJohn Marino if (dump_file)
1186*e4b17023SJohn Marino fprintf (dump_file,
1187*e4b17023SJohn Marino ";; Unrolled loop %d times, counting # of iterations "
1188*e4b17023SJohn Marino "in runtime, %i insns\n",
1189*e4b17023SJohn Marino max_unroll, num_loop_insns (loop));
1190*e4b17023SJohn Marino
1191*e4b17023SJohn Marino VEC_free (basic_block, heap, dom_bbs);
1192*e4b17023SJohn Marino }
1193*e4b17023SJohn Marino
1194*e4b17023SJohn Marino /* Decide whether to simply peel LOOP and how much. */
1195*e4b17023SJohn Marino static void
decide_peel_simple(struct loop * loop,int flags)1196*e4b17023SJohn Marino decide_peel_simple (struct loop *loop, int flags)
1197*e4b17023SJohn Marino {
1198*e4b17023SJohn Marino unsigned npeel;
1199*e4b17023SJohn Marino struct niter_desc *desc;
1200*e4b17023SJohn Marino
1201*e4b17023SJohn Marino if (!(flags & UAP_PEEL))
1202*e4b17023SJohn Marino {
1203*e4b17023SJohn Marino /* We were not asked to, just return back silently. */
1204*e4b17023SJohn Marino return;
1205*e4b17023SJohn Marino }
1206*e4b17023SJohn Marino
1207*e4b17023SJohn Marino if (dump_file)
1208*e4b17023SJohn Marino fprintf (dump_file, "\n;; Considering simply peeling loop\n");
1209*e4b17023SJohn Marino
1210*e4b17023SJohn Marino /* npeel = number of iterations to peel. */
1211*e4b17023SJohn Marino npeel = PARAM_VALUE (PARAM_MAX_PEELED_INSNS) / loop->ninsns;
1212*e4b17023SJohn Marino if (npeel > (unsigned) PARAM_VALUE (PARAM_MAX_PEEL_TIMES))
1213*e4b17023SJohn Marino npeel = PARAM_VALUE (PARAM_MAX_PEEL_TIMES);
1214*e4b17023SJohn Marino
1215*e4b17023SJohn Marino /* Skip big loops. */
1216*e4b17023SJohn Marino if (!npeel)
1217*e4b17023SJohn Marino {
1218*e4b17023SJohn Marino if (dump_file)
1219*e4b17023SJohn Marino fprintf (dump_file, ";; Not considering loop, is too big\n");
1220*e4b17023SJohn Marino return;
1221*e4b17023SJohn Marino }
1222*e4b17023SJohn Marino
1223*e4b17023SJohn Marino /* Check for simple loops. */
1224*e4b17023SJohn Marino desc = get_simple_loop_desc (loop);
1225*e4b17023SJohn Marino
1226*e4b17023SJohn Marino /* Check number of iterations. */
1227*e4b17023SJohn Marino if (desc->simple_p && !desc->assumptions && desc->const_iter)
1228*e4b17023SJohn Marino {
1229*e4b17023SJohn Marino if (dump_file)
1230*e4b17023SJohn Marino fprintf (dump_file, ";; Loop iterates constant times\n");
1231*e4b17023SJohn Marino return;
1232*e4b17023SJohn Marino }
1233*e4b17023SJohn Marino
1234*e4b17023SJohn Marino /* Do not simply peel loops with branches inside -- it increases number
1235*e4b17023SJohn Marino of mispredicts. */
1236*e4b17023SJohn Marino if (num_loop_branches (loop) > 1)
1237*e4b17023SJohn Marino {
1238*e4b17023SJohn Marino if (dump_file)
1239*e4b17023SJohn Marino fprintf (dump_file, ";; Not peeling, contains branches\n");
1240*e4b17023SJohn Marino return;
1241*e4b17023SJohn Marino }
1242*e4b17023SJohn Marino
1243*e4b17023SJohn Marino if (loop->header->count)
1244*e4b17023SJohn Marino {
1245*e4b17023SJohn Marino unsigned niter = expected_loop_iterations (loop);
1246*e4b17023SJohn Marino if (niter + 1 > npeel)
1247*e4b17023SJohn Marino {
1248*e4b17023SJohn Marino if (dump_file)
1249*e4b17023SJohn Marino {
1250*e4b17023SJohn Marino fprintf (dump_file, ";; Not peeling loop, rolls too much (");
1251*e4b17023SJohn Marino fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
1252*e4b17023SJohn Marino (HOST_WIDEST_INT) (niter + 1));
1253*e4b17023SJohn Marino fprintf (dump_file, " iterations > %d [maximum peelings])\n",
1254*e4b17023SJohn Marino npeel);
1255*e4b17023SJohn Marino }
1256*e4b17023SJohn Marino return;
1257*e4b17023SJohn Marino }
1258*e4b17023SJohn Marino npeel = niter + 1;
1259*e4b17023SJohn Marino }
1260*e4b17023SJohn Marino else
1261*e4b17023SJohn Marino {
1262*e4b17023SJohn Marino /* For now we have no good heuristics to decide whether loop peeling
1263*e4b17023SJohn Marino will be effective, so disable it. */
1264*e4b17023SJohn Marino if (dump_file)
1265*e4b17023SJohn Marino fprintf (dump_file,
1266*e4b17023SJohn Marino ";; Not peeling loop, no evidence it will be profitable\n");
1267*e4b17023SJohn Marino return;
1268*e4b17023SJohn Marino }
1269*e4b17023SJohn Marino
1270*e4b17023SJohn Marino /* Success. */
1271*e4b17023SJohn Marino loop->lpt_decision.decision = LPT_PEEL_SIMPLE;
1272*e4b17023SJohn Marino loop->lpt_decision.times = npeel;
1273*e4b17023SJohn Marino
1274*e4b17023SJohn Marino if (dump_file)
1275*e4b17023SJohn Marino fprintf (dump_file, ";; Decided to simply peel the loop, %d times.\n",
1276*e4b17023SJohn Marino loop->lpt_decision.times);
1277*e4b17023SJohn Marino }
1278*e4b17023SJohn Marino
1279*e4b17023SJohn Marino /* Peel a LOOP LOOP->LPT_DECISION.TIMES times. The transformation:
1280*e4b17023SJohn Marino while (cond)
1281*e4b17023SJohn Marino body;
1282*e4b17023SJohn Marino
1283*e4b17023SJohn Marino ==>
1284*e4b17023SJohn Marino
1285*e4b17023SJohn Marino if (!cond) goto end;
1286*e4b17023SJohn Marino body;
1287*e4b17023SJohn Marino if (!cond) goto end;
1288*e4b17023SJohn Marino body;
1289*e4b17023SJohn Marino while (cond)
1290*e4b17023SJohn Marino body;
1291*e4b17023SJohn Marino end: ;
1292*e4b17023SJohn Marino */
1293*e4b17023SJohn Marino static void
peel_loop_simple(struct loop * loop)1294*e4b17023SJohn Marino peel_loop_simple (struct loop *loop)
1295*e4b17023SJohn Marino {
1296*e4b17023SJohn Marino sbitmap wont_exit;
1297*e4b17023SJohn Marino unsigned npeel = loop->lpt_decision.times;
1298*e4b17023SJohn Marino struct niter_desc *desc = get_simple_loop_desc (loop);
1299*e4b17023SJohn Marino struct opt_info *opt_info = NULL;
1300*e4b17023SJohn Marino bool ok;
1301*e4b17023SJohn Marino
1302*e4b17023SJohn Marino if (flag_split_ivs_in_unroller && npeel > 1)
1303*e4b17023SJohn Marino opt_info = analyze_insns_in_loop (loop);
1304*e4b17023SJohn Marino
1305*e4b17023SJohn Marino wont_exit = sbitmap_alloc (npeel + 1);
1306*e4b17023SJohn Marino sbitmap_zero (wont_exit);
1307*e4b17023SJohn Marino
1308*e4b17023SJohn Marino opt_info_start_duplication (opt_info);
1309*e4b17023SJohn Marino
1310*e4b17023SJohn Marino ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
1311*e4b17023SJohn Marino npeel, wont_exit, NULL,
1312*e4b17023SJohn Marino NULL, DLTHE_FLAG_UPDATE_FREQ
1313*e4b17023SJohn Marino | (opt_info
1314*e4b17023SJohn Marino ? DLTHE_RECORD_COPY_NUMBER
1315*e4b17023SJohn Marino : 0));
1316*e4b17023SJohn Marino gcc_assert (ok);
1317*e4b17023SJohn Marino
1318*e4b17023SJohn Marino free (wont_exit);
1319*e4b17023SJohn Marino
1320*e4b17023SJohn Marino if (opt_info)
1321*e4b17023SJohn Marino {
1322*e4b17023SJohn Marino apply_opt_in_copies (opt_info, npeel, false, false);
1323*e4b17023SJohn Marino free_opt_info (opt_info);
1324*e4b17023SJohn Marino }
1325*e4b17023SJohn Marino
1326*e4b17023SJohn Marino if (desc->simple_p)
1327*e4b17023SJohn Marino {
1328*e4b17023SJohn Marino if (desc->const_iter)
1329*e4b17023SJohn Marino {
1330*e4b17023SJohn Marino desc->niter -= npeel;
1331*e4b17023SJohn Marino desc->niter_expr = GEN_INT (desc->niter);
1332*e4b17023SJohn Marino desc->noloop_assumptions = NULL_RTX;
1333*e4b17023SJohn Marino }
1334*e4b17023SJohn Marino else
1335*e4b17023SJohn Marino {
1336*e4b17023SJohn Marino /* We cannot just update niter_expr, as its value might be clobbered
1337*e4b17023SJohn Marino inside loop. We could handle this by counting the number into
1338*e4b17023SJohn Marino temporary just like we do in runtime unrolling, but it does not
1339*e4b17023SJohn Marino seem worthwhile. */
1340*e4b17023SJohn Marino free_simple_loop_desc (loop);
1341*e4b17023SJohn Marino }
1342*e4b17023SJohn Marino }
1343*e4b17023SJohn Marino if (dump_file)
1344*e4b17023SJohn Marino fprintf (dump_file, ";; Peeling loop %d times\n", npeel);
1345*e4b17023SJohn Marino }
1346*e4b17023SJohn Marino
1347*e4b17023SJohn Marino /* Decide whether to unroll LOOP stupidly and how much. */
1348*e4b17023SJohn Marino static void
decide_unroll_stupid(struct loop * loop,int flags)1349*e4b17023SJohn Marino decide_unroll_stupid (struct loop *loop, int flags)
1350*e4b17023SJohn Marino {
1351*e4b17023SJohn Marino unsigned nunroll, nunroll_by_av, i;
1352*e4b17023SJohn Marino struct niter_desc *desc;
1353*e4b17023SJohn Marino
1354*e4b17023SJohn Marino if (!(flags & UAP_UNROLL_ALL))
1355*e4b17023SJohn Marino {
1356*e4b17023SJohn Marino /* We were not asked to, just return back silently. */
1357*e4b17023SJohn Marino return;
1358*e4b17023SJohn Marino }
1359*e4b17023SJohn Marino
1360*e4b17023SJohn Marino if (dump_file)
1361*e4b17023SJohn Marino fprintf (dump_file, "\n;; Considering unrolling loop stupidly\n");
1362*e4b17023SJohn Marino
1363*e4b17023SJohn Marino /* nunroll = total number of copies of the original loop body in
1364*e4b17023SJohn Marino unrolled loop (i.e. if it is 2, we have to duplicate loop body once. */
1365*e4b17023SJohn Marino nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
1366*e4b17023SJohn Marino nunroll_by_av
1367*e4b17023SJohn Marino = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
1368*e4b17023SJohn Marino if (nunroll > nunroll_by_av)
1369*e4b17023SJohn Marino nunroll = nunroll_by_av;
1370*e4b17023SJohn Marino if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
1371*e4b17023SJohn Marino nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
1372*e4b17023SJohn Marino
1373*e4b17023SJohn Marino if (targetm.loop_unroll_adjust)
1374*e4b17023SJohn Marino nunroll = targetm.loop_unroll_adjust (nunroll, loop);
1375*e4b17023SJohn Marino
1376*e4b17023SJohn Marino /* Skip big loops. */
1377*e4b17023SJohn Marino if (nunroll <= 1)
1378*e4b17023SJohn Marino {
1379*e4b17023SJohn Marino if (dump_file)
1380*e4b17023SJohn Marino fprintf (dump_file, ";; Not considering loop, is too big\n");
1381*e4b17023SJohn Marino return;
1382*e4b17023SJohn Marino }
1383*e4b17023SJohn Marino
1384*e4b17023SJohn Marino /* Check for simple loops. */
1385*e4b17023SJohn Marino desc = get_simple_loop_desc (loop);
1386*e4b17023SJohn Marino
1387*e4b17023SJohn Marino /* Check simpleness. */
1388*e4b17023SJohn Marino if (desc->simple_p && !desc->assumptions)
1389*e4b17023SJohn Marino {
1390*e4b17023SJohn Marino if (dump_file)
1391*e4b17023SJohn Marino fprintf (dump_file, ";; The loop is simple\n");
1392*e4b17023SJohn Marino return;
1393*e4b17023SJohn Marino }
1394*e4b17023SJohn Marino
1395*e4b17023SJohn Marino /* Do not unroll loops with branches inside -- it increases number
1396*e4b17023SJohn Marino of mispredicts. */
1397*e4b17023SJohn Marino if (num_loop_branches (loop) > 1)
1398*e4b17023SJohn Marino {
1399*e4b17023SJohn Marino if (dump_file)
1400*e4b17023SJohn Marino fprintf (dump_file, ";; Not unrolling, contains branches\n");
1401*e4b17023SJohn Marino return;
1402*e4b17023SJohn Marino }
1403*e4b17023SJohn Marino
1404*e4b17023SJohn Marino /* If we have profile feedback, check whether the loop rolls. */
1405*e4b17023SJohn Marino if (loop->header->count
1406*e4b17023SJohn Marino && expected_loop_iterations (loop) < 2 * nunroll)
1407*e4b17023SJohn Marino {
1408*e4b17023SJohn Marino if (dump_file)
1409*e4b17023SJohn Marino fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
1410*e4b17023SJohn Marino return;
1411*e4b17023SJohn Marino }
1412*e4b17023SJohn Marino
1413*e4b17023SJohn Marino /* Success. Now force nunroll to be power of 2, as it seems that this
1414*e4b17023SJohn Marino improves results (partially because of better alignments, partially
1415*e4b17023SJohn Marino because of some dark magic). */
1416*e4b17023SJohn Marino for (i = 1; 2 * i <= nunroll; i *= 2)
1417*e4b17023SJohn Marino continue;
1418*e4b17023SJohn Marino
1419*e4b17023SJohn Marino loop->lpt_decision.decision = LPT_UNROLL_STUPID;
1420*e4b17023SJohn Marino loop->lpt_decision.times = i - 1;
1421*e4b17023SJohn Marino
1422*e4b17023SJohn Marino if (dump_file)
1423*e4b17023SJohn Marino fprintf (dump_file,
1424*e4b17023SJohn Marino ";; Decided to unroll the loop stupidly, %d times.\n",
1425*e4b17023SJohn Marino loop->lpt_decision.times);
1426*e4b17023SJohn Marino }
1427*e4b17023SJohn Marino
1428*e4b17023SJohn Marino /* Unroll a LOOP LOOP->LPT_DECISION.TIMES times. The transformation:
1429*e4b17023SJohn Marino while (cond)
1430*e4b17023SJohn Marino body;
1431*e4b17023SJohn Marino
1432*e4b17023SJohn Marino ==>
1433*e4b17023SJohn Marino
1434*e4b17023SJohn Marino while (cond)
1435*e4b17023SJohn Marino {
1436*e4b17023SJohn Marino body;
1437*e4b17023SJohn Marino if (!cond) break;
1438*e4b17023SJohn Marino body;
1439*e4b17023SJohn Marino if (!cond) break;
1440*e4b17023SJohn Marino body;
1441*e4b17023SJohn Marino if (!cond) break;
1442*e4b17023SJohn Marino body;
1443*e4b17023SJohn Marino }
1444*e4b17023SJohn Marino */
1445*e4b17023SJohn Marino static void
unroll_loop_stupid(struct loop * loop)1446*e4b17023SJohn Marino unroll_loop_stupid (struct loop *loop)
1447*e4b17023SJohn Marino {
1448*e4b17023SJohn Marino sbitmap wont_exit;
1449*e4b17023SJohn Marino unsigned nunroll = loop->lpt_decision.times;
1450*e4b17023SJohn Marino struct niter_desc *desc = get_simple_loop_desc (loop);
1451*e4b17023SJohn Marino struct opt_info *opt_info = NULL;
1452*e4b17023SJohn Marino bool ok;
1453*e4b17023SJohn Marino
1454*e4b17023SJohn Marino if (flag_split_ivs_in_unroller
1455*e4b17023SJohn Marino || flag_variable_expansion_in_unroller)
1456*e4b17023SJohn Marino opt_info = analyze_insns_in_loop (loop);
1457*e4b17023SJohn Marino
1458*e4b17023SJohn Marino
1459*e4b17023SJohn Marino wont_exit = sbitmap_alloc (nunroll + 1);
1460*e4b17023SJohn Marino sbitmap_zero (wont_exit);
1461*e4b17023SJohn Marino opt_info_start_duplication (opt_info);
1462*e4b17023SJohn Marino
1463*e4b17023SJohn Marino ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
1464*e4b17023SJohn Marino nunroll, wont_exit,
1465*e4b17023SJohn Marino NULL, NULL,
1466*e4b17023SJohn Marino DLTHE_FLAG_UPDATE_FREQ
1467*e4b17023SJohn Marino | (opt_info
1468*e4b17023SJohn Marino ? DLTHE_RECORD_COPY_NUMBER
1469*e4b17023SJohn Marino : 0));
1470*e4b17023SJohn Marino gcc_assert (ok);
1471*e4b17023SJohn Marino
1472*e4b17023SJohn Marino if (opt_info)
1473*e4b17023SJohn Marino {
1474*e4b17023SJohn Marino apply_opt_in_copies (opt_info, nunroll, true, true);
1475*e4b17023SJohn Marino free_opt_info (opt_info);
1476*e4b17023SJohn Marino }
1477*e4b17023SJohn Marino
1478*e4b17023SJohn Marino free (wont_exit);
1479*e4b17023SJohn Marino
1480*e4b17023SJohn Marino if (desc->simple_p)
1481*e4b17023SJohn Marino {
1482*e4b17023SJohn Marino /* We indeed may get here provided that there are nontrivial assumptions
1483*e4b17023SJohn Marino for a loop to be really simple. We could update the counts, but the
1484*e4b17023SJohn Marino problem is that we are unable to decide which exit will be taken
1485*e4b17023SJohn Marino (not really true in case the number of iterations is constant,
1486*e4b17023SJohn Marino but noone will do anything with this information, so we do not
1487*e4b17023SJohn Marino worry about it). */
1488*e4b17023SJohn Marino desc->simple_p = false;
1489*e4b17023SJohn Marino }
1490*e4b17023SJohn Marino
1491*e4b17023SJohn Marino if (dump_file)
1492*e4b17023SJohn Marino fprintf (dump_file, ";; Unrolled loop %d times, %i insns\n",
1493*e4b17023SJohn Marino nunroll, num_loop_insns (loop));
1494*e4b17023SJohn Marino }
1495*e4b17023SJohn Marino
1496*e4b17023SJohn Marino /* A hash function for information about insns to split. */
1497*e4b17023SJohn Marino
1498*e4b17023SJohn Marino static hashval_t
si_info_hash(const void * ivts)1499*e4b17023SJohn Marino si_info_hash (const void *ivts)
1500*e4b17023SJohn Marino {
1501*e4b17023SJohn Marino return (hashval_t) INSN_UID (((const struct iv_to_split *) ivts)->insn);
1502*e4b17023SJohn Marino }
1503*e4b17023SJohn Marino
1504*e4b17023SJohn Marino /* An equality functions for information about insns to split. */
1505*e4b17023SJohn Marino
1506*e4b17023SJohn Marino static int
si_info_eq(const void * ivts1,const void * ivts2)1507*e4b17023SJohn Marino si_info_eq (const void *ivts1, const void *ivts2)
1508*e4b17023SJohn Marino {
1509*e4b17023SJohn Marino const struct iv_to_split *const i1 = (const struct iv_to_split *) ivts1;
1510*e4b17023SJohn Marino const struct iv_to_split *const i2 = (const struct iv_to_split *) ivts2;
1511*e4b17023SJohn Marino
1512*e4b17023SJohn Marino return i1->insn == i2->insn;
1513*e4b17023SJohn Marino }
1514*e4b17023SJohn Marino
1515*e4b17023SJohn Marino /* Return a hash for VES, which is really a "var_to_expand *". */
1516*e4b17023SJohn Marino
1517*e4b17023SJohn Marino static hashval_t
ve_info_hash(const void * ves)1518*e4b17023SJohn Marino ve_info_hash (const void *ves)
1519*e4b17023SJohn Marino {
1520*e4b17023SJohn Marino return (hashval_t) INSN_UID (((const struct var_to_expand *) ves)->insn);
1521*e4b17023SJohn Marino }
1522*e4b17023SJohn Marino
1523*e4b17023SJohn Marino /* Return true if IVTS1 and IVTS2 (which are really both of type
1524*e4b17023SJohn Marino "var_to_expand *") refer to the same instruction. */
1525*e4b17023SJohn Marino
1526*e4b17023SJohn Marino static int
ve_info_eq(const void * ivts1,const void * ivts2)1527*e4b17023SJohn Marino ve_info_eq (const void *ivts1, const void *ivts2)
1528*e4b17023SJohn Marino {
1529*e4b17023SJohn Marino const struct var_to_expand *const i1 = (const struct var_to_expand *) ivts1;
1530*e4b17023SJohn Marino const struct var_to_expand *const i2 = (const struct var_to_expand *) ivts2;
1531*e4b17023SJohn Marino
1532*e4b17023SJohn Marino return i1->insn == i2->insn;
1533*e4b17023SJohn Marino }
1534*e4b17023SJohn Marino
1535*e4b17023SJohn Marino /* Returns true if REG is referenced in one nondebug insn in LOOP.
1536*e4b17023SJohn Marino Set *DEBUG_USES to the number of debug insns that reference the
1537*e4b17023SJohn Marino variable. */
1538*e4b17023SJohn Marino
1539*e4b17023SJohn Marino bool
referenced_in_one_insn_in_loop_p(struct loop * loop,rtx reg,int * debug_uses)1540*e4b17023SJohn Marino referenced_in_one_insn_in_loop_p (struct loop *loop, rtx reg,
1541*e4b17023SJohn Marino int *debug_uses)
1542*e4b17023SJohn Marino {
1543*e4b17023SJohn Marino basic_block *body, bb;
1544*e4b17023SJohn Marino unsigned i;
1545*e4b17023SJohn Marino int count_ref = 0;
1546*e4b17023SJohn Marino rtx insn;
1547*e4b17023SJohn Marino
1548*e4b17023SJohn Marino body = get_loop_body (loop);
1549*e4b17023SJohn Marino for (i = 0; i < loop->num_nodes; i++)
1550*e4b17023SJohn Marino {
1551*e4b17023SJohn Marino bb = body[i];
1552*e4b17023SJohn Marino
1553*e4b17023SJohn Marino FOR_BB_INSNS (bb, insn)
1554*e4b17023SJohn Marino if (!rtx_referenced_p (reg, insn))
1555*e4b17023SJohn Marino continue;
1556*e4b17023SJohn Marino else if (DEBUG_INSN_P (insn))
1557*e4b17023SJohn Marino ++*debug_uses;
1558*e4b17023SJohn Marino else if (++count_ref > 1)
1559*e4b17023SJohn Marino break;
1560*e4b17023SJohn Marino }
1561*e4b17023SJohn Marino free (body);
1562*e4b17023SJohn Marino return (count_ref == 1);
1563*e4b17023SJohn Marino }
1564*e4b17023SJohn Marino
1565*e4b17023SJohn Marino /* Reset the DEBUG_USES debug insns in LOOP that reference REG. */
1566*e4b17023SJohn Marino
1567*e4b17023SJohn Marino static void
reset_debug_uses_in_loop(struct loop * loop,rtx reg,int debug_uses)1568*e4b17023SJohn Marino reset_debug_uses_in_loop (struct loop *loop, rtx reg, int debug_uses)
1569*e4b17023SJohn Marino {
1570*e4b17023SJohn Marino basic_block *body, bb;
1571*e4b17023SJohn Marino unsigned i;
1572*e4b17023SJohn Marino rtx insn;
1573*e4b17023SJohn Marino
1574*e4b17023SJohn Marino body = get_loop_body (loop);
1575*e4b17023SJohn Marino for (i = 0; debug_uses && i < loop->num_nodes; i++)
1576*e4b17023SJohn Marino {
1577*e4b17023SJohn Marino bb = body[i];
1578*e4b17023SJohn Marino
1579*e4b17023SJohn Marino FOR_BB_INSNS (bb, insn)
1580*e4b17023SJohn Marino if (!DEBUG_INSN_P (insn) || !rtx_referenced_p (reg, insn))
1581*e4b17023SJohn Marino continue;
1582*e4b17023SJohn Marino else
1583*e4b17023SJohn Marino {
1584*e4b17023SJohn Marino validate_change (insn, &INSN_VAR_LOCATION_LOC (insn),
1585*e4b17023SJohn Marino gen_rtx_UNKNOWN_VAR_LOC (), 0);
1586*e4b17023SJohn Marino if (!--debug_uses)
1587*e4b17023SJohn Marino break;
1588*e4b17023SJohn Marino }
1589*e4b17023SJohn Marino }
1590*e4b17023SJohn Marino free (body);
1591*e4b17023SJohn Marino }
1592*e4b17023SJohn Marino
1593*e4b17023SJohn Marino /* Determine whether INSN contains an accumulator
1594*e4b17023SJohn Marino which can be expanded into separate copies,
1595*e4b17023SJohn Marino one for each copy of the LOOP body.
1596*e4b17023SJohn Marino
1597*e4b17023SJohn Marino for (i = 0 ; i < n; i++)
1598*e4b17023SJohn Marino sum += a[i];
1599*e4b17023SJohn Marino
1600*e4b17023SJohn Marino ==>
1601*e4b17023SJohn Marino
1602*e4b17023SJohn Marino sum += a[i]
1603*e4b17023SJohn Marino ....
1604*e4b17023SJohn Marino i = i+1;
1605*e4b17023SJohn Marino sum1 += a[i]
1606*e4b17023SJohn Marino ....
1607*e4b17023SJohn Marino i = i+1
1608*e4b17023SJohn Marino sum2 += a[i];
1609*e4b17023SJohn Marino ....
1610*e4b17023SJohn Marino
1611*e4b17023SJohn Marino Return NULL if INSN contains no opportunity for expansion of accumulator.
1612*e4b17023SJohn Marino Otherwise, allocate a VAR_TO_EXPAND structure, fill it with the relevant
1613*e4b17023SJohn Marino information and return a pointer to it.
1614*e4b17023SJohn Marino */
1615*e4b17023SJohn Marino
1616*e4b17023SJohn Marino static struct var_to_expand *
analyze_insn_to_expand_var(struct loop * loop,rtx insn)1617*e4b17023SJohn Marino analyze_insn_to_expand_var (struct loop *loop, rtx insn)
1618*e4b17023SJohn Marino {
1619*e4b17023SJohn Marino rtx set, dest, src;
1620*e4b17023SJohn Marino struct var_to_expand *ves;
1621*e4b17023SJohn Marino unsigned accum_pos;
1622*e4b17023SJohn Marino enum rtx_code code;
1623*e4b17023SJohn Marino int debug_uses = 0;
1624*e4b17023SJohn Marino
1625*e4b17023SJohn Marino set = single_set (insn);
1626*e4b17023SJohn Marino if (!set)
1627*e4b17023SJohn Marino return NULL;
1628*e4b17023SJohn Marino
1629*e4b17023SJohn Marino dest = SET_DEST (set);
1630*e4b17023SJohn Marino src = SET_SRC (set);
1631*e4b17023SJohn Marino code = GET_CODE (src);
1632*e4b17023SJohn Marino
1633*e4b17023SJohn Marino if (code != PLUS && code != MINUS && code != MULT && code != FMA)
1634*e4b17023SJohn Marino return NULL;
1635*e4b17023SJohn Marino
1636*e4b17023SJohn Marino if (FLOAT_MODE_P (GET_MODE (dest)))
1637*e4b17023SJohn Marino {
1638*e4b17023SJohn Marino if (!flag_associative_math)
1639*e4b17023SJohn Marino return NULL;
1640*e4b17023SJohn Marino /* In the case of FMA, we're also changing the rounding. */
1641*e4b17023SJohn Marino if (code == FMA && !flag_unsafe_math_optimizations)
1642*e4b17023SJohn Marino return NULL;
1643*e4b17023SJohn Marino }
1644*e4b17023SJohn Marino
1645*e4b17023SJohn Marino /* Hmm, this is a bit paradoxical. We know that INSN is a valid insn
1646*e4b17023SJohn Marino in MD. But if there is no optab to generate the insn, we can not
1647*e4b17023SJohn Marino perform the variable expansion. This can happen if an MD provides
1648*e4b17023SJohn Marino an insn but not a named pattern to generate it, for example to avoid
1649*e4b17023SJohn Marino producing code that needs additional mode switches like for x87/mmx.
1650*e4b17023SJohn Marino
1651*e4b17023SJohn Marino So we check have_insn_for which looks for an optab for the operation
1652*e4b17023SJohn Marino in SRC. If it doesn't exist, we can't perform the expansion even
1653*e4b17023SJohn Marino though INSN is valid. */
1654*e4b17023SJohn Marino if (!have_insn_for (code, GET_MODE (src)))
1655*e4b17023SJohn Marino return NULL;
1656*e4b17023SJohn Marino
1657*e4b17023SJohn Marino if (!REG_P (dest)
1658*e4b17023SJohn Marino && !(GET_CODE (dest) == SUBREG
1659*e4b17023SJohn Marino && REG_P (SUBREG_REG (dest))))
1660*e4b17023SJohn Marino return NULL;
1661*e4b17023SJohn Marino
1662*e4b17023SJohn Marino /* Find the accumulator use within the operation. */
1663*e4b17023SJohn Marino if (code == FMA)
1664*e4b17023SJohn Marino {
1665*e4b17023SJohn Marino /* We only support accumulation via FMA in the ADD position. */
1666*e4b17023SJohn Marino if (!rtx_equal_p (dest, XEXP (src, 2)))
1667*e4b17023SJohn Marino return NULL;
1668*e4b17023SJohn Marino accum_pos = 2;
1669*e4b17023SJohn Marino }
1670*e4b17023SJohn Marino else if (rtx_equal_p (dest, XEXP (src, 0)))
1671*e4b17023SJohn Marino accum_pos = 0;
1672*e4b17023SJohn Marino else if (rtx_equal_p (dest, XEXP (src, 1)))
1673*e4b17023SJohn Marino {
1674*e4b17023SJohn Marino /* The method of expansion that we are using; which includes the
1675*e4b17023SJohn Marino initialization of the expansions with zero and the summation of
1676*e4b17023SJohn Marino the expansions at the end of the computation will yield wrong
1677*e4b17023SJohn Marino results for (x = something - x) thus avoid using it in that case. */
1678*e4b17023SJohn Marino if (code == MINUS)
1679*e4b17023SJohn Marino return NULL;
1680*e4b17023SJohn Marino accum_pos = 1;
1681*e4b17023SJohn Marino }
1682*e4b17023SJohn Marino else
1683*e4b17023SJohn Marino return NULL;
1684*e4b17023SJohn Marino
1685*e4b17023SJohn Marino /* It must not otherwise be used. */
1686*e4b17023SJohn Marino if (code == FMA)
1687*e4b17023SJohn Marino {
1688*e4b17023SJohn Marino if (rtx_referenced_p (dest, XEXP (src, 0))
1689*e4b17023SJohn Marino || rtx_referenced_p (dest, XEXP (src, 1)))
1690*e4b17023SJohn Marino return NULL;
1691*e4b17023SJohn Marino }
1692*e4b17023SJohn Marino else if (rtx_referenced_p (dest, XEXP (src, 1 - accum_pos)))
1693*e4b17023SJohn Marino return NULL;
1694*e4b17023SJohn Marino
1695*e4b17023SJohn Marino /* It must be used in exactly one insn. */
1696*e4b17023SJohn Marino if (!referenced_in_one_insn_in_loop_p (loop, dest, &debug_uses))
1697*e4b17023SJohn Marino return NULL;
1698*e4b17023SJohn Marino
1699*e4b17023SJohn Marino if (dump_file)
1700*e4b17023SJohn Marino {
1701*e4b17023SJohn Marino fprintf (dump_file, "\n;; Expanding Accumulator ");
1702*e4b17023SJohn Marino print_rtl (dump_file, dest);
1703*e4b17023SJohn Marino fprintf (dump_file, "\n");
1704*e4b17023SJohn Marino }
1705*e4b17023SJohn Marino
1706*e4b17023SJohn Marino if (debug_uses)
1707*e4b17023SJohn Marino /* Instead of resetting the debug insns, we could replace each
1708*e4b17023SJohn Marino debug use in the loop with the sum or product of all expanded
1709*e4b17023SJohn Marino accummulators. Since we'll only know of all expansions at the
1710*e4b17023SJohn Marino end, we'd have to keep track of which vars_to_expand a debug
1711*e4b17023SJohn Marino insn in the loop references, take note of each copy of the
1712*e4b17023SJohn Marino debug insn during unrolling, and when it's all done, compute
1713*e4b17023SJohn Marino the sum or product of each variable and adjust the original
1714*e4b17023SJohn Marino debug insn and each copy thereof. What a pain! */
1715*e4b17023SJohn Marino reset_debug_uses_in_loop (loop, dest, debug_uses);
1716*e4b17023SJohn Marino
1717*e4b17023SJohn Marino /* Record the accumulator to expand. */
1718*e4b17023SJohn Marino ves = XNEW (struct var_to_expand);
1719*e4b17023SJohn Marino ves->insn = insn;
1720*e4b17023SJohn Marino ves->reg = copy_rtx (dest);
1721*e4b17023SJohn Marino ves->var_expansions = VEC_alloc (rtx, heap, 1);
1722*e4b17023SJohn Marino ves->next = NULL;
1723*e4b17023SJohn Marino ves->op = GET_CODE (src);
1724*e4b17023SJohn Marino ves->expansion_count = 0;
1725*e4b17023SJohn Marino ves->reuse_expansion = 0;
1726*e4b17023SJohn Marino ves->accum_pos = accum_pos;
1727*e4b17023SJohn Marino return ves;
1728*e4b17023SJohn Marino }
1729*e4b17023SJohn Marino
1730*e4b17023SJohn Marino /* Determine whether there is an induction variable in INSN that
1731*e4b17023SJohn Marino we would like to split during unrolling.
1732*e4b17023SJohn Marino
1733*e4b17023SJohn Marino I.e. replace
1734*e4b17023SJohn Marino
1735*e4b17023SJohn Marino i = i + 1;
1736*e4b17023SJohn Marino ...
1737*e4b17023SJohn Marino i = i + 1;
1738*e4b17023SJohn Marino ...
1739*e4b17023SJohn Marino i = i + 1;
1740*e4b17023SJohn Marino ...
1741*e4b17023SJohn Marino
1742*e4b17023SJohn Marino type chains by
1743*e4b17023SJohn Marino
1744*e4b17023SJohn Marino i0 = i + 1
1745*e4b17023SJohn Marino ...
1746*e4b17023SJohn Marino i = i0 + 1
1747*e4b17023SJohn Marino ...
1748*e4b17023SJohn Marino i = i0 + 2
1749*e4b17023SJohn Marino ...
1750*e4b17023SJohn Marino
1751*e4b17023SJohn Marino Return NULL if INSN contains no interesting IVs. Otherwise, allocate
1752*e4b17023SJohn Marino an IV_TO_SPLIT structure, fill it with the relevant information and return a
1753*e4b17023SJohn Marino pointer to it. */
1754*e4b17023SJohn Marino
1755*e4b17023SJohn Marino static struct iv_to_split *
analyze_iv_to_split_insn(rtx insn)1756*e4b17023SJohn Marino analyze_iv_to_split_insn (rtx insn)
1757*e4b17023SJohn Marino {
1758*e4b17023SJohn Marino rtx set, dest;
1759*e4b17023SJohn Marino struct rtx_iv iv;
1760*e4b17023SJohn Marino struct iv_to_split *ivts;
1761*e4b17023SJohn Marino bool ok;
1762*e4b17023SJohn Marino
1763*e4b17023SJohn Marino /* For now we just split the basic induction variables. Later this may be
1764*e4b17023SJohn Marino extended for example by selecting also addresses of memory references. */
1765*e4b17023SJohn Marino set = single_set (insn);
1766*e4b17023SJohn Marino if (!set)
1767*e4b17023SJohn Marino return NULL;
1768*e4b17023SJohn Marino
1769*e4b17023SJohn Marino dest = SET_DEST (set);
1770*e4b17023SJohn Marino if (!REG_P (dest))
1771*e4b17023SJohn Marino return NULL;
1772*e4b17023SJohn Marino
1773*e4b17023SJohn Marino if (!biv_p (insn, dest))
1774*e4b17023SJohn Marino return NULL;
1775*e4b17023SJohn Marino
1776*e4b17023SJohn Marino ok = iv_analyze_result (insn, dest, &iv);
1777*e4b17023SJohn Marino
1778*e4b17023SJohn Marino /* This used to be an assert under the assumption that if biv_p returns
1779*e4b17023SJohn Marino true that iv_analyze_result must also return true. However, that
1780*e4b17023SJohn Marino assumption is not strictly correct as evidenced by pr25569.
1781*e4b17023SJohn Marino
1782*e4b17023SJohn Marino Returning NULL when iv_analyze_result returns false is safe and
1783*e4b17023SJohn Marino avoids the problems in pr25569 until the iv_analyze_* routines
1784*e4b17023SJohn Marino can be fixed, which is apparently hard and time consuming
1785*e4b17023SJohn Marino according to their author. */
1786*e4b17023SJohn Marino if (! ok)
1787*e4b17023SJohn Marino return NULL;
1788*e4b17023SJohn Marino
1789*e4b17023SJohn Marino if (iv.step == const0_rtx
1790*e4b17023SJohn Marino || iv.mode != iv.extend_mode)
1791*e4b17023SJohn Marino return NULL;
1792*e4b17023SJohn Marino
1793*e4b17023SJohn Marino /* Record the insn to split. */
1794*e4b17023SJohn Marino ivts = XNEW (struct iv_to_split);
1795*e4b17023SJohn Marino ivts->insn = insn;
1796*e4b17023SJohn Marino ivts->base_var = NULL_RTX;
1797*e4b17023SJohn Marino ivts->step = iv.step;
1798*e4b17023SJohn Marino ivts->next = NULL;
1799*e4b17023SJohn Marino ivts->n_loc = 1;
1800*e4b17023SJohn Marino ivts->loc[0] = 1;
1801*e4b17023SJohn Marino
1802*e4b17023SJohn Marino return ivts;
1803*e4b17023SJohn Marino }
1804*e4b17023SJohn Marino
1805*e4b17023SJohn Marino /* Determines which of insns in LOOP can be optimized.
1806*e4b17023SJohn Marino Return a OPT_INFO struct with the relevant hash tables filled
1807*e4b17023SJohn Marino with all insns to be optimized. The FIRST_NEW_BLOCK field
1808*e4b17023SJohn Marino is undefined for the return value. */
1809*e4b17023SJohn Marino
1810*e4b17023SJohn Marino static struct opt_info *
analyze_insns_in_loop(struct loop * loop)1811*e4b17023SJohn Marino analyze_insns_in_loop (struct loop *loop)
1812*e4b17023SJohn Marino {
1813*e4b17023SJohn Marino basic_block *body, bb;
1814*e4b17023SJohn Marino unsigned i;
1815*e4b17023SJohn Marino struct opt_info *opt_info = XCNEW (struct opt_info);
1816*e4b17023SJohn Marino rtx insn;
1817*e4b17023SJohn Marino struct iv_to_split *ivts = NULL;
1818*e4b17023SJohn Marino struct var_to_expand *ves = NULL;
1819*e4b17023SJohn Marino PTR *slot1;
1820*e4b17023SJohn Marino PTR *slot2;
1821*e4b17023SJohn Marino VEC (edge, heap) *edges = get_loop_exit_edges (loop);
1822*e4b17023SJohn Marino edge exit;
1823*e4b17023SJohn Marino bool can_apply = false;
1824*e4b17023SJohn Marino
1825*e4b17023SJohn Marino iv_analysis_loop_init (loop);
1826*e4b17023SJohn Marino
1827*e4b17023SJohn Marino body = get_loop_body (loop);
1828*e4b17023SJohn Marino
1829*e4b17023SJohn Marino if (flag_split_ivs_in_unroller)
1830*e4b17023SJohn Marino {
1831*e4b17023SJohn Marino opt_info->insns_to_split = htab_create (5 * loop->num_nodes,
1832*e4b17023SJohn Marino si_info_hash, si_info_eq, free);
1833*e4b17023SJohn Marino opt_info->iv_to_split_head = NULL;
1834*e4b17023SJohn Marino opt_info->iv_to_split_tail = &opt_info->iv_to_split_head;
1835*e4b17023SJohn Marino }
1836*e4b17023SJohn Marino
1837*e4b17023SJohn Marino /* Record the loop exit bb and loop preheader before the unrolling. */
1838*e4b17023SJohn Marino opt_info->loop_preheader = loop_preheader_edge (loop)->src;
1839*e4b17023SJohn Marino
1840*e4b17023SJohn Marino if (VEC_length (edge, edges) == 1)
1841*e4b17023SJohn Marino {
1842*e4b17023SJohn Marino exit = VEC_index (edge, edges, 0);
1843*e4b17023SJohn Marino if (!(exit->flags & EDGE_COMPLEX))
1844*e4b17023SJohn Marino {
1845*e4b17023SJohn Marino opt_info->loop_exit = split_edge (exit);
1846*e4b17023SJohn Marino can_apply = true;
1847*e4b17023SJohn Marino }
1848*e4b17023SJohn Marino }
1849*e4b17023SJohn Marino
1850*e4b17023SJohn Marino if (flag_variable_expansion_in_unroller
1851*e4b17023SJohn Marino && can_apply)
1852*e4b17023SJohn Marino {
1853*e4b17023SJohn Marino opt_info->insns_with_var_to_expand = htab_create (5 * loop->num_nodes,
1854*e4b17023SJohn Marino ve_info_hash,
1855*e4b17023SJohn Marino ve_info_eq, free);
1856*e4b17023SJohn Marino opt_info->var_to_expand_head = NULL;
1857*e4b17023SJohn Marino opt_info->var_to_expand_tail = &opt_info->var_to_expand_head;
1858*e4b17023SJohn Marino }
1859*e4b17023SJohn Marino
1860*e4b17023SJohn Marino for (i = 0; i < loop->num_nodes; i++)
1861*e4b17023SJohn Marino {
1862*e4b17023SJohn Marino bb = body[i];
1863*e4b17023SJohn Marino if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
1864*e4b17023SJohn Marino continue;
1865*e4b17023SJohn Marino
1866*e4b17023SJohn Marino FOR_BB_INSNS (bb, insn)
1867*e4b17023SJohn Marino {
1868*e4b17023SJohn Marino if (!INSN_P (insn))
1869*e4b17023SJohn Marino continue;
1870*e4b17023SJohn Marino
1871*e4b17023SJohn Marino if (opt_info->insns_to_split)
1872*e4b17023SJohn Marino ivts = analyze_iv_to_split_insn (insn);
1873*e4b17023SJohn Marino
1874*e4b17023SJohn Marino if (ivts)
1875*e4b17023SJohn Marino {
1876*e4b17023SJohn Marino slot1 = htab_find_slot (opt_info->insns_to_split, ivts, INSERT);
1877*e4b17023SJohn Marino gcc_assert (*slot1 == NULL);
1878*e4b17023SJohn Marino *slot1 = ivts;
1879*e4b17023SJohn Marino *opt_info->iv_to_split_tail = ivts;
1880*e4b17023SJohn Marino opt_info->iv_to_split_tail = &ivts->next;
1881*e4b17023SJohn Marino continue;
1882*e4b17023SJohn Marino }
1883*e4b17023SJohn Marino
1884*e4b17023SJohn Marino if (opt_info->insns_with_var_to_expand)
1885*e4b17023SJohn Marino ves = analyze_insn_to_expand_var (loop, insn);
1886*e4b17023SJohn Marino
1887*e4b17023SJohn Marino if (ves)
1888*e4b17023SJohn Marino {
1889*e4b17023SJohn Marino slot2 = htab_find_slot (opt_info->insns_with_var_to_expand, ves, INSERT);
1890*e4b17023SJohn Marino gcc_assert (*slot2 == NULL);
1891*e4b17023SJohn Marino *slot2 = ves;
1892*e4b17023SJohn Marino *opt_info->var_to_expand_tail = ves;
1893*e4b17023SJohn Marino opt_info->var_to_expand_tail = &ves->next;
1894*e4b17023SJohn Marino }
1895*e4b17023SJohn Marino }
1896*e4b17023SJohn Marino }
1897*e4b17023SJohn Marino
1898*e4b17023SJohn Marino VEC_free (edge, heap, edges);
1899*e4b17023SJohn Marino free (body);
1900*e4b17023SJohn Marino return opt_info;
1901*e4b17023SJohn Marino }
1902*e4b17023SJohn Marino
1903*e4b17023SJohn Marino /* Called just before loop duplication. Records start of duplicated area
1904*e4b17023SJohn Marino to OPT_INFO. */
1905*e4b17023SJohn Marino
1906*e4b17023SJohn Marino static void
opt_info_start_duplication(struct opt_info * opt_info)1907*e4b17023SJohn Marino opt_info_start_duplication (struct opt_info *opt_info)
1908*e4b17023SJohn Marino {
1909*e4b17023SJohn Marino if (opt_info)
1910*e4b17023SJohn Marino opt_info->first_new_block = last_basic_block;
1911*e4b17023SJohn Marino }
1912*e4b17023SJohn Marino
1913*e4b17023SJohn Marino /* Determine the number of iterations between initialization of the base
1914*e4b17023SJohn Marino variable and the current copy (N_COPY). N_COPIES is the total number
1915*e4b17023SJohn Marino of newly created copies. UNROLLING is true if we are unrolling
1916*e4b17023SJohn Marino (not peeling) the loop. */
1917*e4b17023SJohn Marino
1918*e4b17023SJohn Marino static unsigned
determine_split_iv_delta(unsigned n_copy,unsigned n_copies,bool unrolling)1919*e4b17023SJohn Marino determine_split_iv_delta (unsigned n_copy, unsigned n_copies, bool unrolling)
1920*e4b17023SJohn Marino {
1921*e4b17023SJohn Marino if (unrolling)
1922*e4b17023SJohn Marino {
1923*e4b17023SJohn Marino /* If we are unrolling, initialization is done in the original loop
1924*e4b17023SJohn Marino body (number 0). */
1925*e4b17023SJohn Marino return n_copy;
1926*e4b17023SJohn Marino }
1927*e4b17023SJohn Marino else
1928*e4b17023SJohn Marino {
1929*e4b17023SJohn Marino /* If we are peeling, the copy in that the initialization occurs has
1930*e4b17023SJohn Marino number 1. The original loop (number 0) is the last. */
1931*e4b17023SJohn Marino if (n_copy)
1932*e4b17023SJohn Marino return n_copy - 1;
1933*e4b17023SJohn Marino else
1934*e4b17023SJohn Marino return n_copies;
1935*e4b17023SJohn Marino }
1936*e4b17023SJohn Marino }
1937*e4b17023SJohn Marino
1938*e4b17023SJohn Marino /* Locate in EXPR the expression corresponding to the location recorded
1939*e4b17023SJohn Marino in IVTS, and return a pointer to the RTX for this location. */
1940*e4b17023SJohn Marino
1941*e4b17023SJohn Marino static rtx *
get_ivts_expr(rtx expr,struct iv_to_split * ivts)1942*e4b17023SJohn Marino get_ivts_expr (rtx expr, struct iv_to_split *ivts)
1943*e4b17023SJohn Marino {
1944*e4b17023SJohn Marino unsigned i;
1945*e4b17023SJohn Marino rtx *ret = &expr;
1946*e4b17023SJohn Marino
1947*e4b17023SJohn Marino for (i = 0; i < ivts->n_loc; i++)
1948*e4b17023SJohn Marino ret = &XEXP (*ret, ivts->loc[i]);
1949*e4b17023SJohn Marino
1950*e4b17023SJohn Marino return ret;
1951*e4b17023SJohn Marino }
1952*e4b17023SJohn Marino
1953*e4b17023SJohn Marino /* Allocate basic variable for the induction variable chain. */
1954*e4b17023SJohn Marino
1955*e4b17023SJohn Marino static void
allocate_basic_variable(struct iv_to_split * ivts)1956*e4b17023SJohn Marino allocate_basic_variable (struct iv_to_split *ivts)
1957*e4b17023SJohn Marino {
1958*e4b17023SJohn Marino rtx expr = *get_ivts_expr (single_set (ivts->insn), ivts);
1959*e4b17023SJohn Marino
1960*e4b17023SJohn Marino ivts->base_var = gen_reg_rtx (GET_MODE (expr));
1961*e4b17023SJohn Marino }
1962*e4b17023SJohn Marino
1963*e4b17023SJohn Marino /* Insert initialization of basic variable of IVTS before INSN, taking
1964*e4b17023SJohn Marino the initial value from INSN. */
1965*e4b17023SJohn Marino
1966*e4b17023SJohn Marino static void
insert_base_initialization(struct iv_to_split * ivts,rtx insn)1967*e4b17023SJohn Marino insert_base_initialization (struct iv_to_split *ivts, rtx insn)
1968*e4b17023SJohn Marino {
1969*e4b17023SJohn Marino rtx expr = copy_rtx (*get_ivts_expr (single_set (insn), ivts));
1970*e4b17023SJohn Marino rtx seq;
1971*e4b17023SJohn Marino
1972*e4b17023SJohn Marino start_sequence ();
1973*e4b17023SJohn Marino expr = force_operand (expr, ivts->base_var);
1974*e4b17023SJohn Marino if (expr != ivts->base_var)
1975*e4b17023SJohn Marino emit_move_insn (ivts->base_var, expr);
1976*e4b17023SJohn Marino seq = get_insns ();
1977*e4b17023SJohn Marino end_sequence ();
1978*e4b17023SJohn Marino
1979*e4b17023SJohn Marino emit_insn_before (seq, insn);
1980*e4b17023SJohn Marino }
1981*e4b17023SJohn Marino
1982*e4b17023SJohn Marino /* Replace the use of induction variable described in IVTS in INSN
1983*e4b17023SJohn Marino by base variable + DELTA * step. */
1984*e4b17023SJohn Marino
1985*e4b17023SJohn Marino static void
split_iv(struct iv_to_split * ivts,rtx insn,unsigned delta)1986*e4b17023SJohn Marino split_iv (struct iv_to_split *ivts, rtx insn, unsigned delta)
1987*e4b17023SJohn Marino {
1988*e4b17023SJohn Marino rtx expr, *loc, seq, incr, var;
1989*e4b17023SJohn Marino enum machine_mode mode = GET_MODE (ivts->base_var);
1990*e4b17023SJohn Marino rtx src, dest, set;
1991*e4b17023SJohn Marino
1992*e4b17023SJohn Marino /* Construct base + DELTA * step. */
1993*e4b17023SJohn Marino if (!delta)
1994*e4b17023SJohn Marino expr = ivts->base_var;
1995*e4b17023SJohn Marino else
1996*e4b17023SJohn Marino {
1997*e4b17023SJohn Marino incr = simplify_gen_binary (MULT, mode,
1998*e4b17023SJohn Marino ivts->step, gen_int_mode (delta, mode));
1999*e4b17023SJohn Marino expr = simplify_gen_binary (PLUS, GET_MODE (ivts->base_var),
2000*e4b17023SJohn Marino ivts->base_var, incr);
2001*e4b17023SJohn Marino }
2002*e4b17023SJohn Marino
2003*e4b17023SJohn Marino /* Figure out where to do the replacement. */
2004*e4b17023SJohn Marino loc = get_ivts_expr (single_set (insn), ivts);
2005*e4b17023SJohn Marino
2006*e4b17023SJohn Marino /* If we can make the replacement right away, we're done. */
2007*e4b17023SJohn Marino if (validate_change (insn, loc, expr, 0))
2008*e4b17023SJohn Marino return;
2009*e4b17023SJohn Marino
2010*e4b17023SJohn Marino /* Otherwise, force EXPR into a register and try again. */
2011*e4b17023SJohn Marino start_sequence ();
2012*e4b17023SJohn Marino var = gen_reg_rtx (mode);
2013*e4b17023SJohn Marino expr = force_operand (expr, var);
2014*e4b17023SJohn Marino if (expr != var)
2015*e4b17023SJohn Marino emit_move_insn (var, expr);
2016*e4b17023SJohn Marino seq = get_insns ();
2017*e4b17023SJohn Marino end_sequence ();
2018*e4b17023SJohn Marino emit_insn_before (seq, insn);
2019*e4b17023SJohn Marino
2020*e4b17023SJohn Marino if (validate_change (insn, loc, var, 0))
2021*e4b17023SJohn Marino return;
2022*e4b17023SJohn Marino
2023*e4b17023SJohn Marino /* The last chance. Try recreating the assignment in insn
2024*e4b17023SJohn Marino completely from scratch. */
2025*e4b17023SJohn Marino set = single_set (insn);
2026*e4b17023SJohn Marino gcc_assert (set);
2027*e4b17023SJohn Marino
2028*e4b17023SJohn Marino start_sequence ();
2029*e4b17023SJohn Marino *loc = var;
2030*e4b17023SJohn Marino src = copy_rtx (SET_SRC (set));
2031*e4b17023SJohn Marino dest = copy_rtx (SET_DEST (set));
2032*e4b17023SJohn Marino src = force_operand (src, dest);
2033*e4b17023SJohn Marino if (src != dest)
2034*e4b17023SJohn Marino emit_move_insn (dest, src);
2035*e4b17023SJohn Marino seq = get_insns ();
2036*e4b17023SJohn Marino end_sequence ();
2037*e4b17023SJohn Marino
2038*e4b17023SJohn Marino emit_insn_before (seq, insn);
2039*e4b17023SJohn Marino delete_insn (insn);
2040*e4b17023SJohn Marino }
2041*e4b17023SJohn Marino
2042*e4b17023SJohn Marino
2043*e4b17023SJohn Marino /* Return one expansion of the accumulator recorded in struct VE. */
2044*e4b17023SJohn Marino
2045*e4b17023SJohn Marino static rtx
get_expansion(struct var_to_expand * ve)2046*e4b17023SJohn Marino get_expansion (struct var_to_expand *ve)
2047*e4b17023SJohn Marino {
2048*e4b17023SJohn Marino rtx reg;
2049*e4b17023SJohn Marino
2050*e4b17023SJohn Marino if (ve->reuse_expansion == 0)
2051*e4b17023SJohn Marino reg = ve->reg;
2052*e4b17023SJohn Marino else
2053*e4b17023SJohn Marino reg = VEC_index (rtx, ve->var_expansions, ve->reuse_expansion - 1);
2054*e4b17023SJohn Marino
2055*e4b17023SJohn Marino if (VEC_length (rtx, ve->var_expansions) == (unsigned) ve->reuse_expansion)
2056*e4b17023SJohn Marino ve->reuse_expansion = 0;
2057*e4b17023SJohn Marino else
2058*e4b17023SJohn Marino ve->reuse_expansion++;
2059*e4b17023SJohn Marino
2060*e4b17023SJohn Marino return reg;
2061*e4b17023SJohn Marino }
2062*e4b17023SJohn Marino
2063*e4b17023SJohn Marino
2064*e4b17023SJohn Marino /* Given INSN replace the uses of the accumulator recorded in VE
2065*e4b17023SJohn Marino with a new register. */
2066*e4b17023SJohn Marino
2067*e4b17023SJohn Marino static void
expand_var_during_unrolling(struct var_to_expand * ve,rtx insn)2068*e4b17023SJohn Marino expand_var_during_unrolling (struct var_to_expand *ve, rtx insn)
2069*e4b17023SJohn Marino {
2070*e4b17023SJohn Marino rtx new_reg, set;
2071*e4b17023SJohn Marino bool really_new_expansion = false;
2072*e4b17023SJohn Marino
2073*e4b17023SJohn Marino set = single_set (insn);
2074*e4b17023SJohn Marino gcc_assert (set);
2075*e4b17023SJohn Marino
2076*e4b17023SJohn Marino /* Generate a new register only if the expansion limit has not been
2077*e4b17023SJohn Marino reached. Else reuse an already existing expansion. */
2078*e4b17023SJohn Marino if (PARAM_VALUE (PARAM_MAX_VARIABLE_EXPANSIONS) > ve->expansion_count)
2079*e4b17023SJohn Marino {
2080*e4b17023SJohn Marino really_new_expansion = true;
2081*e4b17023SJohn Marino new_reg = gen_reg_rtx (GET_MODE (ve->reg));
2082*e4b17023SJohn Marino }
2083*e4b17023SJohn Marino else
2084*e4b17023SJohn Marino new_reg = get_expansion (ve);
2085*e4b17023SJohn Marino
2086*e4b17023SJohn Marino validate_change (insn, &SET_DEST (set), new_reg, 1);
2087*e4b17023SJohn Marino validate_change (insn, &XEXP (SET_SRC (set), ve->accum_pos), new_reg, 1);
2088*e4b17023SJohn Marino
2089*e4b17023SJohn Marino if (apply_change_group ())
2090*e4b17023SJohn Marino if (really_new_expansion)
2091*e4b17023SJohn Marino {
2092*e4b17023SJohn Marino VEC_safe_push (rtx, heap, ve->var_expansions, new_reg);
2093*e4b17023SJohn Marino ve->expansion_count++;
2094*e4b17023SJohn Marino }
2095*e4b17023SJohn Marino }
2096*e4b17023SJohn Marino
2097*e4b17023SJohn Marino /* Initialize the variable expansions in loop preheader. PLACE is the
2098*e4b17023SJohn Marino loop-preheader basic block where the initialization of the
2099*e4b17023SJohn Marino expansions should take place. The expansions are initialized with
2100*e4b17023SJohn Marino (-0) when the operation is plus or minus to honor sign zero. This
2101*e4b17023SJohn Marino way we can prevent cases where the sign of the final result is
2102*e4b17023SJohn Marino effected by the sign of the expansion. Here is an example to
2103*e4b17023SJohn Marino demonstrate this:
2104*e4b17023SJohn Marino
2105*e4b17023SJohn Marino for (i = 0 ; i < n; i++)
2106*e4b17023SJohn Marino sum += something;
2107*e4b17023SJohn Marino
2108*e4b17023SJohn Marino ==>
2109*e4b17023SJohn Marino
2110*e4b17023SJohn Marino sum += something
2111*e4b17023SJohn Marino ....
2112*e4b17023SJohn Marino i = i+1;
2113*e4b17023SJohn Marino sum1 += something
2114*e4b17023SJohn Marino ....
2115*e4b17023SJohn Marino i = i+1
2116*e4b17023SJohn Marino sum2 += something;
2117*e4b17023SJohn Marino ....
2118*e4b17023SJohn Marino
2119*e4b17023SJohn Marino When SUM is initialized with -zero and SOMETHING is also -zero; the
2120*e4b17023SJohn Marino final result of sum should be -zero thus the expansions sum1 and sum2
2121*e4b17023SJohn Marino should be initialized with -zero as well (otherwise we will get +zero
2122*e4b17023SJohn Marino as the final result). */
2123*e4b17023SJohn Marino
2124*e4b17023SJohn Marino static void
insert_var_expansion_initialization(struct var_to_expand * ve,basic_block place)2125*e4b17023SJohn Marino insert_var_expansion_initialization (struct var_to_expand *ve,
2126*e4b17023SJohn Marino basic_block place)
2127*e4b17023SJohn Marino {
2128*e4b17023SJohn Marino rtx seq, var, zero_init, insn;
2129*e4b17023SJohn Marino unsigned i;
2130*e4b17023SJohn Marino enum machine_mode mode = GET_MODE (ve->reg);
2131*e4b17023SJohn Marino bool honor_signed_zero_p = HONOR_SIGNED_ZEROS (mode);
2132*e4b17023SJohn Marino
2133*e4b17023SJohn Marino if (VEC_length (rtx, ve->var_expansions) == 0)
2134*e4b17023SJohn Marino return;
2135*e4b17023SJohn Marino
2136*e4b17023SJohn Marino start_sequence ();
2137*e4b17023SJohn Marino switch (ve->op)
2138*e4b17023SJohn Marino {
2139*e4b17023SJohn Marino case FMA:
2140*e4b17023SJohn Marino /* Note that we only accumulate FMA via the ADD operand. */
2141*e4b17023SJohn Marino case PLUS:
2142*e4b17023SJohn Marino case MINUS:
2143*e4b17023SJohn Marino FOR_EACH_VEC_ELT (rtx, ve->var_expansions, i, var)
2144*e4b17023SJohn Marino {
2145*e4b17023SJohn Marino if (honor_signed_zero_p)
2146*e4b17023SJohn Marino zero_init = simplify_gen_unary (NEG, mode, CONST0_RTX (mode), mode);
2147*e4b17023SJohn Marino else
2148*e4b17023SJohn Marino zero_init = CONST0_RTX (mode);
2149*e4b17023SJohn Marino emit_move_insn (var, zero_init);
2150*e4b17023SJohn Marino }
2151*e4b17023SJohn Marino break;
2152*e4b17023SJohn Marino
2153*e4b17023SJohn Marino case MULT:
2154*e4b17023SJohn Marino FOR_EACH_VEC_ELT (rtx, ve->var_expansions, i, var)
2155*e4b17023SJohn Marino {
2156*e4b17023SJohn Marino zero_init = CONST1_RTX (GET_MODE (var));
2157*e4b17023SJohn Marino emit_move_insn (var, zero_init);
2158*e4b17023SJohn Marino }
2159*e4b17023SJohn Marino break;
2160*e4b17023SJohn Marino
2161*e4b17023SJohn Marino default:
2162*e4b17023SJohn Marino gcc_unreachable ();
2163*e4b17023SJohn Marino }
2164*e4b17023SJohn Marino
2165*e4b17023SJohn Marino seq = get_insns ();
2166*e4b17023SJohn Marino end_sequence ();
2167*e4b17023SJohn Marino
2168*e4b17023SJohn Marino insn = BB_HEAD (place);
2169*e4b17023SJohn Marino while (!NOTE_INSN_BASIC_BLOCK_P (insn))
2170*e4b17023SJohn Marino insn = NEXT_INSN (insn);
2171*e4b17023SJohn Marino
2172*e4b17023SJohn Marino emit_insn_after (seq, insn);
2173*e4b17023SJohn Marino }
2174*e4b17023SJohn Marino
2175*e4b17023SJohn Marino /* Combine the variable expansions at the loop exit. PLACE is the
2176*e4b17023SJohn Marino loop exit basic block where the summation of the expansions should
2177*e4b17023SJohn Marino take place. */
2178*e4b17023SJohn Marino
2179*e4b17023SJohn Marino static void
combine_var_copies_in_loop_exit(struct var_to_expand * ve,basic_block place)2180*e4b17023SJohn Marino combine_var_copies_in_loop_exit (struct var_to_expand *ve, basic_block place)
2181*e4b17023SJohn Marino {
2182*e4b17023SJohn Marino rtx sum = ve->reg;
2183*e4b17023SJohn Marino rtx expr, seq, var, insn;
2184*e4b17023SJohn Marino unsigned i;
2185*e4b17023SJohn Marino
2186*e4b17023SJohn Marino if (VEC_length (rtx, ve->var_expansions) == 0)
2187*e4b17023SJohn Marino return;
2188*e4b17023SJohn Marino
2189*e4b17023SJohn Marino start_sequence ();
2190*e4b17023SJohn Marino switch (ve->op)
2191*e4b17023SJohn Marino {
2192*e4b17023SJohn Marino case FMA:
2193*e4b17023SJohn Marino /* Note that we only accumulate FMA via the ADD operand. */
2194*e4b17023SJohn Marino case PLUS:
2195*e4b17023SJohn Marino case MINUS:
2196*e4b17023SJohn Marino FOR_EACH_VEC_ELT (rtx, ve->var_expansions, i, var)
2197*e4b17023SJohn Marino sum = simplify_gen_binary (PLUS, GET_MODE (ve->reg), var, sum);
2198*e4b17023SJohn Marino break;
2199*e4b17023SJohn Marino
2200*e4b17023SJohn Marino case MULT:
2201*e4b17023SJohn Marino FOR_EACH_VEC_ELT (rtx, ve->var_expansions, i, var)
2202*e4b17023SJohn Marino sum = simplify_gen_binary (MULT, GET_MODE (ve->reg), var, sum);
2203*e4b17023SJohn Marino break;
2204*e4b17023SJohn Marino
2205*e4b17023SJohn Marino default:
2206*e4b17023SJohn Marino gcc_unreachable ();
2207*e4b17023SJohn Marino }
2208*e4b17023SJohn Marino
2209*e4b17023SJohn Marino expr = force_operand (sum, ve->reg);
2210*e4b17023SJohn Marino if (expr != ve->reg)
2211*e4b17023SJohn Marino emit_move_insn (ve->reg, expr);
2212*e4b17023SJohn Marino seq = get_insns ();
2213*e4b17023SJohn Marino end_sequence ();
2214*e4b17023SJohn Marino
2215*e4b17023SJohn Marino insn = BB_HEAD (place);
2216*e4b17023SJohn Marino while (!NOTE_INSN_BASIC_BLOCK_P (insn))
2217*e4b17023SJohn Marino insn = NEXT_INSN (insn);
2218*e4b17023SJohn Marino
2219*e4b17023SJohn Marino emit_insn_after (seq, insn);
2220*e4b17023SJohn Marino }
2221*e4b17023SJohn Marino
2222*e4b17023SJohn Marino /* Apply loop optimizations in loop copies using the
2223*e4b17023SJohn Marino data which gathered during the unrolling. Structure
2224*e4b17023SJohn Marino OPT_INFO record that data.
2225*e4b17023SJohn Marino
2226*e4b17023SJohn Marino UNROLLING is true if we unrolled (not peeled) the loop.
2227*e4b17023SJohn Marino REWRITE_ORIGINAL_BODY is true if we should also rewrite the original body of
2228*e4b17023SJohn Marino the loop (as it should happen in complete unrolling, but not in ordinary
2229*e4b17023SJohn Marino peeling of the loop). */
2230*e4b17023SJohn Marino
2231*e4b17023SJohn Marino static void
apply_opt_in_copies(struct opt_info * opt_info,unsigned n_copies,bool unrolling,bool rewrite_original_loop)2232*e4b17023SJohn Marino apply_opt_in_copies (struct opt_info *opt_info,
2233*e4b17023SJohn Marino unsigned n_copies, bool unrolling,
2234*e4b17023SJohn Marino bool rewrite_original_loop)
2235*e4b17023SJohn Marino {
2236*e4b17023SJohn Marino unsigned i, delta;
2237*e4b17023SJohn Marino basic_block bb, orig_bb;
2238*e4b17023SJohn Marino rtx insn, orig_insn, next;
2239*e4b17023SJohn Marino struct iv_to_split ivts_templ, *ivts;
2240*e4b17023SJohn Marino struct var_to_expand ve_templ, *ves;
2241*e4b17023SJohn Marino
2242*e4b17023SJohn Marino /* Sanity check -- we need to put initialization in the original loop
2243*e4b17023SJohn Marino body. */
2244*e4b17023SJohn Marino gcc_assert (!unrolling || rewrite_original_loop);
2245*e4b17023SJohn Marino
2246*e4b17023SJohn Marino /* Allocate the basic variables (i0). */
2247*e4b17023SJohn Marino if (opt_info->insns_to_split)
2248*e4b17023SJohn Marino for (ivts = opt_info->iv_to_split_head; ivts; ivts = ivts->next)
2249*e4b17023SJohn Marino allocate_basic_variable (ivts);
2250*e4b17023SJohn Marino
2251*e4b17023SJohn Marino for (i = opt_info->first_new_block; i < (unsigned) last_basic_block; i++)
2252*e4b17023SJohn Marino {
2253*e4b17023SJohn Marino bb = BASIC_BLOCK (i);
2254*e4b17023SJohn Marino orig_bb = get_bb_original (bb);
2255*e4b17023SJohn Marino
2256*e4b17023SJohn Marino /* bb->aux holds position in copy sequence initialized by
2257*e4b17023SJohn Marino duplicate_loop_to_header_edge. */
2258*e4b17023SJohn Marino delta = determine_split_iv_delta ((size_t)bb->aux, n_copies,
2259*e4b17023SJohn Marino unrolling);
2260*e4b17023SJohn Marino bb->aux = 0;
2261*e4b17023SJohn Marino orig_insn = BB_HEAD (orig_bb);
2262*e4b17023SJohn Marino for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb)); insn = next)
2263*e4b17023SJohn Marino {
2264*e4b17023SJohn Marino next = NEXT_INSN (insn);
2265*e4b17023SJohn Marino if (!INSN_P (insn)
2266*e4b17023SJohn Marino || (DEBUG_INSN_P (insn)
2267*e4b17023SJohn Marino && TREE_CODE (INSN_VAR_LOCATION_DECL (insn)) == LABEL_DECL))
2268*e4b17023SJohn Marino continue;
2269*e4b17023SJohn Marino
2270*e4b17023SJohn Marino while (!INSN_P (orig_insn)
2271*e4b17023SJohn Marino || (DEBUG_INSN_P (orig_insn)
2272*e4b17023SJohn Marino && (TREE_CODE (INSN_VAR_LOCATION_DECL (orig_insn))
2273*e4b17023SJohn Marino == LABEL_DECL)))
2274*e4b17023SJohn Marino orig_insn = NEXT_INSN (orig_insn);
2275*e4b17023SJohn Marino
2276*e4b17023SJohn Marino ivts_templ.insn = orig_insn;
2277*e4b17023SJohn Marino ve_templ.insn = orig_insn;
2278*e4b17023SJohn Marino
2279*e4b17023SJohn Marino /* Apply splitting iv optimization. */
2280*e4b17023SJohn Marino if (opt_info->insns_to_split)
2281*e4b17023SJohn Marino {
2282*e4b17023SJohn Marino ivts = (struct iv_to_split *)
2283*e4b17023SJohn Marino htab_find (opt_info->insns_to_split, &ivts_templ);
2284*e4b17023SJohn Marino
2285*e4b17023SJohn Marino if (ivts)
2286*e4b17023SJohn Marino {
2287*e4b17023SJohn Marino gcc_assert (GET_CODE (PATTERN (insn))
2288*e4b17023SJohn Marino == GET_CODE (PATTERN (orig_insn)));
2289*e4b17023SJohn Marino
2290*e4b17023SJohn Marino if (!delta)
2291*e4b17023SJohn Marino insert_base_initialization (ivts, insn);
2292*e4b17023SJohn Marino split_iv (ivts, insn, delta);
2293*e4b17023SJohn Marino }
2294*e4b17023SJohn Marino }
2295*e4b17023SJohn Marino /* Apply variable expansion optimization. */
2296*e4b17023SJohn Marino if (unrolling && opt_info->insns_with_var_to_expand)
2297*e4b17023SJohn Marino {
2298*e4b17023SJohn Marino ves = (struct var_to_expand *)
2299*e4b17023SJohn Marino htab_find (opt_info->insns_with_var_to_expand, &ve_templ);
2300*e4b17023SJohn Marino if (ves)
2301*e4b17023SJohn Marino {
2302*e4b17023SJohn Marino gcc_assert (GET_CODE (PATTERN (insn))
2303*e4b17023SJohn Marino == GET_CODE (PATTERN (orig_insn)));
2304*e4b17023SJohn Marino expand_var_during_unrolling (ves, insn);
2305*e4b17023SJohn Marino }
2306*e4b17023SJohn Marino }
2307*e4b17023SJohn Marino orig_insn = NEXT_INSN (orig_insn);
2308*e4b17023SJohn Marino }
2309*e4b17023SJohn Marino }
2310*e4b17023SJohn Marino
2311*e4b17023SJohn Marino if (!rewrite_original_loop)
2312*e4b17023SJohn Marino return;
2313*e4b17023SJohn Marino
2314*e4b17023SJohn Marino /* Initialize the variable expansions in the loop preheader
2315*e4b17023SJohn Marino and take care of combining them at the loop exit. */
2316*e4b17023SJohn Marino if (opt_info->insns_with_var_to_expand)
2317*e4b17023SJohn Marino {
2318*e4b17023SJohn Marino for (ves = opt_info->var_to_expand_head; ves; ves = ves->next)
2319*e4b17023SJohn Marino insert_var_expansion_initialization (ves, opt_info->loop_preheader);
2320*e4b17023SJohn Marino for (ves = opt_info->var_to_expand_head; ves; ves = ves->next)
2321*e4b17023SJohn Marino combine_var_copies_in_loop_exit (ves, opt_info->loop_exit);
2322*e4b17023SJohn Marino }
2323*e4b17023SJohn Marino
2324*e4b17023SJohn Marino /* Rewrite also the original loop body. Find them as originals of the blocks
2325*e4b17023SJohn Marino in the last copied iteration, i.e. those that have
2326*e4b17023SJohn Marino get_bb_copy (get_bb_original (bb)) == bb. */
2327*e4b17023SJohn Marino for (i = opt_info->first_new_block; i < (unsigned) last_basic_block; i++)
2328*e4b17023SJohn Marino {
2329*e4b17023SJohn Marino bb = BASIC_BLOCK (i);
2330*e4b17023SJohn Marino orig_bb = get_bb_original (bb);
2331*e4b17023SJohn Marino if (get_bb_copy (orig_bb) != bb)
2332*e4b17023SJohn Marino continue;
2333*e4b17023SJohn Marino
2334*e4b17023SJohn Marino delta = determine_split_iv_delta (0, n_copies, unrolling);
2335*e4b17023SJohn Marino for (orig_insn = BB_HEAD (orig_bb);
2336*e4b17023SJohn Marino orig_insn != NEXT_INSN (BB_END (bb));
2337*e4b17023SJohn Marino orig_insn = next)
2338*e4b17023SJohn Marino {
2339*e4b17023SJohn Marino next = NEXT_INSN (orig_insn);
2340*e4b17023SJohn Marino
2341*e4b17023SJohn Marino if (!INSN_P (orig_insn))
2342*e4b17023SJohn Marino continue;
2343*e4b17023SJohn Marino
2344*e4b17023SJohn Marino ivts_templ.insn = orig_insn;
2345*e4b17023SJohn Marino if (opt_info->insns_to_split)
2346*e4b17023SJohn Marino {
2347*e4b17023SJohn Marino ivts = (struct iv_to_split *)
2348*e4b17023SJohn Marino htab_find (opt_info->insns_to_split, &ivts_templ);
2349*e4b17023SJohn Marino if (ivts)
2350*e4b17023SJohn Marino {
2351*e4b17023SJohn Marino if (!delta)
2352*e4b17023SJohn Marino insert_base_initialization (ivts, orig_insn);
2353*e4b17023SJohn Marino split_iv (ivts, orig_insn, delta);
2354*e4b17023SJohn Marino continue;
2355*e4b17023SJohn Marino }
2356*e4b17023SJohn Marino }
2357*e4b17023SJohn Marino
2358*e4b17023SJohn Marino }
2359*e4b17023SJohn Marino }
2360*e4b17023SJohn Marino }
2361*e4b17023SJohn Marino
2362*e4b17023SJohn Marino /* Release OPT_INFO. */
2363*e4b17023SJohn Marino
2364*e4b17023SJohn Marino static void
free_opt_info(struct opt_info * opt_info)2365*e4b17023SJohn Marino free_opt_info (struct opt_info *opt_info)
2366*e4b17023SJohn Marino {
2367*e4b17023SJohn Marino if (opt_info->insns_to_split)
2368*e4b17023SJohn Marino htab_delete (opt_info->insns_to_split);
2369*e4b17023SJohn Marino if (opt_info->insns_with_var_to_expand)
2370*e4b17023SJohn Marino {
2371*e4b17023SJohn Marino struct var_to_expand *ves;
2372*e4b17023SJohn Marino
2373*e4b17023SJohn Marino for (ves = opt_info->var_to_expand_head; ves; ves = ves->next)
2374*e4b17023SJohn Marino VEC_free (rtx, heap, ves->var_expansions);
2375*e4b17023SJohn Marino htab_delete (opt_info->insns_with_var_to_expand);
2376*e4b17023SJohn Marino }
2377*e4b17023SJohn Marino free (opt_info);
2378*e4b17023SJohn Marino }
2379