1*38fd1498Szrj /* Induction variable optimizations.
2*38fd1498Szrj Copyright (C) 2003-2018 Free Software Foundation, Inc.
3*38fd1498Szrj
4*38fd1498Szrj This file is part of GCC.
5*38fd1498Szrj
6*38fd1498Szrj GCC is free software; you can redistribute it and/or modify it
7*38fd1498Szrj under the terms of the GNU General Public License as published by the
8*38fd1498Szrj Free Software Foundation; either version 3, or (at your option) any
9*38fd1498Szrj later version.
10*38fd1498Szrj
11*38fd1498Szrj GCC is distributed in the hope that it will be useful, but WITHOUT
12*38fd1498Szrj ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13*38fd1498Szrj FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14*38fd1498Szrj for more details.
15*38fd1498Szrj
16*38fd1498Szrj You should have received a copy of the GNU General Public License
17*38fd1498Szrj along with GCC; see the file COPYING3. If not see
18*38fd1498Szrj <http://www.gnu.org/licenses/>. */
19*38fd1498Szrj
20*38fd1498Szrj /* This pass tries to find the optimal set of induction variables for the loop.
21*38fd1498Szrj It optimizes just the basic linear induction variables (although adding
22*38fd1498Szrj support for other types should not be too hard). It includes the
23*38fd1498Szrj optimizations commonly known as strength reduction, induction variable
24*38fd1498Szrj coalescing and induction variable elimination. It does it in the
25*38fd1498Szrj following steps:
26*38fd1498Szrj
27*38fd1498Szrj 1) The interesting uses of induction variables are found. This includes
28*38fd1498Szrj
29*38fd1498Szrj -- uses of induction variables in non-linear expressions
30*38fd1498Szrj -- addresses of arrays
31*38fd1498Szrj -- comparisons of induction variables
32*38fd1498Szrj
33*38fd1498Szrj Note the interesting uses are categorized and handled in group.
34*38fd1498Szrj Generally, address type uses are grouped together if their iv bases
35*38fd1498Szrj are different in constant offset.
36*38fd1498Szrj
37*38fd1498Szrj 2) Candidates for the induction variables are found. This includes
38*38fd1498Szrj
39*38fd1498Szrj -- old induction variables
40*38fd1498Szrj -- the variables defined by expressions derived from the "interesting
41*38fd1498Szrj groups/uses" above
42*38fd1498Szrj
   3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:
46*38fd1498Szrj
47*38fd1498Szrj -- The group/use costs. Each of the interesting groups/uses chooses
48*38fd1498Szrj the best induction variable in the set and adds its cost to the sum.
49*38fd1498Szrj The cost reflects the time spent on modifying the induction variables
50*38fd1498Szrj value to be usable for the given purpose (adding base and offset for
51*38fd1498Szrj arrays, etc.).
52*38fd1498Szrj -- The variable costs. Each of the variables has a cost assigned that
53*38fd1498Szrj reflects the costs associated with incrementing the value of the
54*38fd1498Szrj variable. The original variables are somewhat preferred.
55*38fd1498Szrj -- The set cost. Depending on the size of the set, extra cost may be
56*38fd1498Szrj added to reflect register pressure.
57*38fd1498Szrj
58*38fd1498Szrj All the costs are defined in a machine-specific way, using the target
59*38fd1498Szrj hooks and machine descriptions to determine them.
60*38fd1498Szrj
61*38fd1498Szrj 4) The trees are transformed to use the new variables, the dead code is
62*38fd1498Szrj removed.
63*38fd1498Szrj
64*38fd1498Szrj All of this is done loop by loop. Doing it globally is theoretically
65*38fd1498Szrj possible, it might give a better performance and it might enable us
66*38fd1498Szrj to decide costs more precisely, but getting all the interactions right
67*38fd1498Szrj would be complicated. */
68*38fd1498Szrj
69*38fd1498Szrj #include "config.h"
70*38fd1498Szrj #include "system.h"
71*38fd1498Szrj #include "coretypes.h"
72*38fd1498Szrj #include "backend.h"
73*38fd1498Szrj #include "rtl.h"
74*38fd1498Szrj #include "tree.h"
75*38fd1498Szrj #include "gimple.h"
76*38fd1498Szrj #include "cfghooks.h"
77*38fd1498Szrj #include "tree-pass.h"
78*38fd1498Szrj #include "memmodel.h"
79*38fd1498Szrj #include "tm_p.h"
80*38fd1498Szrj #include "ssa.h"
81*38fd1498Szrj #include "expmed.h"
82*38fd1498Szrj #include "insn-config.h"
83*38fd1498Szrj #include "emit-rtl.h"
84*38fd1498Szrj #include "recog.h"
85*38fd1498Szrj #include "cgraph.h"
86*38fd1498Szrj #include "gimple-pretty-print.h"
87*38fd1498Szrj #include "alias.h"
88*38fd1498Szrj #include "fold-const.h"
89*38fd1498Szrj #include "stor-layout.h"
90*38fd1498Szrj #include "tree-eh.h"
91*38fd1498Szrj #include "gimplify.h"
92*38fd1498Szrj #include "gimple-iterator.h"
93*38fd1498Szrj #include "gimplify-me.h"
94*38fd1498Szrj #include "tree-cfg.h"
95*38fd1498Szrj #include "tree-ssa-loop-ivopts.h"
96*38fd1498Szrj #include "tree-ssa-loop-manip.h"
97*38fd1498Szrj #include "tree-ssa-loop-niter.h"
98*38fd1498Szrj #include "tree-ssa-loop.h"
99*38fd1498Szrj #include "explow.h"
100*38fd1498Szrj #include "expr.h"
101*38fd1498Szrj #include "tree-dfa.h"
102*38fd1498Szrj #include "tree-ssa.h"
103*38fd1498Szrj #include "cfgloop.h"
104*38fd1498Szrj #include "tree-scalar-evolution.h"
105*38fd1498Szrj #include "params.h"
106*38fd1498Szrj #include "tree-affine.h"
107*38fd1498Szrj #include "tree-ssa-propagate.h"
108*38fd1498Szrj #include "tree-ssa-address.h"
109*38fd1498Szrj #include "builtins.h"
110*38fd1498Szrj #include "tree-vectorizer.h"
111*38fd1498Szrj
112*38fd1498Szrj /* FIXME: Expressions are expanded to RTL in this pass to determine the
113*38fd1498Szrj cost of different addressing modes. This should be moved to a TBD
114*38fd1498Szrj interface between the GIMPLE and RTL worlds. */
115*38fd1498Szrj
/* The infinite cost.  Used as the saturation value for comp_cost; any
   candidate/use combination that cannot be expressed gets this cost.  */
#define INFTY 10000000
118*38fd1498Szrj
119*38fd1498Szrj /* Returns the expected number of loop iterations for LOOP.
120*38fd1498Szrj The average trip count is computed from profile data if it
121*38fd1498Szrj exists. */
122*38fd1498Szrj
123*38fd1498Szrj static inline HOST_WIDE_INT
avg_loop_niter(struct loop * loop)124*38fd1498Szrj avg_loop_niter (struct loop *loop)
125*38fd1498Szrj {
126*38fd1498Szrj HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
127*38fd1498Szrj if (niter == -1)
128*38fd1498Szrj {
129*38fd1498Szrj niter = likely_max_stmt_executions_int (loop);
130*38fd1498Szrj
131*38fd1498Szrj if (niter == -1 || niter > PARAM_VALUE (PARAM_AVG_LOOP_NITER))
132*38fd1498Szrj return PARAM_VALUE (PARAM_AVG_LOOP_NITER);
133*38fd1498Szrj }
134*38fd1498Szrj
135*38fd1498Szrj return niter;
136*38fd1498Szrj }
137*38fd1498Szrj
138*38fd1498Szrj struct iv_use;
139*38fd1498Szrj
/* Representation of the induction variable.  */
struct iv
{
  tree base;		/* Initial value of the iv.  */
  tree base_object;	/* A memory object to that the induction variable points.  */
  tree step;		/* Step of the iv (constant only).  */
  tree ssa_name;	/* The ssa name with the value.  */
  struct iv_use *nonlin_use;	/* The identifier in the use if it is the case.  */
  bool biv_p;		/* Is it a biv?  */
  bool no_overflow;	/* True if the iv doesn't overflow.  */
  bool have_address_use;/* For biv, indicate if it's used in any address
			   type use.  */
};
153*38fd1498Szrj
/* Per-ssa version information (induction variable descriptions, etc.).
   Indexed by SSA version number in ivopts_data::version_info.  */
struct version_info
{
  tree name;		/* The ssa name.  */
  struct iv *iv;	/* Induction variable description.  */
  bool has_nonlin_use;	/* For a loop-level invariant, whether it is used in
			   an expression that is not an induction variable.  */
  bool preserve_biv;	/* For the original biv, whether to preserve it.  */
  unsigned inv_id;	/* Id of an invariant.  */
};
164*38fd1498Szrj
/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
  USE_REF_ADDRESS,	/* Use is an address for an explicit memory
			   reference.  */
  USE_PTR_ADDRESS,	/* Use is a pointer argument to a function in
			   cases where the expansion of the function
			   will turn the argument into a normal address.  */
  USE_COMPARE		/* Use is a compare.  */
};
176*38fd1498Szrj
/* Cost of a computation.  A pair of a runtime cost and a complexity
   estimate, compared lexicographically (cost first).  The INFTY value
   of the cost field marks the infinite cost; arithmetic saturates
   there.  */
struct comp_cost
{
  /* Default: the zero cost.  */
  comp_cost (): cost (0), complexity (0), scratch (0)
  {}

  comp_cost (int cost, unsigned complexity, int scratch = 0)
    : cost (cost), complexity (complexity), scratch (scratch)
  {}

  /* Returns true if COST is infinite.  */
  bool infinite_cost_p ();

  /* Adds costs COST1 and COST2.  */
  friend comp_cost operator+ (comp_cost cost1, comp_cost cost2);

  /* Adds COST to the comp_cost.  */
  comp_cost operator+= (comp_cost cost);

  /* Adds constant C to this comp_cost.  */
  comp_cost operator+= (HOST_WIDE_INT c);

  /* Subtracts constant C to this comp_cost.  */
  comp_cost operator-= (HOST_WIDE_INT c);

  /* Divide the comp_cost by constant C.  */
  comp_cost operator/= (HOST_WIDE_INT c);

  /* Multiply the comp_cost by constant C.  */
  comp_cost operator*= (HOST_WIDE_INT c);

  /* Subtracts costs COST1 and COST2.  */
  friend comp_cost operator- (comp_cost cost1, comp_cost cost2);

  /* Subtracts COST from this comp_cost.  */
  comp_cost operator-= (comp_cost cost);

  /* Returns true if COST1 is smaller than COST2.  */
  friend bool operator< (comp_cost cost1, comp_cost cost2);

  /* Returns true if COST1 and COST2 are equal.  */
  friend bool operator== (comp_cost cost1, comp_cost cost2);

  /* Returns true if COST1 is smaller or equal than COST2.  */
  friend bool operator<= (comp_cost cost1, comp_cost cost2);

  int cost;		/* The runtime cost.  */
  unsigned complexity;	/* The estimate of the complexity of the code for
			   the computation (in no concrete units --
			   complexity field should be larger for more
			   complex expressions and addressing modes).  */
  int scratch;		/* Scratch used during cost computation.  Not part
			   of the ordering or equality comparisons.  */
};
230*38fd1498Szrj
/* The zero cost (default-constructed) and the saturating infinite cost.  */
static const comp_cost no_cost;
static const comp_cost infinite_cost (INFTY, INFTY, INFTY);
233*38fd1498Szrj
234*38fd1498Szrj bool
infinite_cost_p()235*38fd1498Szrj comp_cost::infinite_cost_p ()
236*38fd1498Szrj {
237*38fd1498Szrj return cost == INFTY;
238*38fd1498Szrj }
239*38fd1498Szrj
240*38fd1498Szrj comp_cost
241*38fd1498Szrj operator+ (comp_cost cost1, comp_cost cost2)
242*38fd1498Szrj {
243*38fd1498Szrj if (cost1.infinite_cost_p () || cost2.infinite_cost_p ())
244*38fd1498Szrj return infinite_cost;
245*38fd1498Szrj
246*38fd1498Szrj cost1.cost += cost2.cost;
247*38fd1498Szrj cost1.complexity += cost2.complexity;
248*38fd1498Szrj
249*38fd1498Szrj return cost1;
250*38fd1498Szrj }
251*38fd1498Szrj
252*38fd1498Szrj comp_cost
253*38fd1498Szrj operator- (comp_cost cost1, comp_cost cost2)
254*38fd1498Szrj {
255*38fd1498Szrj if (cost1.infinite_cost_p ())
256*38fd1498Szrj return infinite_cost;
257*38fd1498Szrj
258*38fd1498Szrj gcc_assert (!cost2.infinite_cost_p ());
259*38fd1498Szrj
260*38fd1498Szrj cost1.cost -= cost2.cost;
261*38fd1498Szrj cost1.complexity -= cost2.complexity;
262*38fd1498Szrj
263*38fd1498Szrj return cost1;
264*38fd1498Szrj }
265*38fd1498Szrj
266*38fd1498Szrj comp_cost
267*38fd1498Szrj comp_cost::operator+= (comp_cost cost)
268*38fd1498Szrj {
269*38fd1498Szrj *this = *this + cost;
270*38fd1498Szrj return *this;
271*38fd1498Szrj }
272*38fd1498Szrj
273*38fd1498Szrj comp_cost
274*38fd1498Szrj comp_cost::operator+= (HOST_WIDE_INT c)
275*38fd1498Szrj {
276*38fd1498Szrj if (infinite_cost_p ())
277*38fd1498Szrj return *this;
278*38fd1498Szrj
279*38fd1498Szrj this->cost += c;
280*38fd1498Szrj
281*38fd1498Szrj return *this;
282*38fd1498Szrj }
283*38fd1498Szrj
284*38fd1498Szrj comp_cost
285*38fd1498Szrj comp_cost::operator-= (HOST_WIDE_INT c)
286*38fd1498Szrj {
287*38fd1498Szrj if (infinite_cost_p ())
288*38fd1498Szrj return *this;
289*38fd1498Szrj
290*38fd1498Szrj this->cost -= c;
291*38fd1498Szrj
292*38fd1498Szrj return *this;
293*38fd1498Szrj }
294*38fd1498Szrj
295*38fd1498Szrj comp_cost
296*38fd1498Szrj comp_cost::operator/= (HOST_WIDE_INT c)
297*38fd1498Szrj {
298*38fd1498Szrj if (infinite_cost_p ())
299*38fd1498Szrj return *this;
300*38fd1498Szrj
301*38fd1498Szrj this->cost /= c;
302*38fd1498Szrj
303*38fd1498Szrj return *this;
304*38fd1498Szrj }
305*38fd1498Szrj
306*38fd1498Szrj comp_cost
307*38fd1498Szrj comp_cost::operator*= (HOST_WIDE_INT c)
308*38fd1498Szrj {
309*38fd1498Szrj if (infinite_cost_p ())
310*38fd1498Szrj return *this;
311*38fd1498Szrj
312*38fd1498Szrj this->cost *= c;
313*38fd1498Szrj
314*38fd1498Szrj return *this;
315*38fd1498Szrj }
316*38fd1498Szrj
317*38fd1498Szrj comp_cost
318*38fd1498Szrj comp_cost::operator-= (comp_cost cost)
319*38fd1498Szrj {
320*38fd1498Szrj *this = *this - cost;
321*38fd1498Szrj return *this;
322*38fd1498Szrj }
323*38fd1498Szrj
324*38fd1498Szrj bool
325*38fd1498Szrj operator< (comp_cost cost1, comp_cost cost2)
326*38fd1498Szrj {
327*38fd1498Szrj if (cost1.cost == cost2.cost)
328*38fd1498Szrj return cost1.complexity < cost2.complexity;
329*38fd1498Szrj
330*38fd1498Szrj return cost1.cost < cost2.cost;
331*38fd1498Szrj }
332*38fd1498Szrj
333*38fd1498Szrj bool
334*38fd1498Szrj operator== (comp_cost cost1, comp_cost cost2)
335*38fd1498Szrj {
336*38fd1498Szrj return cost1.cost == cost2.cost
337*38fd1498Szrj && cost1.complexity == cost2.complexity;
338*38fd1498Szrj }
339*38fd1498Szrj
340*38fd1498Szrj bool
341*38fd1498Szrj operator<= (comp_cost cost1, comp_cost cost2)
342*38fd1498Szrj {
343*38fd1498Szrj return cost1 < cost2 || cost1 == cost2;
344*38fd1498Szrj }
345*38fd1498Szrj
346*38fd1498Szrj struct iv_inv_expr_ent;
347*38fd1498Szrj
/* The candidate - cost pair.  One entry of a group's cost_map, recording
   how expensive it is to express the group's uses with a candidate.  */
struct cost_pair
{
  struct iv_cand *cand;	/* The candidate.  */
  comp_cost cost;	/* The cost.  */
  enum tree_code comp;	/* For iv elimination, the comparison.  */
  bitmap inv_vars;	/* The list of invariant ssa_vars that have to be
			   preserved when representing iv_use with iv_cand.  */
  bitmap inv_exprs;	/* The list of newly created invariant expressions
			   when representing iv_use with iv_cand.  */
  tree value;		/* For final value elimination, the expression for
			   the final value of the iv.  For iv elimination,
			   the new bound to compare with.  */
};
362*38fd1498Szrj
/* Use.  A single occurrence of an induction variable that the pass
   must be able to rewrite in terms of the chosen candidates.  */
struct iv_use
{
  unsigned id;		/* The id of the use.  */
  unsigned group_id;	/* The group id the use belongs to.  */
  enum use_type type;	/* Type of the use.  */
  tree mem_type;	/* The memory type to use when testing whether an
			   address is legitimate, and what the address's
			   cost is.  */
  struct iv *iv;	/* The induction variable it is based on.  */
  gimple *stmt;		/* Statement in that it occurs.  */
  tree *op_p;		/* The place where it occurs.  */

  tree addr_base;	/* Base address with const offset stripped.  */
  poly_uint64_pod addr_offset;
			/* Const offset stripped from base address.  */
};
380*38fd1498Szrj
/* Group of uses.  Address uses whose iv bases differ only by a constant
   offset are grouped so that one candidate choice covers them all.  */
struct iv_group
{
  /* The id of the group.  */
  unsigned id;
  /* Uses of the group are of the same type.  */
  enum use_type type;
  /* The set of "related" IV candidates, plus the important ones.  */
  bitmap related_cands;
  /* Number of IV candidates in the cost_map.  */
  unsigned n_map_members;
  /* The costs wrto the iv candidates.  */
  struct cost_pair *cost_map;
  /* The selected candidate for the group.  */
  struct iv_cand *selected;
  /* Uses in the group.  */
  vec<struct iv_use *> vuses;
};
399*38fd1498Szrj
/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,		/* At the end, just before the exit condition.  */
  IP_END,		/* At the end of the latch block.  */
  IP_BEFORE_USE,	/* Immediately before a specific use.  */
  IP_AFTER_USE,		/* Immediately after a specific use.  */
  IP_ORIGINAL		/* The original biv.  */
};
409*38fd1498Szrj
/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;		/* The number of the candidate.  */
  bool important;	/* Whether this is an "important" candidate, i.e. such
			   that it should be considered by all uses.  */
  ENUM_BITFIELD(iv_position) pos : 8;	/* Where it is computed.  */
  gimple *incremented_at;/* For original biv, the statement where it is
			   incremented.  */
  tree var_before;	/* The variable used for it before increment.  */
  tree var_after;	/* The variable used for it after increment.  */
  struct iv *iv;	/* The value of the candidate.  NULL for
			   "pseudocandidate" used to indicate the possibility
			   to replace the final value of an iv by direct
			   computation of the value.  */
  unsigned cost;	/* Cost of the candidate.  */
  unsigned cost_step;	/* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
			      where it is incremented.  */
  bitmap inv_vars;	/* The list of invariant ssa_vars used in step of the
			   iv_cand.  */
  bitmap inv_exprs;	/* If step is more complicated than a single ssa_var,
			   handle it as a new invariant expression which will
			   be hoisted out of loop.  */
  struct iv *orig_iv;	/* The original iv if this cand is added from biv with
			   smaller type.  */
};
437*38fd1498Szrj
/* Hashtable entry for common candidate derived from iv uses.
   Keyed on (base, step); records which uses share that combination.  */
struct iv_common_cand
{
  tree base;
  tree step;
  /* IV uses from which this common candidate is derived.  */
  auto_vec<struct iv_use *> uses;
  /* Precomputed hash of base and step, cached for table lookups.  */
  hashval_t hash;
};
447*38fd1498Szrj
/* Hashtable helpers.  delete_ptr_hash frees entries with `delete' on
   table destruction.  */

struct iv_common_cand_hasher : delete_ptr_hash <iv_common_cand>
{
  static inline hashval_t hash (const iv_common_cand *);
  static inline bool equal (const iv_common_cand *, const iv_common_cand *);
};
455*38fd1498Szrj
/* Hash function for possible common candidates.  Returns the hash value
   precomputed and cached in the entry itself.  */

inline hashval_t
iv_common_cand_hasher::hash (const iv_common_cand *ccand)
{
  return ccand->hash;
}
463*38fd1498Szrj
464*38fd1498Szrj /* Hash table equality function for common candidates. */
465*38fd1498Szrj
466*38fd1498Szrj inline bool
equal(const iv_common_cand * ccand1,const iv_common_cand * ccand2)467*38fd1498Szrj iv_common_cand_hasher::equal (const iv_common_cand *ccand1,
468*38fd1498Szrj const iv_common_cand *ccand2)
469*38fd1498Szrj {
470*38fd1498Szrj return (ccand1->hash == ccand2->hash
471*38fd1498Szrj && operand_equal_p (ccand1->base, ccand2->base, 0)
472*38fd1498Szrj && operand_equal_p (ccand1->step, ccand2->step, 0)
473*38fd1498Szrj && (TYPE_PRECISION (TREE_TYPE (ccand1->base))
474*38fd1498Szrj == TYPE_PRECISION (TREE_TYPE (ccand2->base))));
475*38fd1498Szrj }
476*38fd1498Szrj
/* Loop invariant expression hashtable entry.  */

struct iv_inv_expr_ent
{
  /* Tree expression of the entry.  */
  tree expr;
  /* Unique identifier.  */
  int id;
  /* Hash value.  */
  hashval_t hash;
};
488*38fd1498Szrj
489*38fd1498Szrj /* Sort iv_inv_expr_ent pair A and B by id field. */
490*38fd1498Szrj
491*38fd1498Szrj static int
sort_iv_inv_expr_ent(const void * a,const void * b)492*38fd1498Szrj sort_iv_inv_expr_ent (const void *a, const void *b)
493*38fd1498Szrj {
494*38fd1498Szrj const iv_inv_expr_ent * const *e1 = (const iv_inv_expr_ent * const *) (a);
495*38fd1498Szrj const iv_inv_expr_ent * const *e2 = (const iv_inv_expr_ent * const *) (b);
496*38fd1498Szrj
497*38fd1498Szrj unsigned id1 = (*e1)->id;
498*38fd1498Szrj unsigned id2 = (*e2)->id;
499*38fd1498Szrj
500*38fd1498Szrj if (id1 < id2)
501*38fd1498Szrj return -1;
502*38fd1498Szrj else if (id1 > id2)
503*38fd1498Szrj return 1;
504*38fd1498Szrj else
505*38fd1498Szrj return 0;
506*38fd1498Szrj }
507*38fd1498Szrj
/* Hashtable helpers.  free_ptr_hash releases entries with `free' on
   table destruction.  */

struct iv_inv_expr_hasher : free_ptr_hash <iv_inv_expr_ent>
{
  static inline hashval_t hash (const iv_inv_expr_ent *);
  static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
};
515*38fd1498Szrj
516*38fd1498Szrj /* Return true if uses of type TYPE represent some form of address. */
517*38fd1498Szrj
518*38fd1498Szrj inline bool
address_p(use_type type)519*38fd1498Szrj address_p (use_type type)
520*38fd1498Szrj {
521*38fd1498Szrj return type == USE_REF_ADDRESS || type == USE_PTR_ADDRESS;
522*38fd1498Szrj }
523*38fd1498Szrj
/* Hash function for loop invariant expressions.  Returns the hash value
   precomputed and cached in the entry itself.  */

inline hashval_t
iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
{
  return expr->hash;
}
531*38fd1498Szrj
532*38fd1498Szrj /* Hash table equality function for expressions. */
533*38fd1498Szrj
534*38fd1498Szrj inline bool
equal(const iv_inv_expr_ent * expr1,const iv_inv_expr_ent * expr2)535*38fd1498Szrj iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
536*38fd1498Szrj const iv_inv_expr_ent *expr2)
537*38fd1498Szrj {
538*38fd1498Szrj return expr1->hash == expr2->hash
539*38fd1498Szrj && operand_equal_p (expr1->expr, expr2->expr, 0);
540*38fd1498Szrj }
541*38fd1498Szrj
/* Global state of the IVOPTS pass for the loop currently being
   optimized; reset between loops.  */
struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;
  source_location loop_loc;

  /* Numbers of iterations for all exits of the current loop.  */
  hash_map<edge, tree_niter_desc *> *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The hashtable of loop invariant expressions created
     by ivopt.  */
  hash_table<iv_inv_expr_hasher> *inv_expr_tab;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  vec<iv_group *> vgroups;

  /* The candidates.  */
  vec<iv_cand *> vcands;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* Cache used by tree_to_aff_combination_expand.  */
  hash_map<tree, name_expansion *> *name_expansion_cache;

  /* The hashtable of common candidates derived from iv uses.  */
  hash_table<iv_common_cand_hasher> *iv_common_cand_tab;

  /* The common candidates.  */
  vec<iv_common_cand *> iv_common_cands;

  /* The maximum invariant variable id.  */
  unsigned max_inv_var_id;

  /* The maximum invariant expression id.  */
  unsigned max_inv_expr_id;

  /* Number of no_overflow BIVs which are not used in memory address.  */
  unsigned bivs_not_used_in_addr;

  /* Obstack for iv structure.  */
  struct obstack iv_obstack;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;

  /* Whether the loop body includes any function calls.  */
  bool body_includes_call;

  /* Whether the loop body can only be exited via single exit.  */
  bool loop_single_exit_p;
};
610*38fd1498Szrj
/* An assignment of iv candidates to uses.  The working state of the
   cost-minimization search in step 3 of the pass.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_groups;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_group;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* The number of invariants needed, including both invariant variants and
     invariant expressions.  */
  unsigned n_invs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant variable is used.  */
  unsigned *n_inv_var_uses;

  /* Number of times each invariant expression is used.  */
  unsigned *n_inv_expr_uses;

  /* Total cost of the assignment.  */
  comp_cost cost;
};
652*38fd1498Szrj
/* Difference of two iv candidate assignments.  A linked list of per-group
   changes, allowing tentative moves to be committed or rolled back.  */

struct iv_ca_delta
{
  /* Changed group.  */
  struct iv_group *group;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next;
};
669*38fd1498Szrj
/* Bound on number of candidates below that all candidates are considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_GROUPS \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, try removing unnecessary
   ivs from the set always.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
686*38fd1498Szrj
/* The list of trees for that the decl_rtl field must be reset is stored
   here.  */

static vec<tree> decl_rtl_to_reset;

/* Forward declaration; defined later in this file.  */
static comp_cost force_expr_to_var_cost (tree, bool);
693*38fd1498Szrj
694*38fd1498Szrj /* The single loop exit if it dominates the latch, NULL otherwise. */
695*38fd1498Szrj
696*38fd1498Szrj edge
single_dom_exit(struct loop * loop)697*38fd1498Szrj single_dom_exit (struct loop *loop)
698*38fd1498Szrj {
699*38fd1498Szrj edge exit = single_exit (loop);
700*38fd1498Szrj
701*38fd1498Szrj if (!exit)
702*38fd1498Szrj return NULL;
703*38fd1498Szrj
704*38fd1498Szrj if (!just_once_each_iteration_p (loop, exit->src))
705*38fd1498Szrj return NULL;
706*38fd1498Szrj
707*38fd1498Szrj return exit;
708*38fd1498Szrj }
709*38fd1498Szrj
/* Dumps information about the induction variable IV to FILE.  Don't dump
   variable's name if DUMP_NAME is FALSE.  The information is dumped with
   preceding spaces indicated by INDENT_LEVEL.  */

void
dump_iv (FILE *file, struct iv *iv, bool dump_name, unsigned indent_level)
{
  const char *p;
  const char spaces[9] = {' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '\0'};

  /* Clamp the indent to the width of the spaces buffer (two spaces per
     level, four levels max).  */
  if (indent_level > 4)
    indent_level = 4;
  p = spaces + 8 - (indent_level << 1);

  fprintf (file, "%sIV struct:\n", p);
  if (iv->ssa_name && dump_name)
    {
      fprintf (file, "%s  SSA_NAME:\t", p);
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "%s  Type:\t", p);
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  fprintf (file, "%s  Base:\t", p);
  print_generic_expr (file, iv->base, TDF_SLIM);
  fprintf (file, "\n");

  fprintf (file, "%s  Step:\t", p);
  print_generic_expr (file, iv->step, TDF_SLIM);
  fprintf (file, "\n");

  /* base_object is only present for pointer-like ivs.  */
  if (iv->base_object)
    {
      fprintf (file, "%s  Object:\t", p);
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "%s  Biv:\t%c\n", p, iv->biv_p ? 'Y' : 'N');

  fprintf (file, "%s  Overflowness wrto loop niter:\t%s\n",
	   p, iv->no_overflow ? "No-overflow" : "Overflow");
}
756*38fd1498Szrj
757*38fd1498Szrj /* Dumps information about the USE to FILE. */
758*38fd1498Szrj
759*38fd1498Szrj void
dump_use(FILE * file,struct iv_use * use)760*38fd1498Szrj dump_use (FILE *file, struct iv_use *use)
761*38fd1498Szrj {
762*38fd1498Szrj fprintf (file, " Use %d.%d:\n", use->group_id, use->id);
763*38fd1498Szrj fprintf (file, " At stmt:\t");
764*38fd1498Szrj print_gimple_stmt (file, use->stmt, 0);
765*38fd1498Szrj fprintf (file, " At pos:\t");
766*38fd1498Szrj if (use->op_p)
767*38fd1498Szrj print_generic_expr (file, *use->op_p, TDF_SLIM);
768*38fd1498Szrj fprintf (file, "\n");
769*38fd1498Szrj dump_iv (file, use->iv, false, 2);
770*38fd1498Szrj }
771*38fd1498Szrj
772*38fd1498Szrj /* Dumps information about the uses to FILE. */
773*38fd1498Szrj
774*38fd1498Szrj void
dump_groups(FILE * file,struct ivopts_data * data)775*38fd1498Szrj dump_groups (FILE *file, struct ivopts_data *data)
776*38fd1498Szrj {
777*38fd1498Szrj unsigned i, j;
778*38fd1498Szrj struct iv_group *group;
779*38fd1498Szrj
780*38fd1498Szrj for (i = 0; i < data->vgroups.length (); i++)
781*38fd1498Szrj {
782*38fd1498Szrj group = data->vgroups[i];
783*38fd1498Szrj fprintf (file, "Group %d:\n", group->id);
784*38fd1498Szrj if (group->type == USE_NONLINEAR_EXPR)
785*38fd1498Szrj fprintf (file, " Type:\tGENERIC\n");
786*38fd1498Szrj else if (group->type == USE_REF_ADDRESS)
787*38fd1498Szrj fprintf (file, " Type:\tREFERENCE ADDRESS\n");
788*38fd1498Szrj else if (group->type == USE_PTR_ADDRESS)
789*38fd1498Szrj fprintf (file, " Type:\tPOINTER ARGUMENT ADDRESS\n");
790*38fd1498Szrj else
791*38fd1498Szrj {
792*38fd1498Szrj gcc_assert (group->type == USE_COMPARE);
793*38fd1498Szrj fprintf (file, " Type:\tCOMPARE\n");
794*38fd1498Szrj }
795*38fd1498Szrj for (j = 0; j < group->vuses.length (); j++)
796*38fd1498Szrj dump_use (file, group->vuses[j]);
797*38fd1498Szrj }
798*38fd1498Szrj }
799*38fd1498Szrj
800*38fd1498Szrj /* Dumps information about induction variable candidate CAND to FILE. */
801*38fd1498Szrj
802*38fd1498Szrj void
dump_cand(FILE * file,struct iv_cand * cand)803*38fd1498Szrj dump_cand (FILE *file, struct iv_cand *cand)
804*38fd1498Szrj {
805*38fd1498Szrj struct iv *iv = cand->iv;
806*38fd1498Szrj
807*38fd1498Szrj fprintf (file, "Candidate %d:\n", cand->id);
808*38fd1498Szrj if (cand->inv_vars)
809*38fd1498Szrj {
810*38fd1498Szrj fprintf (file, " Depend on inv.vars: ");
811*38fd1498Szrj dump_bitmap (file, cand->inv_vars);
812*38fd1498Szrj }
813*38fd1498Szrj if (cand->inv_exprs)
814*38fd1498Szrj {
815*38fd1498Szrj fprintf (file, " Depend on inv.exprs: ");
816*38fd1498Szrj dump_bitmap (file, cand->inv_exprs);
817*38fd1498Szrj }
818*38fd1498Szrj
819*38fd1498Szrj if (cand->var_before)
820*38fd1498Szrj {
821*38fd1498Szrj fprintf (file, " Var befor: ");
822*38fd1498Szrj print_generic_expr (file, cand->var_before, TDF_SLIM);
823*38fd1498Szrj fprintf (file, "\n");
824*38fd1498Szrj }
825*38fd1498Szrj if (cand->var_after)
826*38fd1498Szrj {
827*38fd1498Szrj fprintf (file, " Var after: ");
828*38fd1498Szrj print_generic_expr (file, cand->var_after, TDF_SLIM);
829*38fd1498Szrj fprintf (file, "\n");
830*38fd1498Szrj }
831*38fd1498Szrj
832*38fd1498Szrj switch (cand->pos)
833*38fd1498Szrj {
834*38fd1498Szrj case IP_NORMAL:
835*38fd1498Szrj fprintf (file, " Incr POS: before exit test\n");
836*38fd1498Szrj break;
837*38fd1498Szrj
838*38fd1498Szrj case IP_BEFORE_USE:
839*38fd1498Szrj fprintf (file, " Incr POS: before use %d\n", cand->ainc_use->id);
840*38fd1498Szrj break;
841*38fd1498Szrj
842*38fd1498Szrj case IP_AFTER_USE:
843*38fd1498Szrj fprintf (file, " Incr POS: after use %d\n", cand->ainc_use->id);
844*38fd1498Szrj break;
845*38fd1498Szrj
846*38fd1498Szrj case IP_END:
847*38fd1498Szrj fprintf (file, " Incr POS: at end\n");
848*38fd1498Szrj break;
849*38fd1498Szrj
850*38fd1498Szrj case IP_ORIGINAL:
851*38fd1498Szrj fprintf (file, " Incr POS: orig biv\n");
852*38fd1498Szrj break;
853*38fd1498Szrj }
854*38fd1498Szrj
855*38fd1498Szrj dump_iv (file, iv, false, 1);
856*38fd1498Szrj }
857*38fd1498Szrj
858*38fd1498Szrj /* Returns the info for ssa version VER. */
859*38fd1498Szrj
860*38fd1498Szrj static inline struct version_info *
ver_info(struct ivopts_data * data,unsigned ver)861*38fd1498Szrj ver_info (struct ivopts_data *data, unsigned ver)
862*38fd1498Szrj {
863*38fd1498Szrj return data->version_info + ver;
864*38fd1498Szrj }
865*38fd1498Szrj
866*38fd1498Szrj /* Returns the info for ssa name NAME. */
867*38fd1498Szrj
868*38fd1498Szrj static inline struct version_info *
name_info(struct ivopts_data * data,tree name)869*38fd1498Szrj name_info (struct ivopts_data *data, tree name)
870*38fd1498Szrj {
871*38fd1498Szrj return ver_info (data, SSA_NAME_VERSION (name));
872*38fd1498Szrj }
873*38fd1498Szrj
874*38fd1498Szrj /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
875*38fd1498Szrj emitted in LOOP. */
876*38fd1498Szrj
877*38fd1498Szrj static bool
stmt_after_ip_normal_pos(struct loop * loop,gimple * stmt)878*38fd1498Szrj stmt_after_ip_normal_pos (struct loop *loop, gimple *stmt)
879*38fd1498Szrj {
880*38fd1498Szrj basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
881*38fd1498Szrj
882*38fd1498Szrj gcc_assert (bb);
883*38fd1498Szrj
884*38fd1498Szrj if (sbb == loop->latch)
885*38fd1498Szrj return true;
886*38fd1498Szrj
887*38fd1498Szrj if (sbb != bb)
888*38fd1498Szrj return false;
889*38fd1498Szrj
890*38fd1498Szrj return stmt == last_stmt (bb);
891*38fd1498Szrj }
892*38fd1498Szrj
/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
   if the positions are identical.  */
896*38fd1498Szrj
897*38fd1498Szrj static bool
stmt_after_inc_pos(struct iv_cand * cand,gimple * stmt,bool true_if_equal)898*38fd1498Szrj stmt_after_inc_pos (struct iv_cand *cand, gimple *stmt, bool true_if_equal)
899*38fd1498Szrj {
900*38fd1498Szrj basic_block cand_bb = gimple_bb (cand->incremented_at);
901*38fd1498Szrj basic_block stmt_bb = gimple_bb (stmt);
902*38fd1498Szrj
903*38fd1498Szrj if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
904*38fd1498Szrj return false;
905*38fd1498Szrj
906*38fd1498Szrj if (stmt_bb != cand_bb)
907*38fd1498Szrj return true;
908*38fd1498Szrj
909*38fd1498Szrj if (true_if_equal
910*38fd1498Szrj && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
911*38fd1498Szrj return true;
912*38fd1498Szrj return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
913*38fd1498Szrj }
914*38fd1498Szrj
/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */
917*38fd1498Szrj
918*38fd1498Szrj static bool
stmt_after_increment(struct loop * loop,struct iv_cand * cand,gimple * stmt)919*38fd1498Szrj stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple *stmt)
920*38fd1498Szrj {
921*38fd1498Szrj switch (cand->pos)
922*38fd1498Szrj {
923*38fd1498Szrj case IP_END:
924*38fd1498Szrj return false;
925*38fd1498Szrj
926*38fd1498Szrj case IP_NORMAL:
927*38fd1498Szrj return stmt_after_ip_normal_pos (loop, stmt);
928*38fd1498Szrj
929*38fd1498Szrj case IP_ORIGINAL:
930*38fd1498Szrj case IP_AFTER_USE:
931*38fd1498Szrj return stmt_after_inc_pos (cand, stmt, false);
932*38fd1498Szrj
933*38fd1498Szrj case IP_BEFORE_USE:
934*38fd1498Szrj return stmt_after_inc_pos (cand, stmt, true);
935*38fd1498Szrj
936*38fd1498Szrj default:
937*38fd1498Szrj gcc_unreachable ();
938*38fd1498Szrj }
939*38fd1498Szrj }
940*38fd1498Szrj
941*38fd1498Szrj /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
942*38fd1498Szrj
943*38fd1498Szrj static bool
abnormal_ssa_name_p(tree exp)944*38fd1498Szrj abnormal_ssa_name_p (tree exp)
945*38fd1498Szrj {
946*38fd1498Szrj if (!exp)
947*38fd1498Szrj return false;
948*38fd1498Szrj
949*38fd1498Szrj if (TREE_CODE (exp) != SSA_NAME)
950*38fd1498Szrj return false;
951*38fd1498Szrj
952*38fd1498Szrj return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
953*38fd1498Szrj }
954*38fd1498Szrj
955*38fd1498Szrj /* Returns false if BASE or INDEX contains a ssa name that occurs in an
956*38fd1498Szrj abnormal phi node. Callback for for_each_index. */
957*38fd1498Szrj
958*38fd1498Szrj static bool
idx_contains_abnormal_ssa_name_p(tree base,tree * index,void * data ATTRIBUTE_UNUSED)959*38fd1498Szrj idx_contains_abnormal_ssa_name_p (tree base, tree *index,
960*38fd1498Szrj void *data ATTRIBUTE_UNUSED)
961*38fd1498Szrj {
962*38fd1498Szrj if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
963*38fd1498Szrj {
964*38fd1498Szrj if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
965*38fd1498Szrj return false;
966*38fd1498Szrj if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
967*38fd1498Szrj return false;
968*38fd1498Szrj }
969*38fd1498Szrj
970*38fd1498Szrj return !abnormal_ssa_name_p (*index);
971*38fd1498Szrj }
972*38fd1498Szrj
973*38fd1498Szrj /* Returns true if EXPR contains a ssa name that occurs in an
974*38fd1498Szrj abnormal phi node. */
975*38fd1498Szrj
bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  /* An ssa name itself: just test the flag.  */
  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  /* Constants and other gimple invariants cannot contain ssa names.  */
  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  /* For an address, walk every index of the addressed reference;
     for_each_index returns false (hence we return true) if the callback
     found an abnormal ssa name.  */
  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
			    idx_contains_abnormal_ssa_name_p,
			    NULL);

  if (code == COND_EXPR)
    return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
      || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
      || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));

  /* Binary/comparison nodes check operand 1 and then fall through to
     share the operand-0 check with unary nodes.  */
  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
	return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
	return true;

      break;

    default:
      /* Any other expression shape is unexpected here.  */
      gcc_unreachable ();
    }

  return false;
}
1025*38fd1498Szrj
1026*38fd1498Szrj /* Returns the structure describing number of iterations determined from
1027*38fd1498Szrj EXIT of DATA->current_loop, or NULL if something goes wrong. */
1028*38fd1498Szrj
static struct tree_niter_desc *
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct tree_niter_desc *desc;
  tree_niter_desc **slot;

  /* The niter cache is created lazily on first use.  */
  if (!data->niters)
    {
      data->niters = new hash_map<edge, tree_niter_desc *>;
      slot = NULL;
    }
  else
    slot = data->niters->get (exit);

  if (!slot)
    {
      /* Try to determine number of iterations.  We cannot safely work with ssa
	 names that appear in phi nodes on abnormal edges, so that we do not
	 create overlapping life ranges for them (PR 27283).  */
      desc = XNEW (struct tree_niter_desc);
      if (!number_of_iterations_exit (data->current_loop,
				      exit, desc, true)
	  || contains_abnormal_ssa_name_p (desc->niter))
	{
	  XDELETE (desc);
	  desc = NULL;
	}
      /* Cache the result — including NULL, so a failed analysis is not
	 retried for the same exit.  */
      data->niters->put (exit, desc);
    }
  else
    desc = *slot;

  return desc;
}
1063*38fd1498Szrj
1064*38fd1498Szrj /* Returns the structure describing number of iterations determined from
1065*38fd1498Szrj single dominating exit of DATA->current_loop, or NULL if something
1066*38fd1498Szrj goes wrong. */
1067*38fd1498Szrj
1068*38fd1498Szrj static struct tree_niter_desc *
niter_for_single_dom_exit(struct ivopts_data * data)1069*38fd1498Szrj niter_for_single_dom_exit (struct ivopts_data *data)
1070*38fd1498Szrj {
1071*38fd1498Szrj edge exit = single_dom_exit (data->current_loop);
1072*38fd1498Szrj
1073*38fd1498Szrj if (!exit)
1074*38fd1498Szrj return NULL;
1075*38fd1498Szrj
1076*38fd1498Szrj return niter_for_exit (data, exit);
1077*38fd1498Szrj }
1078*38fd1498Szrj
1079*38fd1498Szrj /* Initializes data structures used by the iv optimization pass, stored
1080*38fd1498Szrj in DATA. */
1081*38fd1498Szrj
static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  /* Reserve twice the current number of ssa names, since the pass
     creates new names as it works.  */
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_var_id = 0;
  data->max_inv_expr_id = 0;
  /* The niter cache is allocated lazily in niter_for_exit.  */
  data->niters = NULL;
  data->vgroups.create (20);
  data->vcands.create (20);
  data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
  data->name_expansion_cache = NULL;
  data->iv_common_cand_tab = new hash_table<iv_common_cand_hasher> (10);
  data->iv_common_cands.create (20);
  decl_rtl_to_reset.create (20);
  /* All struct iv objects are carved out of this obstack.  */
  gcc_obstack_init (&data->iv_obstack);
}
1101*38fd1498Szrj
1102*38fd1498Szrj /* Returns a memory object to that EXPR points. In case we are able to
1103*38fd1498Szrj determine that it does not point to any such object, NULL is returned. */
1104*38fd1498Szrj
static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj;

  /* If this is a pointer casted to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (CONVERT_EXPR_P (expr))
    return determine_base_object (TREE_OPERAND (expr, 0));

  /* Non-pointer values do not point to any object.  */
  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      /* No recognizable base: fall back to the whole address.  */
      if (!base)
	return expr;

      /* An address of *p has the same base object as p itself.  */
      if (TREE_CODE (base) == MEM_REF)
	return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
			   build_fold_addr_expr (base));

    case POINTER_PLUS_EXPR:
      /* Offsetting does not change the base object.  */
      return determine_base_object (TREE_OPERAND (expr, 0));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
      gcc_unreachable ();

    default:
      if (POLY_INT_CST_P (expr))
	return NULL_TREE;
      /* Treat any other pointer-valued expression as its own base.  */
      return fold_convert (ptr_type_node, expr);
    }
}
1152*38fd1498Szrj
1153*38fd1498Szrj /* Return true if address expression with non-DECL_P operand appears
1154*38fd1498Szrj in EXPR. */
1155*38fd1498Szrj
1156*38fd1498Szrj static bool
contain_complex_addr_expr(tree expr)1157*38fd1498Szrj contain_complex_addr_expr (tree expr)
1158*38fd1498Szrj {
1159*38fd1498Szrj bool res = false;
1160*38fd1498Szrj
1161*38fd1498Szrj STRIP_NOPS (expr);
1162*38fd1498Szrj switch (TREE_CODE (expr))
1163*38fd1498Szrj {
1164*38fd1498Szrj case POINTER_PLUS_EXPR:
1165*38fd1498Szrj case PLUS_EXPR:
1166*38fd1498Szrj case MINUS_EXPR:
1167*38fd1498Szrj res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
1168*38fd1498Szrj res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
1169*38fd1498Szrj break;
1170*38fd1498Szrj
1171*38fd1498Szrj case ADDR_EXPR:
1172*38fd1498Szrj return (!DECL_P (TREE_OPERAND (expr, 0)));
1173*38fd1498Szrj
1174*38fd1498Szrj default:
1175*38fd1498Szrj return false;
1176*38fd1498Szrj }
1177*38fd1498Szrj
1178*38fd1498Szrj return res;
1179*38fd1498Szrj }
1180*38fd1498Szrj
1181*38fd1498Szrj /* Allocates an induction variable with given initial value BASE and step STEP
1182*38fd1498Szrj for loop LOOP. NO_OVERFLOW implies the iv doesn't overflow. */
1183*38fd1498Szrj
static struct iv *
alloc_iv (struct ivopts_data *data, tree base, tree step,
	  bool no_overflow = false)
{
  tree expr = base;
  /* ivs live on the pass-local obstack and are freed in bulk.  */
  struct iv *iv = (struct iv*) obstack_alloc (&data->iv_obstack,
					      sizeof (struct iv));
  gcc_assert (step != NULL_TREE);

  /* Lower address expression in base except ones with DECL_P as operand.
     By doing this:
       1) More accurate cost can be computed for address expressions;
       2) Duplicate candidates won't be created for bases in different
	  forms, like &a[0] and &a.  */
  STRIP_NOPS (expr);
  if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
      || contain_complex_addr_expr (expr))
    {
      aff_tree comb;
      tree_to_aff_combination (expr, TREE_TYPE (expr), &comb);
      base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
    }

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->nonlin_use = NULL;
  iv->ssa_name = NULL_TREE;
  /* Even if the caller could not prove no-overflow, the range analysis
     may still be able to.  */
  if (!no_overflow
      && !iv_can_overflow_p (data->current_loop, TREE_TYPE (base),
			     base, step))
    no_overflow = true;
  iv->no_overflow = no_overflow;
  iv->have_address_use = false;

  return iv;
}
1222*38fd1498Szrj
1223*38fd1498Szrj /* Sets STEP and BASE for induction variable IV. NO_OVERFLOW implies the IV
1224*38fd1498Szrj doesn't overflow. */
1225*38fd1498Szrj
1226*38fd1498Szrj static void
set_iv(struct ivopts_data * data,tree iv,tree base,tree step,bool no_overflow)1227*38fd1498Szrj set_iv (struct ivopts_data *data, tree iv, tree base, tree step,
1228*38fd1498Szrj bool no_overflow)
1229*38fd1498Szrj {
1230*38fd1498Szrj struct version_info *info = name_info (data, iv);
1231*38fd1498Szrj
1232*38fd1498Szrj gcc_assert (!info->iv);
1233*38fd1498Szrj
1234*38fd1498Szrj bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
1235*38fd1498Szrj info->iv = alloc_iv (data, base, step, no_overflow);
1236*38fd1498Szrj info->iv->ssa_name = iv;
1237*38fd1498Szrj }
1238*38fd1498Szrj
1239*38fd1498Szrj /* Finds induction variable declaration for VAR. */
1240*38fd1498Szrj
1241*38fd1498Szrj static struct iv *
get_iv(struct ivopts_data * data,tree var)1242*38fd1498Szrj get_iv (struct ivopts_data *data, tree var)
1243*38fd1498Szrj {
1244*38fd1498Szrj basic_block bb;
1245*38fd1498Szrj tree type = TREE_TYPE (var);
1246*38fd1498Szrj
1247*38fd1498Szrj if (!POINTER_TYPE_P (type)
1248*38fd1498Szrj && !INTEGRAL_TYPE_P (type))
1249*38fd1498Szrj return NULL;
1250*38fd1498Szrj
1251*38fd1498Szrj if (!name_info (data, var)->iv)
1252*38fd1498Szrj {
1253*38fd1498Szrj bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1254*38fd1498Szrj
1255*38fd1498Szrj if (!bb
1256*38fd1498Szrj || !flow_bb_inside_loop_p (data->current_loop, bb))
1257*38fd1498Szrj set_iv (data, var, var, build_int_cst (type, 0), true);
1258*38fd1498Szrj }
1259*38fd1498Szrj
1260*38fd1498Szrj return name_info (data, var)->iv;
1261*38fd1498Szrj }
1262*38fd1498Szrj
1263*38fd1498Szrj /* Return the first non-invariant ssa var found in EXPR. */
1264*38fd1498Szrj
1265*38fd1498Szrj static tree
extract_single_var_from_expr(tree expr)1266*38fd1498Szrj extract_single_var_from_expr (tree expr)
1267*38fd1498Szrj {
1268*38fd1498Szrj int i, n;
1269*38fd1498Szrj tree tmp;
1270*38fd1498Szrj enum tree_code code;
1271*38fd1498Szrj
1272*38fd1498Szrj if (!expr || is_gimple_min_invariant (expr))
1273*38fd1498Szrj return NULL;
1274*38fd1498Szrj
1275*38fd1498Szrj code = TREE_CODE (expr);
1276*38fd1498Szrj if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1277*38fd1498Szrj {
1278*38fd1498Szrj n = TREE_OPERAND_LENGTH (expr);
1279*38fd1498Szrj for (i = 0; i < n; i++)
1280*38fd1498Szrj {
1281*38fd1498Szrj tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));
1282*38fd1498Szrj
1283*38fd1498Szrj if (tmp)
1284*38fd1498Szrj return tmp;
1285*38fd1498Szrj }
1286*38fd1498Szrj }
1287*38fd1498Szrj return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
1288*38fd1498Szrj }
1289*38fd1498Szrj
1290*38fd1498Szrj /* Finds basic ivs. */
1291*38fd1498Szrj
static bool
find_bivs (struct ivopts_data *data)
{
  gphi *phi;
  affine_iv iv;
  tree step, type, base, stop;
  bool found = false;
  struct loop *loop = data->current_loop;
  gphi_iterator psi;

  /* Basic ivs are defined by phi nodes in the loop header.  */
  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();

      /* Skip abnormal and virtual phi results.  */
      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
	continue;

      if (virtual_operand_p (PHI_RESULT (phi)))
	continue;

      /* Only simple affine ivs are handled.  */
      if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true))
	continue;

      /* A zero step means the value is loop invariant, not a biv.  */
      if (integer_zerop (iv.step))
	continue;

      step = iv.step;
      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      /* Stop expanding iv base at the first ssa var referred by iv step.
	 Ideally we should stop at any ssa var, because that's expensive
	 and unusual to happen, we just do it on the first one.

	 See PR64705 for the rationale.  */
      stop = extract_single_var_from_expr (step);
      base = expand_simple_operations (base, stop);
      if (contains_abnormal_ssa_name_p (base)
	  || contains_abnormal_ssa_name_p (step))
	continue;

      /* Bring base and step to the type of the phi result; pointer ivs
	 use the pointer-offset type for the step.  */
      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
	{
	  if (POINTER_TYPE_P (type))
	    step = convert_to_ptrofftype (step);
	  else
	    step = fold_convert (type, step);
	}

      set_iv (data, PHI_RESULT (phi), base, step, iv.no_overflow);
      found = true;
    }

  return found;
}
1347*38fd1498Szrj
1348*38fd1498Szrj /* Marks basic ivs. */
1349*38fd1498Szrj
static void
mark_bivs (struct ivopts_data *data)
{
  gphi *phi;
  gimple *def;
  tree var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;
  gphi_iterator psi;

  data->bivs_not_used_in_addr = 0;
  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();

      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
	continue;

      /* VAR is the value flowing back from the latch — the incremented
	 copy of the biv.  */
      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      def = SSA_NAME_DEF_STMT (var);
      /* Don't mark iv peeled from other one as biv.  */
      if (def
	  && gimple_code (def) == GIMPLE_PHI
	  && gimple_bb (def) == loop->header)
	continue;

      incr_iv = get_iv (data, var);
      if (!incr_iv)
	continue;

      /* If the increment is in the subloop, ignore it.  */
      incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
	  || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
	continue;

      /* Mark both the phi result and its incremented copy as bivs, and
	 count the no-overflow ones for later heuristics.  */
      iv->biv_p = true;
      incr_iv->biv_p = true;
      if (iv->no_overflow)
	data->bivs_not_used_in_addr++;
      if (incr_iv->no_overflow)
	data->bivs_not_used_in_addr++;
    }
}
1396*38fd1498Szrj
1397*38fd1498Szrj /* Checks whether STMT defines a linear induction variable and stores its
1398*38fd1498Szrj parameters to IV. */
1399*38fd1498Szrj
static bool
find_givs_in_stmt_scev (struct ivopts_data *data, gimple *stmt, affine_iv *iv)
{
  tree lhs, stop;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  /* Only assignments to an ssa name can define a giv.  */
  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (stmt);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  /* Ask scalar evolution whether LHS is a simple affine iv.  */
  if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
    return false;

  /* Stop expanding iv base at the first ssa var referred by iv step.
     Ideally we should stop at any ssa var, because that's expensive
     and unusual to happen, we just do it on the first one.

     See PR64705 for the rationale.  */
  stop = extract_single_var_from_expr (iv->step);
  iv->base = expand_simple_operations (iv->base, stop);
  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  /* If STMT could throw, then do not consider STMT as defining a GIV.
     While this will suppress optimizations, we can not safely delete this
     GIV and associated statements, even if it appears it is not used.  */
  if (stmt_could_throw_p (stmt))
    return false;

  return true;
}
1438*38fd1498Szrj
1439*38fd1498Szrj /* Finds general ivs in statement STMT. */
1440*38fd1498Szrj
1441*38fd1498Szrj static void
find_givs_in_stmt(struct ivopts_data * data,gimple * stmt)1442*38fd1498Szrj find_givs_in_stmt (struct ivopts_data *data, gimple *stmt)
1443*38fd1498Szrj {
1444*38fd1498Szrj affine_iv iv;
1445*38fd1498Szrj
1446*38fd1498Szrj if (!find_givs_in_stmt_scev (data, stmt, &iv))
1447*38fd1498Szrj return;
1448*38fd1498Szrj
1449*38fd1498Szrj set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step, iv.no_overflow);
1450*38fd1498Szrj }
1451*38fd1498Szrj
1452*38fd1498Szrj /* Finds general ivs in basic block BB. */
1453*38fd1498Szrj
1454*38fd1498Szrj static void
find_givs_in_bb(struct ivopts_data * data,basic_block bb)1455*38fd1498Szrj find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1456*38fd1498Szrj {
1457*38fd1498Szrj gimple_stmt_iterator bsi;
1458*38fd1498Szrj
1459*38fd1498Szrj for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1460*38fd1498Szrj find_givs_in_stmt (data, gsi_stmt (bsi));
1461*38fd1498Szrj }
1462*38fd1498Szrj
1463*38fd1498Szrj /* Finds general ivs. */
1464*38fd1498Szrj
1465*38fd1498Szrj static void
find_givs(struct ivopts_data * data)1466*38fd1498Szrj find_givs (struct ivopts_data *data)
1467*38fd1498Szrj {
1468*38fd1498Szrj struct loop *loop = data->current_loop;
1469*38fd1498Szrj basic_block *body = get_loop_body_in_dom_order (loop);
1470*38fd1498Szrj unsigned i;
1471*38fd1498Szrj
1472*38fd1498Szrj for (i = 0; i < loop->num_nodes; i++)
1473*38fd1498Szrj find_givs_in_bb (data, body[i]);
1474*38fd1498Szrj free (body);
1475*38fd1498Szrj }
1476*38fd1498Szrj
1477*38fd1498Szrj /* For each ssa name defined in LOOP determines whether it is an induction
1478*38fd1498Szrj variable and if so, its initial value and step. */
1479*38fd1498Szrj
1480*38fd1498Szrj static bool
find_induction_variables(struct ivopts_data * data)1481*38fd1498Szrj find_induction_variables (struct ivopts_data *data)
1482*38fd1498Szrj {
1483*38fd1498Szrj unsigned i;
1484*38fd1498Szrj bitmap_iterator bi;
1485*38fd1498Szrj
1486*38fd1498Szrj if (!find_bivs (data))
1487*38fd1498Szrj return false;
1488*38fd1498Szrj
1489*38fd1498Szrj find_givs (data);
1490*38fd1498Szrj mark_bivs (data);
1491*38fd1498Szrj
1492*38fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
1493*38fd1498Szrj {
1494*38fd1498Szrj struct tree_niter_desc *niter = niter_for_single_dom_exit (data);
1495*38fd1498Szrj
1496*38fd1498Szrj if (niter)
1497*38fd1498Szrj {
1498*38fd1498Szrj fprintf (dump_file, " number of iterations ");
1499*38fd1498Szrj print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1500*38fd1498Szrj if (!integer_zerop (niter->may_be_zero))
1501*38fd1498Szrj {
1502*38fd1498Szrj fprintf (dump_file, "; zero if ");
1503*38fd1498Szrj print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1504*38fd1498Szrj }
1505*38fd1498Szrj fprintf (dump_file, "\n");
1506*38fd1498Szrj };
1507*38fd1498Szrj
1508*38fd1498Szrj fprintf (dump_file, "\n<Induction Vars>:\n");
1509*38fd1498Szrj EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1510*38fd1498Szrj {
1511*38fd1498Szrj struct version_info *info = ver_info (data, i);
1512*38fd1498Szrj if (info->iv && info->iv->step && !integer_zerop (info->iv->step))
1513*38fd1498Szrj dump_iv (dump_file, ver_info (data, i)->iv, true, 0);
1514*38fd1498Szrj }
1515*38fd1498Szrj }
1516*38fd1498Szrj
1517*38fd1498Szrj return true;
1518*38fd1498Szrj }
1519*38fd1498Szrj
1520*38fd1498Szrj /* Records a use of TYPE at *USE_P in STMT whose value is IV in GROUP.
1521*38fd1498Szrj For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
1522*38fd1498Szrj is the const offset stripped from IV base and MEM_TYPE is the type
1523*38fd1498Szrj of the memory being addressed. For uses of other types, ADDR_BASE
1524*38fd1498Szrj and ADDR_OFFSET are zero by default and MEM_TYPE is NULL_TREE. */
1525*38fd1498Szrj
1526*38fd1498Szrj static struct iv_use *
record_use(struct iv_group * group,tree * use_p,struct iv * iv,gimple * stmt,enum use_type type,tree mem_type,tree addr_base,poly_uint64 addr_offset)1527*38fd1498Szrj record_use (struct iv_group *group, tree *use_p, struct iv *iv,
1528*38fd1498Szrj gimple *stmt, enum use_type type, tree mem_type,
1529*38fd1498Szrj tree addr_base, poly_uint64 addr_offset)
1530*38fd1498Szrj {
1531*38fd1498Szrj struct iv_use *use = XCNEW (struct iv_use);
1532*38fd1498Szrj
1533*38fd1498Szrj use->id = group->vuses.length ();
1534*38fd1498Szrj use->group_id = group->id;
1535*38fd1498Szrj use->type = type;
1536*38fd1498Szrj use->mem_type = mem_type;
1537*38fd1498Szrj use->iv = iv;
1538*38fd1498Szrj use->stmt = stmt;
1539*38fd1498Szrj use->op_p = use_p;
1540*38fd1498Szrj use->addr_base = addr_base;
1541*38fd1498Szrj use->addr_offset = addr_offset;
1542*38fd1498Szrj
1543*38fd1498Szrj group->vuses.safe_push (use);
1544*38fd1498Szrj return use;
1545*38fd1498Szrj }
1546*38fd1498Szrj
1547*38fd1498Szrj /* Checks whether OP is a loop-level invariant and if so, records it.
1548*38fd1498Szrj NONLINEAR_USE is true if the invariant is used in a way we do not
1549*38fd1498Szrj handle specially. */
1550*38fd1498Szrj
1551*38fd1498Szrj static void
record_invariant(struct ivopts_data * data,tree op,bool nonlinear_use)1552*38fd1498Szrj record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1553*38fd1498Szrj {
1554*38fd1498Szrj basic_block bb;
1555*38fd1498Szrj struct version_info *info;
1556*38fd1498Szrj
1557*38fd1498Szrj if (TREE_CODE (op) != SSA_NAME
1558*38fd1498Szrj || virtual_operand_p (op))
1559*38fd1498Szrj return;
1560*38fd1498Szrj
1561*38fd1498Szrj bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1562*38fd1498Szrj if (bb
1563*38fd1498Szrj && flow_bb_inside_loop_p (data->current_loop, bb))
1564*38fd1498Szrj return;
1565*38fd1498Szrj
1566*38fd1498Szrj info = name_info (data, op);
1567*38fd1498Szrj info->name = op;
1568*38fd1498Szrj info->has_nonlin_use |= nonlinear_use;
1569*38fd1498Szrj if (!info->inv_id)
1570*38fd1498Szrj info->inv_id = ++data->max_inv_var_id;
1571*38fd1498Szrj bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1572*38fd1498Szrj }
1573*38fd1498Szrj
1574*38fd1498Szrj /* Record a group of TYPE. */
1575*38fd1498Szrj
1576*38fd1498Szrj static struct iv_group *
record_group(struct ivopts_data * data,enum use_type type)1577*38fd1498Szrj record_group (struct ivopts_data *data, enum use_type type)
1578*38fd1498Szrj {
1579*38fd1498Szrj struct iv_group *group = XCNEW (struct iv_group);
1580*38fd1498Szrj
1581*38fd1498Szrj group->id = data->vgroups.length ();
1582*38fd1498Szrj group->type = type;
1583*38fd1498Szrj group->related_cands = BITMAP_ALLOC (NULL);
1584*38fd1498Szrj group->vuses.create (1);
1585*38fd1498Szrj
1586*38fd1498Szrj data->vgroups.safe_push (group);
1587*38fd1498Szrj return group;
1588*38fd1498Szrj }
1589*38fd1498Szrj
/* Record a use of TYPE at *USE_P in STMT whose value is IV in a group.
   New group will be created if there is no existing group for the use.
   MEM_TYPE is the type of memory being addressed, or NULL if this
   isn't an address reference.  */

static struct iv_use *
record_group_use (struct ivopts_data *data, tree *use_p,
		  struct iv *iv, gimple *stmt, enum use_type type,
		  tree mem_type)
{
  tree addr_base = NULL;
  struct iv_group *group = NULL;
  poly_uint64 addr_offset = 0;

  /* Address type uses are grouped together when they share the same
     stripped base and step; non-address uses always get a new group.  */
  if (address_p (type))
    {
      unsigned int i;

      addr_base = strip_offset (iv->base, &addr_offset);
      for (i = 0; i < data->vgroups.length (); i++)
	{
	  struct iv_use *use;

	  group = data->vgroups[i];
	  /* The whole group shares one kind, so checking the first use
	     is enough.  */
	  use = group->vuses[0];
	  if (!address_p (use->type))
	    continue;

	  /* Check if it has the same stripped base and step.  */
	  if (operand_equal_p (iv->base_object, use->iv->base_object, 0)
	      && operand_equal_p (iv->step, use->iv->step, 0)
	      && operand_equal_p (addr_base, use->addr_base, 0))
	    break;
	}
      /* No matching address group; fall through to create a new one.  */
      if (i == data->vgroups.length ())
	group = NULL;
    }

  if (!group)
    group = record_group (data, type);

  return record_use (group, use_p, iv, stmt, type, mem_type,
		     addr_base, addr_offset);
}
1635*38fd1498Szrj
1636*38fd1498Szrj /* Checks whether the use OP is interesting and if so, records it. */
1637*38fd1498Szrj
1638*38fd1498Szrj static struct iv_use *
find_interesting_uses_op(struct ivopts_data * data,tree op)1639*38fd1498Szrj find_interesting_uses_op (struct ivopts_data *data, tree op)
1640*38fd1498Szrj {
1641*38fd1498Szrj struct iv *iv;
1642*38fd1498Szrj gimple *stmt;
1643*38fd1498Szrj struct iv_use *use;
1644*38fd1498Szrj
1645*38fd1498Szrj if (TREE_CODE (op) != SSA_NAME)
1646*38fd1498Szrj return NULL;
1647*38fd1498Szrj
1648*38fd1498Szrj iv = get_iv (data, op);
1649*38fd1498Szrj if (!iv)
1650*38fd1498Szrj return NULL;
1651*38fd1498Szrj
1652*38fd1498Szrj if (iv->nonlin_use)
1653*38fd1498Szrj {
1654*38fd1498Szrj gcc_assert (iv->nonlin_use->type == USE_NONLINEAR_EXPR);
1655*38fd1498Szrj return iv->nonlin_use;
1656*38fd1498Szrj }
1657*38fd1498Szrj
1658*38fd1498Szrj if (integer_zerop (iv->step))
1659*38fd1498Szrj {
1660*38fd1498Szrj record_invariant (data, op, true);
1661*38fd1498Szrj return NULL;
1662*38fd1498Szrj }
1663*38fd1498Szrj
1664*38fd1498Szrj stmt = SSA_NAME_DEF_STMT (op);
1665*38fd1498Szrj gcc_assert (gimple_code (stmt) == GIMPLE_PHI || is_gimple_assign (stmt));
1666*38fd1498Szrj
1667*38fd1498Szrj use = record_group_use (data, NULL, iv, stmt, USE_NONLINEAR_EXPR, NULL_TREE);
1668*38fd1498Szrj iv->nonlin_use = use;
1669*38fd1498Szrj return use;
1670*38fd1498Szrj }
1671*38fd1498Szrj
/* Indicate how compare type iv_use can be handled.  */
enum comp_iv_rewrite
{
  /* We cannot rewrite the compare type iv_use.  */
  COMP_IV_NA,
  /* We may rewrite compare type iv_use by expressing value of the iv_use.  */
  COMP_IV_EXPR,
  /* We may rewrite compare type iv_uses on both sides of comparison by
     expressing value of each iv_use.  */
  COMP_IV_EXPR_2,
  /* We may rewrite compare type iv_use by expressing value of the iv_use
     or by eliminating it with other iv_cand.  */
  COMP_IV_ELIM
};
1685*38fd1498Szrj
/* Given a condition in statement STMT, checks whether it is a compare
   of an induction variable and an invariant.  If this is the case,
   CONTROL_VAR is set to location of the iv, BOUND to the location of
   the invariant, IV_VAR and IV_BOUND are set to the corresponding
   induction variable descriptions, and the returned value describes
   how the compare may be rewritten.  If this is not the case,
   CONTROL_VAR and BOUND are set to the arguments of the condition and
   COMP_IV_NA is returned.  Any of the output pointers may be NULL if
   the caller does not need that result.  */

static enum comp_iv_rewrite
extract_cond_operands (struct ivopts_data *data, gimple *stmt,
		       tree **control_var, tree **bound,
		       struct iv **iv_var, struct iv **iv_bound)
{
  /* The objects returned when COND has constant operands.  */
  static struct iv const_iv;
  static tree zero;
  tree *op0 = &zero, *op1 = &zero;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv;
  enum comp_iv_rewrite rewrite_type = COMP_IV_NA;

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      gcond *cond_stmt = as_a <gcond *> (stmt);
      op0 = gimple_cond_lhs_ptr (cond_stmt);
      op1 = gimple_cond_rhs_ptr (cond_stmt);
    }
  else
    {
      /* Otherwise STMT is an assignment; the compared operands are its
	 first two rhs operands.  */
      op0 = gimple_assign_rhs1_ptr (stmt);
      op1 = gimple_assign_rhs2_ptr (stmt);
    }

  /* (Re)initialize the shared fallback objects on every call.  */
  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* If both sides of comparison are IVs.  We can express ivs on both end.  */
  if (iv0 && iv1 && !integer_zerop (iv0->step) && !integer_zerop (iv1->step))
    {
      rewrite_type = COMP_IV_EXPR_2;
      goto end;
    }

  /* If none side of comparison is IV.  */
  if ((!iv0 || integer_zerop (iv0->step))
      && (!iv1 || integer_zerop (iv1->step)))
    goto end;

  /* Control variable may be on the other side; normalize so the iv is
     always op0/iv0.  */
  if (!iv0 || integer_zerop (iv0->step))
    {
      std::swap (op0, op1);
      std::swap (iv0, iv1);
    }
  /* If one side is IV and the other side isn't loop invariant.  */
  if (!iv1)
    rewrite_type = COMP_IV_EXPR;
  /* If one side is IV and the other side is loop invariant.  */
  else if (!integer_zerop (iv0->step) && integer_zerop (iv1->step))
    rewrite_type = COMP_IV_ELIM;

end:
  if (control_var)
    *control_var = op0;
  if (iv_var)
    *iv_var = iv0;
  if (bound)
    *bound = op1;
  if (iv_bound)
    *iv_bound = iv1;

  return rewrite_type;
}
1763*38fd1498Szrj
1764*38fd1498Szrj /* Checks whether the condition in STMT is interesting and if so,
1765*38fd1498Szrj records it. */
1766*38fd1498Szrj
1767*38fd1498Szrj static void
find_interesting_uses_cond(struct ivopts_data * data,gimple * stmt)1768*38fd1498Szrj find_interesting_uses_cond (struct ivopts_data *data, gimple *stmt)
1769*38fd1498Szrj {
1770*38fd1498Szrj tree *var_p, *bound_p;
1771*38fd1498Szrj struct iv *var_iv, *bound_iv;
1772*38fd1498Szrj enum comp_iv_rewrite ret;
1773*38fd1498Szrj
1774*38fd1498Szrj ret = extract_cond_operands (data, stmt,
1775*38fd1498Szrj &var_p, &bound_p, &var_iv, &bound_iv);
1776*38fd1498Szrj if (ret == COMP_IV_NA)
1777*38fd1498Szrj {
1778*38fd1498Szrj find_interesting_uses_op (data, *var_p);
1779*38fd1498Szrj find_interesting_uses_op (data, *bound_p);
1780*38fd1498Szrj return;
1781*38fd1498Szrj }
1782*38fd1498Szrj
1783*38fd1498Szrj record_group_use (data, var_p, var_iv, stmt, USE_COMPARE, NULL_TREE);
1784*38fd1498Szrj /* Record compare type iv_use for iv on the other side of comparison. */
1785*38fd1498Szrj if (ret == COMP_IV_EXPR_2)
1786*38fd1498Szrj record_group_use (data, bound_p, bound_iv, stmt, USE_COMPARE, NULL_TREE);
1787*38fd1498Szrj }
1788*38fd1498Szrj
1789*38fd1498Szrj /* Returns the outermost loop EXPR is obviously invariant in
1790*38fd1498Szrj relative to the loop LOOP, i.e. if all its operands are defined
1791*38fd1498Szrj outside of the returned loop. Returns NULL if EXPR is not
1792*38fd1498Szrj even obviously invariant in LOOP. */
1793*38fd1498Szrj
1794*38fd1498Szrj struct loop *
outermost_invariant_loop_for_expr(struct loop * loop,tree expr)1795*38fd1498Szrj outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
1796*38fd1498Szrj {
1797*38fd1498Szrj basic_block def_bb;
1798*38fd1498Szrj unsigned i, len;
1799*38fd1498Szrj
1800*38fd1498Szrj if (is_gimple_min_invariant (expr))
1801*38fd1498Szrj return current_loops->tree_root;
1802*38fd1498Szrj
1803*38fd1498Szrj if (TREE_CODE (expr) == SSA_NAME)
1804*38fd1498Szrj {
1805*38fd1498Szrj def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1806*38fd1498Szrj if (def_bb)
1807*38fd1498Szrj {
1808*38fd1498Szrj if (flow_bb_inside_loop_p (loop, def_bb))
1809*38fd1498Szrj return NULL;
1810*38fd1498Szrj return superloop_at_depth (loop,
1811*38fd1498Szrj loop_depth (def_bb->loop_father) + 1);
1812*38fd1498Szrj }
1813*38fd1498Szrj
1814*38fd1498Szrj return current_loops->tree_root;
1815*38fd1498Szrj }
1816*38fd1498Szrj
1817*38fd1498Szrj if (!EXPR_P (expr))
1818*38fd1498Szrj return NULL;
1819*38fd1498Szrj
1820*38fd1498Szrj unsigned maxdepth = 0;
1821*38fd1498Szrj len = TREE_OPERAND_LENGTH (expr);
1822*38fd1498Szrj for (i = 0; i < len; i++)
1823*38fd1498Szrj {
1824*38fd1498Szrj struct loop *ivloop;
1825*38fd1498Szrj if (!TREE_OPERAND (expr, i))
1826*38fd1498Szrj continue;
1827*38fd1498Szrj
1828*38fd1498Szrj ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
1829*38fd1498Szrj if (!ivloop)
1830*38fd1498Szrj return NULL;
1831*38fd1498Szrj maxdepth = MAX (maxdepth, loop_depth (ivloop));
1832*38fd1498Szrj }
1833*38fd1498Szrj
1834*38fd1498Szrj return superloop_at_depth (loop, maxdepth);
1835*38fd1498Szrj }
1836*38fd1498Szrj
1837*38fd1498Szrj /* Returns true if expression EXPR is obviously invariant in LOOP,
1838*38fd1498Szrj i.e. if all its operands are defined outside of the LOOP. LOOP
1839*38fd1498Szrj should not be the function body. */
1840*38fd1498Szrj
1841*38fd1498Szrj bool
expr_invariant_in_loop_p(struct loop * loop,tree expr)1842*38fd1498Szrj expr_invariant_in_loop_p (struct loop *loop, tree expr)
1843*38fd1498Szrj {
1844*38fd1498Szrj basic_block def_bb;
1845*38fd1498Szrj unsigned i, len;
1846*38fd1498Szrj
1847*38fd1498Szrj gcc_assert (loop_depth (loop) > 0);
1848*38fd1498Szrj
1849*38fd1498Szrj if (is_gimple_min_invariant (expr))
1850*38fd1498Szrj return true;
1851*38fd1498Szrj
1852*38fd1498Szrj if (TREE_CODE (expr) == SSA_NAME)
1853*38fd1498Szrj {
1854*38fd1498Szrj def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1855*38fd1498Szrj if (def_bb
1856*38fd1498Szrj && flow_bb_inside_loop_p (loop, def_bb))
1857*38fd1498Szrj return false;
1858*38fd1498Szrj
1859*38fd1498Szrj return true;
1860*38fd1498Szrj }
1861*38fd1498Szrj
1862*38fd1498Szrj if (!EXPR_P (expr))
1863*38fd1498Szrj return false;
1864*38fd1498Szrj
1865*38fd1498Szrj len = TREE_OPERAND_LENGTH (expr);
1866*38fd1498Szrj for (i = 0; i < len; i++)
1867*38fd1498Szrj if (TREE_OPERAND (expr, i)
1868*38fd1498Szrj && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1869*38fd1498Szrj return false;
1870*38fd1498Szrj
1871*38fd1498Szrj return true;
1872*38fd1498Szrj }
1873*38fd1498Szrj
/* Given expression EXPR which computes inductive values with respect
   to loop recorded in DATA, this function returns biv from which EXPR
   is derived by tracing definition chains of ssa variables in EXPR.
   Returns NULL when EXPR is invariant or no deriving biv is found.  */

static struct iv*
find_deriving_biv_for_expr (struct ivopts_data *data, tree expr)
{
  struct iv *iv;
  unsigned i, n;
  tree e2, e1;
  enum tree_code code;
  gimple *stmt;

  if (expr == NULL_TREE)
    return NULL;

  if (is_gimple_min_invariant (expr))
    return NULL;

  code = TREE_CODE (expr);
  /* For a compound expression, recurse into the operands and return
     the first deriving biv found.  */
  if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
    {
      n = TREE_OPERAND_LENGTH (expr);
      for (i = 0; i < n; i++)
	{
	  iv = find_deriving_biv_for_expr (data, TREE_OPERAND (expr, i));
	  if (iv)
	    return iv;
	}
    }

  /* Stop if it's not ssa name.  */
  if (code != SSA_NAME)
    return NULL;

  iv = get_iv (data, expr);
  if (!iv || integer_zerop (iv->step))
    return NULL;
  else if (iv->biv_p)
    return iv;

  stmt = SSA_NAME_DEF_STMT (expr);
  if (gphi *phi = dyn_cast <gphi *> (stmt))
    {
      ssa_op_iter iter;
      use_operand_p use_p;
      basic_block phi_bb = gimple_bb (phi);

      /* Skip loop header PHI that doesn't define biv.  */
      if (phi_bb->loop_father == data->current_loop)
	return NULL;

      if (virtual_operand_p (gimple_phi_result (phi)))
	return NULL;

      /* Trace each PHI argument in turn; the first one that derives a
	 biv wins.  */
      FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
	{
	  tree use = USE_FROM_PTR (use_p);
	  iv = find_deriving_biv_for_expr (data, use);
	  if (iv)
	    return iv;
	}
      return NULL;
    }
  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return NULL;

  e1 = gimple_assign_rhs1 (stmt);
  code = gimple_assign_rhs_code (stmt);
  if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
    return find_deriving_biv_for_expr (data, e1);

  switch (code)
    {
    case MULT_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
    case POINTER_PLUS_EXPR:
      /* Increments, decrements and multiplications by a constant
	 are simple.  First try the second operand; if it does not
	 derive a biv, fall through and trace the first operand just
	 as for a conversion.  */
      e2 = gimple_assign_rhs2 (stmt);
      iv = find_deriving_biv_for_expr (data, e2);
      if (iv)
	return iv;
      gcc_fallthrough ();

    CASE_CONVERT:
      /* Casts are simple.  */
      return find_deriving_biv_for_expr (data, e1);

    default:
      break;
    }

  return NULL;
}
1970*38fd1498Szrj
/* Record BIV, its predecessor and successor that they are used in
   address type uses.  */

static void
record_biv_for_address_use (struct ivopts_data *data, struct iv *biv)
{
  unsigned i;
  tree type, base_1, base_2;
  bitmap_iterator bi;

  /* Nothing to do for a non-biv, a zero-step or possibly-wrapping iv,
     or one already marked as used in an address.  */
  if (!biv || !biv->biv_p || integer_zerop (biv->step)
      || biv->have_address_use || !biv->no_overflow)
    return;

  type = TREE_TYPE (biv->base);
  if (!INTEGRAL_TYPE_P (type))
    return;

  biv->have_address_use = true;
  data->bivs_not_used_in_addr--;
  /* BASE_1 is BIV's value after one step.  */
  base_1 = fold_build2 (PLUS_EXPR, type, biv->base, biv->step);
  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      struct iv *iv = ver_info (data, i)->iv;

      if (!iv || !iv->biv_p || integer_zerop (iv->step)
	  || iv->have_address_use || !iv->no_overflow)
	continue;

      if (type != TREE_TYPE (iv->base)
	  || !INTEGRAL_TYPE_P (TREE_TYPE (iv->base)))
	continue;

      if (!operand_equal_p (biv->step, iv->step, 0))
	continue;

      base_2 = fold_build2 (PLUS_EXPR, type, iv->base, iv->step);
      /* IV is BIV's successor or predecessor (same step, bases exactly
	 one step apart), so mark it too.  */
      if (operand_equal_p (base_1, iv->base, 0)
	  || operand_equal_p (base_2, biv->base, 0))
	{
	  iv->have_address_use = true;
	  data->bivs_not_used_in_addr--;
	}
    }
}
2016*38fd1498Szrj
/* Cumulates the steps of indices into DATA and replaces their values with the
   initial ones.  Returns false when the value of the index cannot be determined.
   Callback for for_each_index.  */

struct ifs_ivopts_data
{
  /* Per-loop ivopts context.  */
  struct ivopts_data *ivopts_data;
  /* Statement the indices come from.  */
  gimple *stmt;
  /* Accumulated step of the replaced indices, in sizetype units.  */
  tree step;
};
2027*38fd1498Szrj
/* Callback for for_each_index (see the comment above struct
   ifs_ivopts_data): accumulates the step contributed by index *IDX of
   reference BASE into DATA and replaces *IDX with the initial value of
   its iv.  Returns false when the step of the index cannot be
   determined.  */

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
  struct iv *iv;
  bool use_overflow_semantics = false;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both lower bound
     and step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      /* Moreover, for a range, the size needs to be invariant as well.  */
      if (TREE_CODE (base) == ARRAY_RANGE_REF
	  && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
	return false;

      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
	  || !expr_invariant_in_loop_p (loop, lbound))
	return false;
    }

  /* A non-SSA index contributes no step.  */
  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX  We produce for a base of *D42 with iv->base being &x[0]
	  *&x[0], which is not folded and does not trigger the
	  ARRAY_REF path below.  */
  *idx = iv->base;

  if (integer_zerop (iv->step))
    return true;

  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
	return false;
    }
  else
    /* The step for pointer arithmetics already is 1 byte.  */
    step = size_one_node;

  iv_base = iv->base;
  iv_step = iv->step;
  if (iv->no_overflow && nowrap_type_p (TREE_TYPE (iv_step)))
    use_overflow_semantics = true;

  if (!convert_affine_scev (dta->ivopts_data->current_loop,
			    sizetype, &iv_base, &iv_step, dta->stmt,
			    use_overflow_semantics))
    {
      /* The index might wrap.  */
      return false;
    }

  /* Accumulate this index's contribution: element size times iv step.  */
  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);

  if (dta->ivopts_data->bivs_not_used_in_addr)
    {
      /* Mark the deriving biv (and its neighbors) as used in an
	 address, so later cost computations can take that into
	 account.  */
      if (!iv->biv_p)
	iv = find_deriving_biv_for_expr (dta->ivopts_data, iv->ssa_name);

      record_biv_for_address_use (dta->ivopts_data, iv);
    }
  return true;
}
2116*38fd1498Szrj
2117*38fd1498Szrj /* Records use in index IDX. Callback for for_each_index. Ivopts data
2118*38fd1498Szrj object is passed to it in DATA. */
2119*38fd1498Szrj
2120*38fd1498Szrj static bool
idx_record_use(tree base,tree * idx,void * vdata)2121*38fd1498Szrj idx_record_use (tree base, tree *idx,
2122*38fd1498Szrj void *vdata)
2123*38fd1498Szrj {
2124*38fd1498Szrj struct ivopts_data *data = (struct ivopts_data *) vdata;
2125*38fd1498Szrj find_interesting_uses_op (data, *idx);
2126*38fd1498Szrj if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2127*38fd1498Szrj {
2128*38fd1498Szrj find_interesting_uses_op (data, array_ref_element_size (base));
2129*38fd1498Szrj find_interesting_uses_op (data, array_ref_low_bound (base));
2130*38fd1498Szrj }
2131*38fd1498Szrj return true;
2132*38fd1498Szrj }
2133*38fd1498Szrj
/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.  */

static bool
constant_multiple_of (tree top, tree bot, widest_int *mul)
{
  tree mby;
  enum tree_code code;
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
  widest_int res, p0, p1;

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  /* Identical operands: TOP = 1 * BOT.  */
  if (operand_equal_p (top, bot, 0))
    {
      *mul = 1;
      return true;
    }

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      /* (X * C) is a multiple of BOT if X is.  */
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
	return false;

      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
	return false;

      *mul = wi::sext (res * wi::to_widest (mby), precision);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* (X +- Y) is a multiple of BOT if both X and Y are.  */
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
	  || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
	return false;

      if (code == MINUS_EXPR)
	p1 = -p1;
      *mul = wi::sext (p0 + p1, precision);
      return true;

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
	return false;

      /* Truncating division; succeed only when the remainder is zero.  */
      p0 = widest_int::from (wi::to_wide (top), SIGNED);
      p1 = widest_int::from (wi::to_wide (bot), SIGNED);
      if (p1 == 0)
	return false;
      *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
      return res == 0;

    default:
      /* Handle poly_int constants via constant_multiple_p.  */
      if (POLY_INT_CST_P (top)
	  && POLY_INT_CST_P (bot)
	  && constant_multiple_p (wi::to_poly_widest (top),
				  wi::to_poly_widest (bot), mul))
	return true;

      return false;
    }
}
2202*38fd1498Szrj
2203*38fd1498Szrj /* Return true if memory reference REF with step STEP may be unaligned. */
2204*38fd1498Szrj
2205*38fd1498Szrj static bool
may_be_unaligned_p(tree ref,tree step)2206*38fd1498Szrj may_be_unaligned_p (tree ref, tree step)
2207*38fd1498Szrj {
2208*38fd1498Szrj /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
2209*38fd1498Szrj thus they are not misaligned. */
2210*38fd1498Szrj if (TREE_CODE (ref) == TARGET_MEM_REF)
2211*38fd1498Szrj return false;
2212*38fd1498Szrj
2213*38fd1498Szrj unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
2214*38fd1498Szrj if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
2215*38fd1498Szrj align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));
2216*38fd1498Szrj
2217*38fd1498Szrj unsigned HOST_WIDE_INT bitpos;
2218*38fd1498Szrj unsigned int ref_align;
2219*38fd1498Szrj get_object_alignment_1 (ref, &ref_align, &bitpos);
2220*38fd1498Szrj if (ref_align < align
2221*38fd1498Szrj || (bitpos % align) != 0
2222*38fd1498Szrj || (bitpos % BITS_PER_UNIT) != 0)
2223*38fd1498Szrj return true;
2224*38fd1498Szrj
2225*38fd1498Szrj unsigned int trailing_zeros = tree_ctz (step);
2226*38fd1498Szrj if (trailing_zeros < HOST_BITS_PER_INT
2227*38fd1498Szrj && (1U << trailing_zeros) * BITS_PER_UNIT < align)
2228*38fd1498Szrj return true;
2229*38fd1498Szrj
2230*38fd1498Szrj return false;
2231*38fd1498Szrj }
2232*38fd1498Szrj
/* Return true if EXPR may be non-addressable.  */

bool
may_be_nonaddressable_p (tree expr)
{
  switch (TREE_CODE (expr))
    {
    case TARGET_MEM_REF:
      /* TARGET_MEM_REFs are translated directly to valid MEMs on the
	 target, thus they are always addressable.  */
      return false;

    case MEM_REF:
      /* Likewise for MEM_REFs, modulo the storage order.  */
      return REF_REVERSE_STORAGE_ORDER (expr);

    case BIT_FIELD_REF:
      /* A reverse-storage-order bit-field reference is never addressable;
	 otherwise addressability depends on the underlying object.  */
      if (REF_REVERSE_STORAGE_ORDER (expr))
	return true;
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case COMPONENT_REF:
      if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
	return true;
      /* The field itself may be flagged non-addressable, or the
	 containing object may be.  */
      return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
	     || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
	return true;
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case VIEW_CONVERT_EXPR:
      /* This kind of view-conversions may wrap non-addressable objects
	 and make them look addressable.  After some processing the
	 non-addressability may be uncovered again, causing ADDR_EXPRs
	 of inappropriate objects to be built.  */
      if (is_gimple_reg (TREE_OPERAND (expr, 0))
	  || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
	return true;
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    CASE_CONVERT:
      /* Conversions are conservatively treated as possibly hiding a
	 non-addressable object.  */
      return true;

    default:
      break;
    }

  return false;
}
2285*38fd1498Szrj
/* Finds addresses in *OP_P inside STMT.  If an induction variable is
   discovered in the address computation, the use is recorded as an
   address-type group use; otherwise we fall back to recording plain
   uses of the indices via idx_record_use.  */

static void
find_interesting_uses_address (struct ivopts_data *data, gimple *stmt,
			       tree *op_p)
{
  tree base = *op_p, step = size_zero_node;
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (gimple_has_volatile_ops (stmt))
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == BIT_FIELD_REF)
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      /* Substitute the iv base for each SSA operand of the TARGET_MEM_REF
	 (base, index2, index), accumulating the combined step.  */
      if (TMR_BASE (base)
	  && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_BASE (base));
	  if (!civ)
	    goto fail;

	  TMR_BASE (base) = civ->base;
	  step = civ->step;
	}
      if (TMR_INDEX2 (base)
	  && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_INDEX2 (base));
	  if (!civ)
	    goto fail;

	  TMR_INDEX2 (base) = civ->base;
	  step = civ->step;
	}
      if (TMR_INDEX (base)
	  && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_INDEX (base));
	  if (!civ)
	    goto fail;

	  TMR_INDEX (base) = civ->base;
	  astep = civ->step;

	  if (astep)
	    {
	      /* The index is scaled by TMR_STEP before contributing
		 to the overall step.  */
	      if (TMR_STEP (base))
		astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

	      step = fold_build2 (PLUS_EXPR, type, step, astep);
	    }
	}

      /* A zero combined step means the address is loop invariant --
	 not an interesting address use.  */
      if (integer_zerop (step))
	goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      /* Generic memory reference: find the step by walking its indices.  */
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step = size_zero_node;
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
	  || integer_zerop (ifs_ivopts_data.step))
	goto fail;
      step = ifs_ivopts_data.step;

      /* Check that the base expression is addressable.  This needs
	 to be done after substituting bases of IVs into it.  */
      if (may_be_nonaddressable_p (base))
	goto fail;

      /* Moreover, on strict alignment platforms, check that it is
	 sufficiently aligned.  */
      if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
	goto fail;

      base = build_fold_addr_expr (base);

      /* Substituting bases of IVs into the base expression might
	 have caused folding opportunities.  */
      if (TREE_CODE (base) == ADDR_EXPR)
	{
	  tree *ref = &TREE_OPERAND (base, 0);
	  while (handled_component_p (*ref))
	    ref = &TREE_OPERAND (*ref, 0);
	  if (TREE_CODE (*ref) == MEM_REF)
	    {
	      tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
				      TREE_OPERAND (*ref, 0),
				      TREE_OPERAND (*ref, 1));
	      if (tem)
		*ref = tem;
	    }
	}
    }

  civ = alloc_iv (data, base, step);
  /* Fail if base object of this memory reference is unknown.  */
  if (civ->base_object == NULL_TREE)
    goto fail;

  record_group_use (data, op_p, civ, stmt, USE_REF_ADDRESS, TREE_TYPE (*op_p));
  return;

fail:
  /* Could not treat *OP_P as an address use; record plain uses of the
     indices instead so the IVs in them are not lost.  */
  for_each_index (op_p, idx_record_use, data);
}
2407*38fd1498Szrj
2408*38fd1498Szrj /* Finds and records invariants used in STMT. */
2409*38fd1498Szrj
2410*38fd1498Szrj static void
find_invariants_stmt(struct ivopts_data * data,gimple * stmt)2411*38fd1498Szrj find_invariants_stmt (struct ivopts_data *data, gimple *stmt)
2412*38fd1498Szrj {
2413*38fd1498Szrj ssa_op_iter iter;
2414*38fd1498Szrj use_operand_p use_p;
2415*38fd1498Szrj tree op;
2416*38fd1498Szrj
2417*38fd1498Szrj FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2418*38fd1498Szrj {
2419*38fd1498Szrj op = USE_FROM_PTR (use_p);
2420*38fd1498Szrj record_invariant (data, op, false);
2421*38fd1498Szrj }
2422*38fd1498Szrj }
2423*38fd1498Szrj
2424*38fd1498Szrj /* CALL calls an internal function. If operand *OP_P will become an
2425*38fd1498Szrj address when the call is expanded, return the type of the memory
2426*38fd1498Szrj being addressed, otherwise return null. */
2427*38fd1498Szrj
2428*38fd1498Szrj static tree
get_mem_type_for_internal_fn(gcall * call,tree * op_p)2429*38fd1498Szrj get_mem_type_for_internal_fn (gcall *call, tree *op_p)
2430*38fd1498Szrj {
2431*38fd1498Szrj switch (gimple_call_internal_fn (call))
2432*38fd1498Szrj {
2433*38fd1498Szrj case IFN_MASK_LOAD:
2434*38fd1498Szrj if (op_p == gimple_call_arg_ptr (call, 0))
2435*38fd1498Szrj return TREE_TYPE (gimple_call_lhs (call));
2436*38fd1498Szrj return NULL_TREE;
2437*38fd1498Szrj
2438*38fd1498Szrj case IFN_MASK_STORE:
2439*38fd1498Szrj if (op_p == gimple_call_arg_ptr (call, 0))
2440*38fd1498Szrj return TREE_TYPE (gimple_call_arg (call, 3));
2441*38fd1498Szrj return NULL_TREE;
2442*38fd1498Szrj
2443*38fd1498Szrj default:
2444*38fd1498Szrj return NULL_TREE;
2445*38fd1498Szrj }
2446*38fd1498Szrj }
2447*38fd1498Szrj
2448*38fd1498Szrj /* IV is a (non-address) iv that describes operand *OP_P of STMT.
2449*38fd1498Szrj Return true if the operand will become an address when STMT
2450*38fd1498Szrj is expanded and record the associated address use if so. */
2451*38fd1498Szrj
2452*38fd1498Szrj static bool
find_address_like_use(struct ivopts_data * data,gimple * stmt,tree * op_p,struct iv * iv)2453*38fd1498Szrj find_address_like_use (struct ivopts_data *data, gimple *stmt, tree *op_p,
2454*38fd1498Szrj struct iv *iv)
2455*38fd1498Szrj {
2456*38fd1498Szrj /* Fail if base object of this memory reference is unknown. */
2457*38fd1498Szrj if (iv->base_object == NULL_TREE)
2458*38fd1498Szrj return false;
2459*38fd1498Szrj
2460*38fd1498Szrj tree mem_type = NULL_TREE;
2461*38fd1498Szrj if (gcall *call = dyn_cast <gcall *> (stmt))
2462*38fd1498Szrj if (gimple_call_internal_p (call))
2463*38fd1498Szrj mem_type = get_mem_type_for_internal_fn (call, op_p);
2464*38fd1498Szrj if (mem_type)
2465*38fd1498Szrj {
2466*38fd1498Szrj iv = alloc_iv (data, iv->base, iv->step);
2467*38fd1498Szrj record_group_use (data, op_p, iv, stmt, USE_PTR_ADDRESS, mem_type);
2468*38fd1498Szrj return true;
2469*38fd1498Szrj }
2470*38fd1498Szrj return false;
2471*38fd1498Szrj }
2472*38fd1498Szrj
/* Finds interesting uses of induction variables in the statement STMT.
   Dispatches on the statement kind: conditions, assignments (both their
   memory operands and comparison right-hand sides), and finally plain
   SSA operand uses of ivs.  */

static void
find_interesting_uses_stmt (struct ivopts_data *data, gimple *stmt)
{
  struct iv *iv;
  tree op, *lhs, *rhs;
  ssa_op_iter iter;
  use_operand_p use_p;
  enum tree_code code;

  find_invariants_stmt (data, stmt);

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      find_interesting_uses_cond (data, stmt);
      return;
    }

  if (is_gimple_assign (stmt))
    {
      lhs = gimple_assign_lhs_ptr (stmt);
      rhs = gimple_assign_rhs1_ptr (stmt);

      if (TREE_CODE (*lhs) == SSA_NAME)
	{
	  /* If the statement defines an induction variable, the uses are not
	     interesting by themselves.  */

	  iv = get_iv (data, *lhs);

	  if (iv && !integer_zerop (iv->step))
	    return;
	}

      code = gimple_assign_rhs_code (stmt);
      if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
	  && (REFERENCE_CLASS_P (*rhs)
	      || is_gimple_val (*rhs)))
	{
	  /* A load, store or copy: memory operands are address uses,
	     plain values are nonlinear uses.  */
	  if (REFERENCE_CLASS_P (*rhs))
	    find_interesting_uses_address (data, stmt, rhs);
	  else
	    find_interesting_uses_op (data, *rhs);

	  if (REFERENCE_CLASS_P (*lhs))
	    find_interesting_uses_address (data, stmt, lhs);
	  return;
	}
      else if (TREE_CODE_CLASS (code) == tcc_comparison)
	{
	  find_interesting_uses_cond (data, stmt);
	  return;
	}

      /* TODO -- we should also handle address uses of type

	 memory = call (whatever);

	 and

	 call (memory).  */
    }

  if (gimple_code (stmt) == GIMPLE_PHI
      && gimple_bb (stmt) == data->current_loop->header)
    {
      /* Loop-header PHIs that define an iv are the iv itself; their
	 arguments are not interesting uses.  */
      iv = get_iv (data, PHI_RESULT (stmt));

      if (iv && !integer_zerop (iv->step))
	return;
    }

  /* Fallback: record each remaining iv operand either as an
     address-like use (internal fn call) or a generic use.  */
  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);

      if (TREE_CODE (op) != SSA_NAME)
	continue;

      iv = get_iv (data, op);
      if (!iv)
	continue;

      if (!find_address_like_use (data, stmt, use_p->use, iv))
	find_interesting_uses_op (data, op);
    }
}
2561*38fd1498Szrj
2562*38fd1498Szrj /* Finds interesting uses of induction variables outside of loops
2563*38fd1498Szrj on loop exit edge EXIT. */
2564*38fd1498Szrj
2565*38fd1498Szrj static void
find_interesting_uses_outside(struct ivopts_data * data,edge exit)2566*38fd1498Szrj find_interesting_uses_outside (struct ivopts_data *data, edge exit)
2567*38fd1498Szrj {
2568*38fd1498Szrj gphi *phi;
2569*38fd1498Szrj gphi_iterator psi;
2570*38fd1498Szrj tree def;
2571*38fd1498Szrj
2572*38fd1498Szrj for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
2573*38fd1498Szrj {
2574*38fd1498Szrj phi = psi.phi ();
2575*38fd1498Szrj def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2576*38fd1498Szrj if (!virtual_operand_p (def))
2577*38fd1498Szrj find_interesting_uses_op (data, def);
2578*38fd1498Szrj }
2579*38fd1498Szrj }
2580*38fd1498Szrj
/* Return TRUE if OFFSET is within the range of [base + offset] addressing
   mode for memory reference represented by USE.  */

/* Cache of skeleton (plus reg offset) address rtxes, indexed by address
   space and memory mode; lazily grown and reused across queries.  */
static GTY (()) vec<rtx, va_gc> *addr_list;

static bool
addr_offset_valid_p (struct iv_use *use, poly_int64 offset)
{
  rtx reg, addr;
  unsigned list_index;
  addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
  machine_mode addr_mode, mem_mode = TYPE_MODE (use->mem_type);

  /* One cache slot per (address space, memory mode) pair.  */
  list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
  if (list_index >= vec_safe_length (addr_list))
    vec_safe_grow_cleared (addr_list, list_index + MAX_MACHINE_MODE);

  addr = (*addr_list)[list_index];
  if (!addr)
    {
      /* Build the address skeleton once; only its offset operand is
	 patched on later calls.  */
      addr_mode = targetm.addr_space.address_mode (as);
      reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
      addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
      (*addr_list)[list_index] = addr;
    }
  else
    addr_mode = GET_MODE (addr);

  /* Plug OFFSET into the skeleton and ask the target whether the
     resulting address is legitimate for MEM_MODE.  */
  XEXP (addr, 1) = gen_int_mode (offset, addr_mode);
  return (memory_address_addr_space_p (mem_mode, addr, as));
}
2612*38fd1498Szrj
2613*38fd1498Szrj /* Comparison function to sort group in ascending order of addr_offset. */
2614*38fd1498Szrj
2615*38fd1498Szrj static int
group_compare_offset(const void * a,const void * b)2616*38fd1498Szrj group_compare_offset (const void *a, const void *b)
2617*38fd1498Szrj {
2618*38fd1498Szrj const struct iv_use *const *u1 = (const struct iv_use *const *) a;
2619*38fd1498Szrj const struct iv_use *const *u2 = (const struct iv_use *const *) b;
2620*38fd1498Szrj
2621*38fd1498Szrj return compare_sizes_for_sort ((*u1)->addr_offset, (*u2)->addr_offset);
2622*38fd1498Szrj }
2623*38fd1498Szrj
/* Check if small groups should be split.  Return true if no group
   contains more than two uses with distinct addr_offsets.  Return
   false otherwise.  We want to split such groups because:

     1) Small groups don't have much benefit and may interfere with
	general candidate selection.
     2) Size for problem with only small groups is usually small and
	general algorithm can handle it well.

   TODO -- Above claim may not hold when we want to merge memory
   accesses with consecutive addresses.  */

static bool
split_small_address_groups_p (struct ivopts_data *data)
{
  unsigned int i, j, distinct = 1;
  struct iv_use *pre;
  struct iv_group *group;

  for (i = 0; i < data->vgroups.length (); i++)
    {
      group = data->vgroups[i];
      /* Singleton groups need neither sorting nor counting.  */
      if (group->vuses.length () == 1)
	continue;

      gcc_assert (address_p (group->type));
      /* Sort uses by ascending addr_offset; a pair just needs a
	 conditional swap.  Sorting is done unconditionally because
	 split_address_groups relies on it.  */
      if (group->vuses.length () == 2)
	{
	  if (compare_sizes_for_sort (group->vuses[0]->addr_offset,
				      group->vuses[1]->addr_offset) > 0)
	    std::swap (group->vuses[0], group->vuses[1]);
	}
      else
	group->vuses.qsort (group_compare_offset);

      /* Once some group is known to exceed two distinct offsets, keep
	 sorting the remaining groups but stop counting.  */
      if (distinct > 2)
	continue;

      /* Count distinct offsets; the list is sorted, so equal offsets
	 are adjacent.  */
      distinct = 1;
      for (pre = group->vuses[0], j = 1; j < group->vuses.length (); j++)
	{
	  if (maybe_ne (group->vuses[j]->addr_offset, pre->addr_offset))
	    {
	      pre = group->vuses[j];
	      distinct++;
	    }

	  if (distinct > 2)
	    break;
	}
    }

  return (distinct <= 2);
}
2678*38fd1498Szrj
/* For each group of address type uses, this function further groups
   these uses according to the maximum offset supported by target's
   [base + offset] addressing mode.  Assumes each group's uses are
   sorted by addr_offset (done by split_small_address_groups_p).  */

static void
split_address_groups (struct ivopts_data *data)
{
  unsigned int i, j;
  /* Always split group.  */
  bool split_p = split_small_address_groups_p (data);

  for (i = 0; i < data->vgroups.length (); i++)
    {
      struct iv_group *new_group = NULL;
      struct iv_group *group = data->vgroups[i];
      struct iv_use *use = group->vuses[0];

      use->id = 0;
      use->group_id = group->id;
      if (group->vuses.length () == 1)
	continue;

      gcc_assert (address_p (use->type));

      for (j = 1; j < group->vuses.length ();)
	{
	  struct iv_use *next = group->vuses[j];
	  poly_int64 offset = next->addr_offset - use->addr_offset;

	  /* Split group if asked to, or the offset against the first
	     use can't fit in offset part of addressing mode.  IV uses
	     having the same offset are still kept in one group.  */
	  if (maybe_ne (offset, 0)
	      && (split_p || !addr_offset_valid_p (use, offset)))
	    {
	      /* Move the use into a (lazily created) new group; do not
		 advance J since ordered_remove shifts elements down.  */
	      if (!new_group)
		new_group = record_group (data, group->type);
	      group->vuses.ordered_remove (j);
	      new_group->vuses.safe_push (next);
	      continue;
	    }

	  next->id = j;
	  next->group_id = group->id;
	  j++;
	}
    }
}
2727*38fd1498Szrj
/* Finds uses of the induction variables that are interesting.  Walks
   every block of the current loop, records uses inside it as well as
   uses on loop exit edges, then groups address uses by offset.  */

static void
find_interesting_uses (struct ivopts_data *data)
{
  basic_block bb;
  gimple_stmt_iterator bsi;
  basic_block *body = get_loop_body (data->current_loop);
  unsigned i;
  edge e;

  for (i = 0; i < data->current_loop->num_nodes; i++)
    {
      edge_iterator ei;
      bb = body[i];

      /* Edges leaving the loop carry uses that live past it.  */
      FOR_EACH_EDGE (e, ei, bb->succs)
	if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
	    && !flow_bb_inside_loop_p (data->current_loop, e->dest))
	  find_interesting_uses_outside (data, e);

      for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	find_interesting_uses_stmt (data, gsi_stmt (bsi));
      /* Debug statements must not influence the optimization.  */
      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	if (!is_gimple_debug (gsi_stmt (bsi)))
	  find_interesting_uses_stmt (data, gsi_stmt (bsi));
    }
  free (body);

  split_address_groups (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "\n<IV Groups>:\n");
      dump_groups (dump_file, data);
      fprintf (dump_file, "\n");
    }
}
2766*38fd1498Szrj
/* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
   is true, assume we are inside an address.  If TOP_COMPREF is true, assume
   we are at the top-level of the processed address.  Returns the stripped
   core expression (possibly EXPR itself if nothing was stripped).  */

static tree
strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
		poly_int64 *offset)
{
  tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
  enum tree_code code;
  tree type, orig_type = TREE_TYPE (expr);
  poly_int64 off0, off1;
  HOST_WIDE_INT st;
  tree orig_expr = expr;

  STRIP_NOPS (expr);

  type = TREE_TYPE (expr);
  code = TREE_CODE (expr);
  *offset = 0;

  switch (code)
    {
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Recurse into both operands and combine their offsets.  */
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);

      op0 = strip_offset_1 (op0, false, false, &off0);
      op1 = strip_offset_1 (op1, false, false, &off1);

      *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
      if (op0 == TREE_OPERAND (expr, 0)
	  && op1 == TREE_OPERAND (expr, 1))
	return orig_expr;

      /* Rebuild the sum from the stripped operands, simplifying away
	 operands that became zero.  */
      if (integer_zerop (op1))
	expr = op0;
      else if (integer_zerop (op0))
	{
	  if (code == MINUS_EXPR)
	    expr = fold_build1 (NEGATE_EXPR, type, op1);
	  else
	    expr = op1;
	}
      else
	expr = fold_build2 (code, type, op0, op1);

      return fold_convert (orig_type, expr);

    case MULT_EXPR:
      /* Only handle multiplication by a host-representable constant;
	 the offset stripped from the other operand gets scaled.  */
      op1 = TREE_OPERAND (expr, 1);
      if (!cst_and_fits_in_hwi (op1))
	return orig_expr;

      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, false, false, &off0);
      if (op0 == TREE_OPERAND (expr, 0))
	return orig_expr;

      *offset = off0 * int_cst_value (op1);
      if (integer_zerop (op0))
	expr = op0;
      else
	expr = fold_build2 (MULT_EXPR, type, op0, op1);

      return fold_convert (orig_type, expr);

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      if (!inside_addr)
	return orig_expr;

      /* The index contributes index * element_size to the offset,
	 provided the element size is a host-representable constant.  */
      step = array_ref_element_size (expr);
      if (!cst_and_fits_in_hwi (step))
	break;

      st = int_cst_value (step);
      op1 = TREE_OPERAND (expr, 1);
      op1 = strip_offset_1 (op1, false, false, &off1);
      *offset = off1 * st;

      if (top_compref
	  && integer_zerop (op1))
	{
	  /* Strip the component reference completely.  */
	  op0 = TREE_OPERAND (expr, 0);
	  op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
	  *offset += off0;
	  return op0;
	}
      break;

    case COMPONENT_REF:
      {
	tree field;

	if (!inside_addr)
	  return orig_expr;

	tmp = component_ref_field_offset (expr);
	field = TREE_OPERAND (expr, 1);
	if (top_compref
	    && cst_and_fits_in_hwi (tmp)
	    && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
	  {
	    HOST_WIDE_INT boffset, abs_off;

	    /* Strip the component reference completely.  The field's
	       byte offset is the DECL_FIELD_OFFSET part plus the
	       byte-aligned portion of DECL_FIELD_BIT_OFFSET.  */
	    op0 = TREE_OPERAND (expr, 0);
	    op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
	    boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
	    abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
	    if (boffset < 0)
	      abs_off = -abs_off;

	    *offset = off0 + int_cst_value (tmp) + abs_off;
	    return op0;
	  }
      }
      break;

    case ADDR_EXPR:
      /* Entering an address: strip inside it with both flags set.  */
      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, true, true, &off0);
      *offset += off0;

      if (op0 == TREE_OPERAND (expr, 0))
	return orig_expr;

      expr = build_fold_addr_expr (op0);
      return fold_convert (orig_type, expr);

    case MEM_REF:
      /* ??? Offset operand?  */
      inside_addr = false;
      break;

    default:
      if (ptrdiff_tree_p (expr, offset) && maybe_ne (*offset, 0))
	return build_int_cst (orig_type, 0);
      return orig_expr;
    }

  /* Default handling of expressions for that we want to recurse into
     the first operand.  */
  op0 = TREE_OPERAND (expr, 0);
  op0 = strip_offset_1 (op0, inside_addr, false, &off0);
  *offset += off0;

  if (op0 == TREE_OPERAND (expr, 0)
      && (!op1 || op1 == TREE_OPERAND (expr, 1)))
    return orig_expr;

  /* Rebuild EXPR with the stripped operands substituted in.  */
  expr = copy_node (expr);
  TREE_OPERAND (expr, 0) = op0;
  if (op1)
    TREE_OPERAND (expr, 1) = op1;

  /* Inside address, we might strip the top level component references,
     thus changing type of the expression.  Handling of ADDR_EXPR
     will fix that.  */
  expr = fold_convert (orig_type, expr);

  return expr;
}
2934*38fd1498Szrj
2935*38fd1498Szrj /* Strips constant offsets from EXPR and stores them to OFFSET. */
2936*38fd1498Szrj
2937*38fd1498Szrj tree
strip_offset(tree expr,poly_uint64_pod * offset)2938*38fd1498Szrj strip_offset (tree expr, poly_uint64_pod *offset)
2939*38fd1498Szrj {
2940*38fd1498Szrj poly_int64 off;
2941*38fd1498Szrj tree core = strip_offset_1 (expr, false, false, &off);
2942*38fd1498Szrj *offset = off;
2943*38fd1498Szrj return core;
2944*38fd1498Szrj }
2945*38fd1498Szrj
2946*38fd1498Szrj /* Returns variant of TYPE that can be used as base for different uses.
2947*38fd1498Szrj We return unsigned type with the same precision, which avoids problems
2948*38fd1498Szrj with overflows. */
2949*38fd1498Szrj
2950*38fd1498Szrj static tree
generic_type_for(tree type)2951*38fd1498Szrj generic_type_for (tree type)
2952*38fd1498Szrj {
2953*38fd1498Szrj if (POINTER_TYPE_P (type))
2954*38fd1498Szrj return unsigned_type_for (type);
2955*38fd1498Szrj
2956*38fd1498Szrj if (TYPE_UNSIGNED (type))
2957*38fd1498Szrj return type;
2958*38fd1498Szrj
2959*38fd1498Szrj return unsigned_type_for (type);
2960*38fd1498Szrj }
2961*38fd1498Szrj
/* Private data for walk_tree.  */

struct walk_tree_data
{
  /* Bitmap collecting invariant-variable ids; lazily allocated by the
     callback on first hit.  */
  bitmap *inv_vars;
  /* The pass-wide ivopts data.  */
  struct ivopts_data *idata;
};
2969*38fd1498Szrj
/* Callback function for walk_tree, it records invariants and symbol
   reference in *EXPR_P.  DATA is the structure storing result info.
   Always returns NULL_TREE so the walk continues.  */

static tree
find_inv_vars_cb (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
{
  tree op = *expr_p;
  struct version_info *info;
  struct walk_tree_data *wdata = (struct walk_tree_data*) data;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL_TREE;

  info = name_info (wdata->idata, op);
  /* Because we expand simple operations when finding IVs, loop invariant
     variable that isn't referred by the original loop could be used now.
     Record such invariant variables here.  */
  if (!info->iv)
    {
      struct ivopts_data *idata = wdata->idata;
      basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (op));

      /* A name defined outside the loop (or with no defining block) is
	 invariant: give it a zero-step iv and record it.  */
      if (!bb || !flow_bb_inside_loop_p (idata->current_loop, bb))
	{
	  set_iv (idata, op, op, build_int_cst (TREE_TYPE (op), 0), true);
	  record_invariant (idata, op, false);
	}
    }
  /* Names without an invariant id, or already used nonlinearly, are
     not collected.  */
  if (!info->inv_id || info->has_nonlin_use)
    return NULL_TREE;

  /* Allocate the result bitmap on first use.  */
  if (!*wdata->inv_vars)
    *wdata->inv_vars = BITMAP_ALLOC (NULL);
  bitmap_set_bit (*wdata->inv_vars, info->inv_id);

  return NULL_TREE;
}
3007*38fd1498Szrj
3008*38fd1498Szrj /* Records invariants in *EXPR_P. INV_VARS is the bitmap to that we should
3009*38fd1498Szrj store it. */
3010*38fd1498Szrj
3011*38fd1498Szrj static inline void
find_inv_vars(struct ivopts_data * data,tree * expr_p,bitmap * inv_vars)3012*38fd1498Szrj find_inv_vars (struct ivopts_data *data, tree *expr_p, bitmap *inv_vars)
3013*38fd1498Szrj {
3014*38fd1498Szrj struct walk_tree_data wdata;
3015*38fd1498Szrj
3016*38fd1498Szrj if (!inv_vars)
3017*38fd1498Szrj return;
3018*38fd1498Szrj
3019*38fd1498Szrj wdata.idata = data;
3020*38fd1498Szrj wdata.inv_vars = inv_vars;
3021*38fd1498Szrj walk_tree (expr_p, find_inv_vars_cb, &wdata, NULL);
3022*38fd1498Szrj }
3023*38fd1498Szrj
/* Get entry from invariant expr hash table for INV_EXPR.  New entry
   will be recorded if it doesn't exist yet.  Given below two exprs:
     inv_expr + cst1, inv_expr + cst2
   It's hard to make decision whether constant part should be stripped
   or not.  We choose to not strip based on below facts:
     1) We need to count ADD cost for constant part if it's stripped,
	which isn't always trivial where this function is called.
     2) Stripping constant away may conflict with the following loop
	invariant hoisting pass.
     3) Not stripping constant away results in more invariant exprs,
	which usually leads to decision preferring lower reg pressure.  */

static iv_inv_expr_ent *
get_loop_invariant_expr (struct ivopts_data *data, tree inv_expr)
{
  STRIP_NOPS (inv_expr);

  /* Constants and bare SSA names are tracked as invariant variables,
     not as invariant expressions.  */
  if (poly_int_tree_p (inv_expr)
      || TREE_CODE (inv_expr) == SSA_NAME)
    return NULL;

  /* Don't strip constant part away as we used to.  */

  /* Stores EXPR in DATA->inv_expr_tab, return pointer to iv_inv_expr_ent.  */
  struct iv_inv_expr_ent ent;
  ent.expr = inv_expr;
  ent.hash = iterative_hash_expr (inv_expr, 0);
  struct iv_inv_expr_ent **slot = data->inv_expr_tab->find_slot (&ent, INSERT);

  if (!*slot)
    {
      /* First occurrence of this expression: allocate an entry and
	 assign it a fresh id.  */
      *slot = XNEW (struct iv_inv_expr_ent);
      (*slot)->expr = inv_expr;
      (*slot)->hash = ent.hash;
      (*slot)->id = ++data->max_inv_expr_id;
    }

  return *slot;
}
3063*38fd1498Szrj
/* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
   position to POS.  If USE is not NULL, the candidate is set as related to
   it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
   replacement of the final value of the iv by a direct computation.  */

static struct iv_cand *
add_candidate_1 (struct ivopts_data *data,
		 tree base, tree step, bool important, enum iv_position pos,
		 struct iv_use *use, gimple *incremented_at,
		 struct iv *orig_iv = NULL)
{
  unsigned i;
  struct iv_cand *cand = NULL;
  tree type, orig_type;

  gcc_assert (base && step);

  /* -fkeep-gc-roots-live means that we have to keep a real pointer
     live, but the ivopts code may replace a real pointer with one
     pointing before or after the memory block that is then adjusted
     into the memory block during the loop.  FIXME: It would likely be
     better to actually force the pointer live and still use ivopts;
     for example, it would be enough to write the pointer into memory
     and keep it there until after the loop.  */
  if (flag_keep_gc_roots_live && POINTER_TYPE_P (TREE_TYPE (base)))
    return NULL;

  /* For non-original variables, make sure their values are computed in a type
     that does not invoke undefined behavior on overflows (since in general,
     we cannot prove that these induction variables are non-wrapping).  */
  if (pos != IP_ORIGINAL)
    {
      orig_type = TREE_TYPE (base);
      type = generic_type_for (orig_type);
      if (type != orig_type)
	{
	  base = fold_convert (type, base);
	  step = fold_convert (type, step);
	}
    }

  /* Look for an existing candidate with the same position, increment
     statement, base, step and precision; reuse it if found.  */
  for (i = 0; i < data->vcands.length (); i++)
    {
      cand = data->vcands[i];

      if (cand->pos != pos)
	continue;

      if (cand->incremented_at != incremented_at
	  || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
	      && cand->ainc_use != use))
	continue;

      if (operand_equal_p (base, cand->iv->base, 0)
	  && operand_equal_p (step, cand->iv->step, 0)
	  && (TYPE_PRECISION (TREE_TYPE (base))
	      == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
	break;
    }

  /* No match found: create and record a new candidate.  */
  if (i == data->vcands.length ())
    {
      cand = XCNEW (struct iv_cand);
      cand->id = i;
      cand->iv = alloc_iv (data, base, step);
      cand->pos = pos;
      if (pos != IP_ORIGINAL)
	{
	  /* Fresh variable for the candidate; before/after values
	     coincide until the increment is actually emitted.  */
	  cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
	  cand->var_after = cand->var_before;
	}
      cand->important = important;
      cand->incremented_at = incremented_at;
      data->vcands.safe_push (cand);

      /* A non-constant step may depend on loop invariants; record them
	 so costs can account for the needed registers.  */
      if (!poly_int_tree_p (step))
	{
	  find_inv_vars (data, &step, &cand->inv_vars);

	  iv_inv_expr_ent *inv_expr = get_loop_invariant_expr (data, step);
	  /* Share bitmap between inv_vars and inv_exprs for cand.  */
	  if (inv_expr != NULL)
	    {
	      cand->inv_exprs = cand->inv_vars;
	      cand->inv_vars = NULL;
	      if (cand->inv_exprs)
		bitmap_clear (cand->inv_exprs);
	      else
		cand->inv_exprs = BITMAP_ALLOC (NULL);

	      bitmap_set_bit (cand->inv_exprs, inv_expr->id);
	    }
	}

      if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
	cand->ainc_use = use;
      else
	cand->ainc_use = NULL;

      cand->orig_iv = orig_iv;
      if (dump_file && (dump_flags & TDF_DETAILS))
	dump_cand (dump_file, cand);
    }

  /* A reused candidate becomes important if any addition requests it.  */
  cand->important |= important;

  /* Relate candidate to the group for which it is added.  */
  if (use)
    bitmap_set_bit (data->vgroups[use->group_id]->related_cands, i);

  return cand;
}
3176*38fd1498Szrj
3177*38fd1498Szrj /* Returns true if incrementing the induction variable at the end of the LOOP
3178*38fd1498Szrj is allowed.
3179*38fd1498Szrj
3180*38fd1498Szrj The purpose is to avoid splitting latch edge with a biv increment, thus
3181*38fd1498Szrj creating a jump, possibly confusing other optimization passes and leaving
3182*38fd1498Szrj less freedom to scheduler. So we allow IP_END only if IP_NORMAL is not
3183*38fd1498Szrj available (so we do not have a better alternative), or if the latch edge
3184*38fd1498Szrj is already nonempty. */
3185*38fd1498Szrj
3186*38fd1498Szrj static bool
allow_ip_end_pos_p(struct loop * loop)3187*38fd1498Szrj allow_ip_end_pos_p (struct loop *loop)
3188*38fd1498Szrj {
3189*38fd1498Szrj if (!ip_normal_pos (loop))
3190*38fd1498Szrj return true;
3191*38fd1498Szrj
3192*38fd1498Szrj if (!empty_block_p (ip_end_pos (loop)))
3193*38fd1498Szrj return true;
3194*38fd1498Szrj
3195*38fd1498Szrj return false;
3196*38fd1498Szrj }
3197*38fd1498Szrj
/* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
   Important field is set to IMPORTANT.  */

static void
add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
			bool important, struct iv_use *use)
{
  basic_block use_bb = gimple_bb (use->stmt);
  machine_mode mem_mode;
  unsigned HOST_WIDE_INT cstepi;

  /* If we insert the increment in any position other than the standard
     ones, we must ensure that it is incremented once per iteration.
     It must not be in an inner nested loop, or one side of an if
     statement.  */
  if (use_bb->loop_father != data->current_loop
      || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
      || stmt_can_throw_internal (use->stmt)
      || !cst_and_fits_in_hwi (step))
    return;

  cstepi = int_cst_value (step);

  mem_mode = TYPE_MODE (use->mem_type);
  /* Pre-increment/decrement: the step must match the access size (or its
     negation), and the candidate base is shifted back by one step so the
     pre-modification lands on the right address.  */
  if (((USE_LOAD_PRE_INCREMENT (mem_mode)
	|| USE_STORE_PRE_INCREMENT (mem_mode))
       && known_eq (GET_MODE_SIZE (mem_mode), cstepi))
      || ((USE_LOAD_PRE_DECREMENT (mem_mode)
	   || USE_STORE_PRE_DECREMENT (mem_mode))
	  && known_eq (GET_MODE_SIZE (mem_mode), -cstepi)))
    {
      enum tree_code code = MINUS_EXPR;
      tree new_base;
      tree new_step = step;

      if (POINTER_TYPE_P (TREE_TYPE (base)))
	{
	  /* Pointers have no MINUS_EXPR; add the negated step instead.  */
	  new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
	  code = POINTER_PLUS_EXPR;
	}
      else
	new_step = fold_convert (TREE_TYPE (base), new_step);
      new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
      add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
		       use->stmt);
    }
  /* Post-increment/decrement: the base is used unmodified, with the
     increment placed after the use.  */
  if (((USE_LOAD_POST_INCREMENT (mem_mode)
	|| USE_STORE_POST_INCREMENT (mem_mode))
       && known_eq (GET_MODE_SIZE (mem_mode), cstepi))
      || ((USE_LOAD_POST_DECREMENT (mem_mode)
	   || USE_STORE_POST_DECREMENT (mem_mode))
	  && known_eq (GET_MODE_SIZE (mem_mode), -cstepi)))
    {
      add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
		       use->stmt);
    }
}
3255*38fd1498Szrj
3256*38fd1498Szrj /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
3257*38fd1498Szrj position to POS. If USE is not NULL, the candidate is set as related to
3258*38fd1498Szrj it. The candidate computation is scheduled before exit condition and at
3259*38fd1498Szrj the end of loop. */
3260*38fd1498Szrj
3261*38fd1498Szrj static void
3262*38fd1498Szrj add_candidate (struct ivopts_data *data,
3263*38fd1498Szrj tree base, tree step, bool important, struct iv_use *use,
3264*38fd1498Szrj struct iv *orig_iv = NULL)
3265*38fd1498Szrj {
3266*38fd1498Szrj if (ip_normal_pos (data->current_loop))
3267*38fd1498Szrj add_candidate_1 (data, base, step, important,
3268*38fd1498Szrj IP_NORMAL, use, NULL, orig_iv);
3269*38fd1498Szrj if (ip_end_pos (data->current_loop)
3270*38fd1498Szrj && allow_ip_end_pos_p (data->current_loop))
3271*38fd1498Szrj add_candidate_1 (data, base, step, important, IP_END, use, NULL, orig_iv);
3272*38fd1498Szrj }
3273*38fd1498Szrj
3274*38fd1498Szrj /* Adds standard iv candidates. */
3275*38fd1498Szrj
3276*38fd1498Szrj static void
add_standard_iv_candidates(struct ivopts_data * data)3277*38fd1498Szrj add_standard_iv_candidates (struct ivopts_data *data)
3278*38fd1498Szrj {
3279*38fd1498Szrj add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
3280*38fd1498Szrj
3281*38fd1498Szrj /* The same for a double-integer type if it is still fast enough. */
3282*38fd1498Szrj if (TYPE_PRECISION
3283*38fd1498Szrj (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
3284*38fd1498Szrj && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
3285*38fd1498Szrj add_candidate (data, build_int_cst (long_integer_type_node, 0),
3286*38fd1498Szrj build_int_cst (long_integer_type_node, 1), true, NULL);
3287*38fd1498Szrj
3288*38fd1498Szrj /* The same for a double-integer type if it is still fast enough. */
3289*38fd1498Szrj if (TYPE_PRECISION
3290*38fd1498Szrj (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
3291*38fd1498Szrj && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
3292*38fd1498Szrj add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
3293*38fd1498Szrj build_int_cst (long_long_integer_type_node, 1), true, NULL);
3294*38fd1498Szrj }
3295*38fd1498Szrj
3296*38fd1498Szrj
/* Adds candidates based on the old induction variable IV.  */

static void
add_iv_candidate_for_biv (struct ivopts_data *data, struct iv *iv)
{
  gimple *phi;
  tree def;
  struct iv_cand *cand;

  /* Check if this biv is used in address type use.  */
  if (iv->no_overflow && iv->have_address_use
      && INTEGRAL_TYPE_P (TREE_TYPE (iv->base))
      && TYPE_PRECISION (TREE_TYPE (iv->base)) < TYPE_PRECISION (sizetype))
    {
      tree base = fold_convert (sizetype, iv->base);
      tree step = fold_convert (sizetype, iv->step);

      /* Add iv cand of same precision as index part in TARGET_MEM_REF.  */
      add_candidate (data, base, step, true, NULL, iv);
      /* Add iv cand of the original type only if it has nonlinear use.  */
      if (iv->nonlin_use)
	add_candidate (data, iv->base, iv->step, true, NULL);
    }
  else
    add_candidate (data, iv->base, iv->step, true, NULL);

  /* The same, but with initial value zero.  */
  if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
    add_candidate (data, size_int (0), iv->step, true, NULL);
  else
    add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
		   iv->step, true, NULL);

  phi = SSA_NAME_DEF_STMT (iv->ssa_name);
  if (gimple_code (phi) == GIMPLE_PHI)
    {
      /* Additionally record the possibility of leaving the original iv
	 untouched.  */
      def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
      /* Don't add candidate if it's from another PHI node because
	 it's an affine iv appearing in the form of PEELED_CHREC.  */
      phi = SSA_NAME_DEF_STMT (def);
      if (gimple_code (phi) != GIMPLE_PHI)
	{
	  cand = add_candidate_1 (data,
				  iv->base, iv->step, true, IP_ORIGINAL, NULL,
				  SSA_NAME_DEF_STMT (def));
	  /* add_candidate_1 may refuse (e.g. for GC-root pointers).  */
	  if (cand)
	    {
	      cand->var_before = iv->ssa_name;
	      cand->var_after = def;
	    }
	}
      else
	gcc_assert (gimple_bb (phi) == data->current_loop->header);
    }
}
3354*38fd1498Szrj
3355*38fd1498Szrj /* Adds candidates based on the old induction variables. */
3356*38fd1498Szrj
3357*38fd1498Szrj static void
add_iv_candidate_for_bivs(struct ivopts_data * data)3358*38fd1498Szrj add_iv_candidate_for_bivs (struct ivopts_data *data)
3359*38fd1498Szrj {
3360*38fd1498Szrj unsigned i;
3361*38fd1498Szrj struct iv *iv;
3362*38fd1498Szrj bitmap_iterator bi;
3363*38fd1498Szrj
3364*38fd1498Szrj EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
3365*38fd1498Szrj {
3366*38fd1498Szrj iv = ver_info (data, i)->iv;
3367*38fd1498Szrj if (iv && iv->biv_p && !integer_zerop (iv->step))
3368*38fd1498Szrj add_iv_candidate_for_biv (data, iv);
3369*38fd1498Szrj }
3370*38fd1498Szrj }
3371*38fd1498Szrj
/* Record common candidate {BASE, STEP} derived from USE in hashtable.  */

static void
record_common_cand (struct ivopts_data *data, tree base,
		    tree step, struct iv_use *use)
{
  struct iv_common_cand ent;
  struct iv_common_cand **slot;

  /* Hash on both base and step so distinct {base, step} pairs get
     distinct entries.  */
  ent.base = base;
  ent.step = step;
  ent.hash = iterative_hash_expr (base, 0);
  ent.hash = iterative_hash_expr (step, ent.hash);

  slot = data->iv_common_cand_tab->find_slot (&ent, INSERT);
  if (*slot == NULL)
    {
      /* First time this pair is seen: create the entry and remember it
	 in the sortable vector as well.  */
      *slot = new iv_common_cand ();
      (*slot)->base = base;
      (*slot)->step = step;
      (*slot)->uses.create (8);
      (*slot)->hash = ent.hash;
      data->iv_common_cands.safe_push ((*slot));
    }

  gcc_assert (use != NULL);
  (*slot)->uses.safe_push (use);
  return;
}
3401*38fd1498Szrj
3402*38fd1498Szrj /* Comparison function used to sort common candidates. */
3403*38fd1498Szrj
3404*38fd1498Szrj static int
common_cand_cmp(const void * p1,const void * p2)3405*38fd1498Szrj common_cand_cmp (const void *p1, const void *p2)
3406*38fd1498Szrj {
3407*38fd1498Szrj unsigned n1, n2;
3408*38fd1498Szrj const struct iv_common_cand *const *const ccand1
3409*38fd1498Szrj = (const struct iv_common_cand *const *)p1;
3410*38fd1498Szrj const struct iv_common_cand *const *const ccand2
3411*38fd1498Szrj = (const struct iv_common_cand *const *)p2;
3412*38fd1498Szrj
3413*38fd1498Szrj n1 = (*ccand1)->uses.length ();
3414*38fd1498Szrj n2 = (*ccand2)->uses.length ();
3415*38fd1498Szrj return n2 - n1;
3416*38fd1498Szrj }
3417*38fd1498Szrj
/* Adds IV candidates based on common candidates recorded.  */

static void
add_iv_candidate_derived_from_uses (struct ivopts_data *data)
{
  unsigned i, j;
  struct iv_cand *cand_1, *cand_2;

  /* Sort by descending number of uses so we can stop at the first entry
     with a single use.  */
  data->iv_common_cands.qsort (common_cand_cmp);
  for (i = 0; i < data->iv_common_cands.length (); i++)
    {
      struct iv_common_cand *ptr = data->iv_common_cands[i];

      /* Only add IV candidate if it's derived from multiple uses.  */
      if (ptr->uses.length () <= 1)
	break;

      cand_1 = NULL;
      cand_2 = NULL;
      if (ip_normal_pos (data->current_loop))
	cand_1 = add_candidate_1 (data, ptr->base, ptr->step,
				  false, IP_NORMAL, NULL, NULL);

      if (ip_end_pos (data->current_loop)
	  && allow_ip_end_pos_p (data->current_loop))
	cand_2 = add_candidate_1 (data, ptr->base, ptr->step,
				  false, IP_END, NULL, NULL);

      /* Bind deriving uses and the new candidates.  */
      for (j = 0; j < ptr->uses.length (); j++)
	{
	  struct iv_group *group = data->vgroups[ptr->uses[j]->group_id];
	  if (cand_1)
	    bitmap_set_bit (group->related_cands, cand_1->id);
	  if (cand_2)
	    bitmap_set_bit (group->related_cands, cand_2->id);
	}
    }

  /* Release data since it is useless from this point.  */
  data->iv_common_cand_tab->empty ();
  data->iv_common_cands.truncate (0);
}
3461*38fd1498Szrj
/* Adds candidates based on the value of USE's iv.  */

static void
add_iv_candidate_for_use (struct ivopts_data *data, struct iv_use *use)
{
  poly_uint64 offset;
  tree base;
  tree basetype;
  struct iv *iv = use->iv;

  /* Candidate exactly matching the use's iv.  */
  add_candidate (data, iv->base, iv->step, false, use);

  /* Record common candidate for use in case it can be shared by others.  */
  record_common_cand (data, iv->base, iv->step, use);

  /* Record common candidate with initial value zero.  */
  basetype = TREE_TYPE (iv->base);
  if (POINTER_TYPE_P (basetype))
    basetype = sizetype;
  record_common_cand (data, build_int_cst (basetype, 0), iv->step, use);

  /* Record common candidate with constant offset stripped in base.
     Like the use itself, we also add candidate directly for it.  */
  base = strip_offset (iv->base, &offset);
  if (maybe_ne (offset, 0U) || base != iv->base)
    {
      record_common_cand (data, base, iv->step, use);
      add_candidate (data, base, iv->step, false, use);
    }

  /* Record common candidate with base_object removed in base.  */
  base = iv->base;
  STRIP_NOPS (base);
  if (iv->base_object != NULL && TREE_CODE (base) == POINTER_PLUS_EXPR)
    {
      tree step = iv->step;

      /* Keep only the non-object part of the POINTER_PLUS_EXPR and use
	 a sizetype step to match it.  */
      STRIP_NOPS (step);
      base = TREE_OPERAND (base, 1);
      step = fold_convert (sizetype, step);
      record_common_cand (data, base, step, use);
      /* Also record common candidate with offset stripped.  */
      base = strip_offset (base, &offset);
      if (maybe_ne (offset, 0U))
	record_common_cand (data, base, step, use);
    }

  /* At last, add auto-incremental candidates.  Make such variables
     important since other iv uses with same base object may be based
     on it.  */
  if (use != NULL && address_p (use->type))
    add_autoinc_candidates (data, iv->base, iv->step, true, use);
}
3515*38fd1498Szrj
3516*38fd1498Szrj /* Adds candidates based on the uses. */
3517*38fd1498Szrj
3518*38fd1498Szrj static void
add_iv_candidate_for_groups(struct ivopts_data * data)3519*38fd1498Szrj add_iv_candidate_for_groups (struct ivopts_data *data)
3520*38fd1498Szrj {
3521*38fd1498Szrj unsigned i;
3522*38fd1498Szrj
3523*38fd1498Szrj /* Only add candidate for the first use in group. */
3524*38fd1498Szrj for (i = 0; i < data->vgroups.length (); i++)
3525*38fd1498Szrj {
3526*38fd1498Szrj struct iv_group *group = data->vgroups[i];
3527*38fd1498Szrj
3528*38fd1498Szrj gcc_assert (group->vuses[0] != NULL);
3529*38fd1498Szrj add_iv_candidate_for_use (data, group->vuses[0]);
3530*38fd1498Szrj }
3531*38fd1498Szrj add_iv_candidate_derived_from_uses (data);
3532*38fd1498Szrj }
3533*38fd1498Szrj
3534*38fd1498Szrj /* Record important candidates and add them to related_cands bitmaps. */
3535*38fd1498Szrj
3536*38fd1498Szrj static void
record_important_candidates(struct ivopts_data * data)3537*38fd1498Szrj record_important_candidates (struct ivopts_data *data)
3538*38fd1498Szrj {
3539*38fd1498Szrj unsigned i;
3540*38fd1498Szrj struct iv_group *group;
3541*38fd1498Szrj
3542*38fd1498Szrj for (i = 0; i < data->vcands.length (); i++)
3543*38fd1498Szrj {
3544*38fd1498Szrj struct iv_cand *cand = data->vcands[i];
3545*38fd1498Szrj
3546*38fd1498Szrj if (cand->important)
3547*38fd1498Szrj bitmap_set_bit (data->important_candidates, i);
3548*38fd1498Szrj }
3549*38fd1498Szrj
3550*38fd1498Szrj data->consider_all_candidates = (data->vcands.length ()
3551*38fd1498Szrj <= CONSIDER_ALL_CANDIDATES_BOUND);
3552*38fd1498Szrj
3553*38fd1498Szrj /* Add important candidates to groups' related_cands bitmaps. */
3554*38fd1498Szrj for (i = 0; i < data->vgroups.length (); i++)
3555*38fd1498Szrj {
3556*38fd1498Szrj group = data->vgroups[i];
3557*38fd1498Szrj bitmap_ior_into (group->related_cands, data->important_candidates);
3558*38fd1498Szrj }
3559*38fd1498Szrj }
3560*38fd1498Szrj
3561*38fd1498Szrj /* Allocates the data structure mapping the (use, candidate) pairs to costs.
3562*38fd1498Szrj If consider_all_candidates is true, we use a two-dimensional array, otherwise
3563*38fd1498Szrj we allocate a simple list to every use. */
3564*38fd1498Szrj
3565*38fd1498Szrj static void
alloc_use_cost_map(struct ivopts_data * data)3566*38fd1498Szrj alloc_use_cost_map (struct ivopts_data *data)
3567*38fd1498Szrj {
3568*38fd1498Szrj unsigned i, size, s;
3569*38fd1498Szrj
3570*38fd1498Szrj for (i = 0; i < data->vgroups.length (); i++)
3571*38fd1498Szrj {
3572*38fd1498Szrj struct iv_group *group = data->vgroups[i];
3573*38fd1498Szrj
3574*38fd1498Szrj if (data->consider_all_candidates)
3575*38fd1498Szrj size = data->vcands.length ();
3576*38fd1498Szrj else
3577*38fd1498Szrj {
3578*38fd1498Szrj s = bitmap_count_bits (group->related_cands);
3579*38fd1498Szrj
3580*38fd1498Szrj /* Round up to the power of two, so that moduling by it is fast. */
3581*38fd1498Szrj size = s ? (1 << ceil_log2 (s)) : 1;
3582*38fd1498Szrj }
3583*38fd1498Szrj
3584*38fd1498Szrj group->n_map_members = size;
3585*38fd1498Szrj group->cost_map = XCNEWVEC (struct cost_pair, size);
3586*38fd1498Szrj }
3587*38fd1498Szrj }
3588*38fd1498Szrj
/* Sets cost of (GROUP, CAND) pair to COST and record that it depends
   on invariants INV_VARS and that the value used in expressing it is
   VALUE, and in case of iv elimination the comparison operator is COMP.
   Takes ownership of INV_VARS and INV_EXPRS: they are freed when the
   cost is infinite, and stored in the cost map otherwise.  */

static void
set_group_iv_cost (struct ivopts_data *data,
		   struct iv_group *group, struct iv_cand *cand,
		   comp_cost cost, bitmap inv_vars, tree value,
		   enum tree_code comp, bitmap inv_exprs)
{
  unsigned i, s;

  /* Infinite cost pairs are never consulted; drop them entirely.  */
  if (cost.infinite_cost_p ())
    {
      BITMAP_FREE (inv_vars);
      BITMAP_FREE (inv_exprs);
      return;
    }

  /* With the full two-dimensional map, the candidate id indexes the
     entry directly.  */
  if (data->consider_all_candidates)
    {
      group->cost_map[cand->id].cand = cand;
      group->cost_map[cand->id].cost = cost;
      group->cost_map[cand->id].inv_vars = inv_vars;
      group->cost_map[cand->id].inv_exprs = inv_exprs;
      group->cost_map[cand->id].value = value;
      group->cost_map[cand->id].comp = comp;
      return;
    }

  /* Otherwise use open addressing: probe linearly from the hashed
     position, wrapping around once, for an empty slot.  */
  /* n_map_members is a power of two, so this computes modulo.  */
  s = cand->id & (group->n_map_members - 1);
  for (i = s; i < group->n_map_members; i++)
    if (!group->cost_map[i].cand)
      goto found;
  for (i = 0; i < s; i++)
    if (!group->cost_map[i].cand)
      goto found;

  /* The map is sized to hold all related candidates, so a free slot
     must exist.  */
  gcc_unreachable ();

found:
  group->cost_map[i].cand = cand;
  group->cost_map[i].cost = cost;
  group->cost_map[i].inv_vars = inv_vars;
  group->cost_map[i].inv_exprs = inv_exprs;
  group->cost_map[i].value = value;
  group->cost_map[i].comp = comp;
}
3638*38fd1498Szrj
/* Gets cost of (GROUP, CAND) pair.  Returns NULL if the pair was never
   recorded (e.g. because its cost was infinite).  */

static struct cost_pair *
get_group_iv_cost (struct ivopts_data *data, struct iv_group *group,
		   struct iv_cand *cand)
{
  unsigned i, s;
  struct cost_pair *ret;

  if (!cand)
    return NULL;

  /* Direct index when the full map is allocated.  */
  if (data->consider_all_candidates)
    {
      ret = group->cost_map + cand->id;
      if (!ret->cand)
	return NULL;

      return ret;
    }

  /* Open-addressing lookup mirroring set_group_iv_cost: probe from the
     hashed position; an empty slot means the pair was never stored.  */
  /* n_map_members is a power of two, so this computes modulo.  */
  s = cand->id & (group->n_map_members - 1);
  for (i = s; i < group->n_map_members; i++)
    if (group->cost_map[i].cand == cand)
      return group->cost_map + i;
    else if (group->cost_map[i].cand == NULL)
      return NULL;
  for (i = 0; i < s; i++)
    if (group->cost_map[i].cand == cand)
      return group->cost_map + i;
    else if (group->cost_map[i].cand == NULL)
      return NULL;

  return NULL;
}
3675*38fd1498Szrj
3676*38fd1498Szrj /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
3677*38fd1498Szrj static rtx
produce_memory_decl_rtl(tree obj,int * regno)3678*38fd1498Szrj produce_memory_decl_rtl (tree obj, int *regno)
3679*38fd1498Szrj {
3680*38fd1498Szrj addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
3681*38fd1498Szrj machine_mode address_mode = targetm.addr_space.address_mode (as);
3682*38fd1498Szrj rtx x;
3683*38fd1498Szrj
3684*38fd1498Szrj gcc_assert (obj);
3685*38fd1498Szrj if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
3686*38fd1498Szrj {
3687*38fd1498Szrj const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
3688*38fd1498Szrj x = gen_rtx_SYMBOL_REF (address_mode, name);
3689*38fd1498Szrj SET_SYMBOL_REF_DECL (x, obj);
3690*38fd1498Szrj x = gen_rtx_MEM (DECL_MODE (obj), x);
3691*38fd1498Szrj set_mem_addr_space (x, as);
3692*38fd1498Szrj targetm.encode_section_info (obj, x, true);
3693*38fd1498Szrj }
3694*38fd1498Szrj else
3695*38fd1498Szrj {
3696*38fd1498Szrj x = gen_raw_REG (address_mode, (*regno)++);
3697*38fd1498Szrj x = gen_rtx_MEM (DECL_MODE (obj), x);
3698*38fd1498Szrj set_mem_addr_space (x, as);
3699*38fd1498Szrj }
3700*38fd1498Szrj
3701*38fd1498Szrj return x;
3702*38fd1498Szrj }
3703*38fd1498Szrj
3704*38fd1498Szrj /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
3705*38fd1498Szrj walk_tree. DATA contains the actual fake register number. */
3706*38fd1498Szrj
3707*38fd1498Szrj static tree
prepare_decl_rtl(tree * expr_p,int * ws,void * data)3708*38fd1498Szrj prepare_decl_rtl (tree *expr_p, int *ws, void *data)
3709*38fd1498Szrj {
3710*38fd1498Szrj tree obj = NULL_TREE;
3711*38fd1498Szrj rtx x = NULL_RTX;
3712*38fd1498Szrj int *regno = (int *) data;
3713*38fd1498Szrj
3714*38fd1498Szrj switch (TREE_CODE (*expr_p))
3715*38fd1498Szrj {
3716*38fd1498Szrj case ADDR_EXPR:
3717*38fd1498Szrj for (expr_p = &TREE_OPERAND (*expr_p, 0);
3718*38fd1498Szrj handled_component_p (*expr_p);
3719*38fd1498Szrj expr_p = &TREE_OPERAND (*expr_p, 0))
3720*38fd1498Szrj continue;
3721*38fd1498Szrj obj = *expr_p;
3722*38fd1498Szrj if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
3723*38fd1498Szrj x = produce_memory_decl_rtl (obj, regno);
3724*38fd1498Szrj break;
3725*38fd1498Szrj
3726*38fd1498Szrj case SSA_NAME:
3727*38fd1498Szrj *ws = 0;
3728*38fd1498Szrj obj = SSA_NAME_VAR (*expr_p);
3729*38fd1498Szrj /* Defer handling of anonymous SSA_NAMEs to the expander. */
3730*38fd1498Szrj if (!obj)
3731*38fd1498Szrj return NULL_TREE;
3732*38fd1498Szrj if (!DECL_RTL_SET_P (obj))
3733*38fd1498Szrj x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3734*38fd1498Szrj break;
3735*38fd1498Szrj
3736*38fd1498Szrj case VAR_DECL:
3737*38fd1498Szrj case PARM_DECL:
3738*38fd1498Szrj case RESULT_DECL:
3739*38fd1498Szrj *ws = 0;
3740*38fd1498Szrj obj = *expr_p;
3741*38fd1498Szrj
3742*38fd1498Szrj if (DECL_RTL_SET_P (obj))
3743*38fd1498Szrj break;
3744*38fd1498Szrj
3745*38fd1498Szrj if (DECL_MODE (obj) == BLKmode)
3746*38fd1498Szrj x = produce_memory_decl_rtl (obj, regno);
3747*38fd1498Szrj else
3748*38fd1498Szrj x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3749*38fd1498Szrj
3750*38fd1498Szrj break;
3751*38fd1498Szrj
3752*38fd1498Szrj default:
3753*38fd1498Szrj break;
3754*38fd1498Szrj }
3755*38fd1498Szrj
3756*38fd1498Szrj if (x)
3757*38fd1498Szrj {
3758*38fd1498Szrj decl_rtl_to_reset.safe_push (obj);
3759*38fd1498Szrj SET_DECL_RTL (obj, x);
3760*38fd1498Szrj }
3761*38fd1498Szrj
3762*38fd1498Szrj return NULL_TREE;
3763*38fd1498Szrj }
3764*38fd1498Szrj
3765*38fd1498Szrj /* Determines cost of the computation of EXPR. */
3766*38fd1498Szrj
static unsigned
computation_cost (tree expr, bool speed)
{
  rtx_insn *seq;
  rtx rslt;
  tree type = TREE_TYPE (expr);
  unsigned cost;
  /* Avoid using hard regs in ways which may be unsupported.  */
  int regno = LAST_VIRTUAL_REGISTER + 1;
  struct cgraph_node *node = cgraph_node::get (current_function_decl);
  enum node_frequency real_frequency = node->frequency;

  /* Temporarily pretend the function has normal frequency and set the
     RTL profile according to SPEED, so the rtx cost hooks see a
     consistent optimize-for-speed/size setting.  */
  node->frequency = NODE_FREQUENCY_NORMAL;
  crtl->maybe_hot_insn_p = speed;
  /* Give every decl referenced by EXPR a fake DECL_RTL so it can be
     expanded outside normal expansion context.  */
  walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
  /* Expand EXPR into a throw-away insn sequence and measure its cost.  */
  start_sequence ();
  rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
  seq = get_insns ();
  end_sequence ();
  /* Restore the RTL profile and the node's real frequency.  */
  default_rtl_profile ();
  node->frequency = real_frequency;

  cost = seq_cost (seq, speed);
  /* The expansion result itself may still need work: count the cost of
     the address if it is a memory reference, or of the source expression
     if it did not end up in a register.  */
  if (MEM_P (rslt))
    cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
			  TYPE_ADDR_SPACE (type), speed);
  else if (!REG_P (rslt))
    cost += set_src_cost (rslt, TYPE_MODE (type), speed);

  return cost;
}
3798*38fd1498Szrj
3799*38fd1498Szrj /* Returns variable containing the value of candidate CAND at statement AT. */
3800*38fd1498Szrj
3801*38fd1498Szrj static tree
var_at_stmt(struct loop * loop,struct iv_cand * cand,gimple * stmt)3802*38fd1498Szrj var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple *stmt)
3803*38fd1498Szrj {
3804*38fd1498Szrj if (stmt_after_increment (loop, cand, stmt))
3805*38fd1498Szrj return cand->var_after;
3806*38fd1498Szrj else
3807*38fd1498Szrj return cand->var_before;
3808*38fd1498Szrj }
3809*38fd1498Szrj
3810*38fd1498Szrj /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3811*38fd1498Szrj same precision that is at least as wide as the precision of TYPE, stores
3812*38fd1498Szrj BA to A and BB to B, and returns the type of BA. Otherwise, returns the
3813*38fd1498Szrj type of A and B. */
3814*38fd1498Szrj
3815*38fd1498Szrj static tree
determine_common_wider_type(tree * a,tree * b)3816*38fd1498Szrj determine_common_wider_type (tree *a, tree *b)
3817*38fd1498Szrj {
3818*38fd1498Szrj tree wider_type = NULL;
3819*38fd1498Szrj tree suba, subb;
3820*38fd1498Szrj tree atype = TREE_TYPE (*a);
3821*38fd1498Szrj
3822*38fd1498Szrj if (CONVERT_EXPR_P (*a))
3823*38fd1498Szrj {
3824*38fd1498Szrj suba = TREE_OPERAND (*a, 0);
3825*38fd1498Szrj wider_type = TREE_TYPE (suba);
3826*38fd1498Szrj if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3827*38fd1498Szrj return atype;
3828*38fd1498Szrj }
3829*38fd1498Szrj else
3830*38fd1498Szrj return atype;
3831*38fd1498Szrj
3832*38fd1498Szrj if (CONVERT_EXPR_P (*b))
3833*38fd1498Szrj {
3834*38fd1498Szrj subb = TREE_OPERAND (*b, 0);
3835*38fd1498Szrj if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3836*38fd1498Szrj return atype;
3837*38fd1498Szrj }
3838*38fd1498Szrj else
3839*38fd1498Szrj return atype;
3840*38fd1498Szrj
3841*38fd1498Szrj *a = suba;
3842*38fd1498Szrj *b = subb;
3843*38fd1498Szrj return wider_type;
3844*38fd1498Szrj }
3845*38fd1498Szrj
3846*38fd1498Szrj /* Determines the expression by that USE is expressed from induction variable
3847*38fd1498Szrj CAND at statement AT in LOOP. The expression is stored in two parts in a
3848*38fd1498Szrj decomposed form. The invariant part is stored in AFF_INV; while variant
3849*38fd1498Szrj part in AFF_VAR. Store ratio of CAND.step over USE.step in PRAT if it's
3850*38fd1498Szrj non-null. Returns false if USE cannot be expressed using CAND. */
3851*38fd1498Szrj
static bool
get_computation_aff_1 (struct loop *loop, gimple *at, struct iv_use *use,
		       struct iv_cand *cand, struct aff_tree *aff_inv,
		       struct aff_tree *aff_var, widest_int *prat = NULL)
{
  tree ubase = use->iv->base, ustep = use->iv->step;
  tree cbase = cand->iv->base, cstep = cand->iv->step;
  tree common_type, uutype, var, cstep_common;
  tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
  aff_tree aff_cbase;
  widest_int rat;

  /* We must have a precision to express the values of use.  */
  if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
    return false;

  var = var_at_stmt (loop, cand, at);
  /* All arithmetic below is done in the unsigned variant of UTYPE so
     that overflow has defined wrapping semantics.  */
  uutype = unsigned_type_for (utype);

  /* If the conversion is not noop, perform it.  */
  if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
    {
      if (cand->orig_iv != NULL && CONVERT_EXPR_P (cbase)
	  && (CONVERT_EXPR_P (cstep) || poly_int_tree_p (cstep)))
	{
	  tree inner_base, inner_step, inner_type;
	  inner_base = TREE_OPERAND (cbase, 0);
	  if (CONVERT_EXPR_P (cstep))
	    inner_step = TREE_OPERAND (cstep, 0);
	  else
	    inner_step = cstep;

	  inner_type = TREE_TYPE (inner_base);
	  /* If candidate is added from a biv whose type is smaller than
	     ctype, we know both candidate and the biv won't overflow.
	     In this case, it's safe to skip the conversion in candidate.
	     As an example, (unsigned short)((unsigned long)A) equals to
	     (unsigned short)A, if A has a type no larger than short.  */
	  if (TYPE_PRECISION (inner_type) <= TYPE_PRECISION (uutype))
	    {
	      cbase = inner_base;
	      cstep = inner_step;
	    }
	}
      cbase = fold_convert (uutype, cbase);
      cstep = fold_convert (uutype, cstep);
      var = fold_convert (uutype, var);
    }

  /* Ratio is 1 when computing the value of biv cand by itself.
     We can't rely on constant_multiple_of in this case because the
     use is created after the original biv is selected.  The call
     could fail because of inconsistent fold behavior.  See PR68021
     for more information.  */
  if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
    {
      gcc_assert (is_gimple_assign (use->stmt));
      gcc_assert (use->iv->ssa_name == cand->var_after);
      gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
      rat = 1;
    }
  else if (!constant_multiple_of (ustep, cstep, &rat))
    return false;

  if (prat)
    *prat = rat;

  /* In case both UBASE and CBASE are shortened to UUTYPE from some common
     type, we achieve better folding by computing their difference in this
     wider type, and cast the result to UUTYPE.  We do not need to worry about
     overflows, as all the arithmetics will in the end be performed in UUTYPE
     anyway.  */
  common_type = determine_common_wider_type (&ubase, &cbase);

  /* use = ubase - ratio * cbase + ratio * var.  */
  tree_to_aff_combination (ubase, common_type, aff_inv);
  tree_to_aff_combination (cbase, common_type, &aff_cbase);
  tree_to_aff_combination (var, uutype, aff_var);

  /* We need to shift the value if we are after the increment.  */
  if (stmt_after_increment (loop, cand, at))
    {
      aff_tree cstep_aff;

      if (common_type != uutype)
	cstep_common = fold_convert (common_type, cstep);
      else
	cstep_common = cstep;

      tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
      aff_combination_add (&aff_cbase, &cstep_aff);
    }

  /* Fold -ratio * cbase into the invariant part, converting back to
     UUTYPE if the difference was computed in a wider common type.  */
  aff_combination_scale (&aff_cbase, -rat);
  aff_combination_add (aff_inv, &aff_cbase);
  if (common_type != uutype)
    aff_combination_convert (aff_inv, uutype);

  /* The variant part is the candidate variable scaled by the ratio.  */
  aff_combination_scale (aff_var, rat);
  return true;
}
3953*38fd1498Szrj
3954*38fd1498Szrj /* Determines the expression by that USE is expressed from induction variable
3955*38fd1498Szrj CAND at statement AT in LOOP. The expression is stored in a decomposed
3956*38fd1498Szrj form into AFF. Returns false if USE cannot be expressed using CAND. */
3957*38fd1498Szrj
3958*38fd1498Szrj static bool
get_computation_aff(struct loop * loop,gimple * at,struct iv_use * use,struct iv_cand * cand,struct aff_tree * aff)3959*38fd1498Szrj get_computation_aff (struct loop *loop, gimple *at, struct iv_use *use,
3960*38fd1498Szrj struct iv_cand *cand, struct aff_tree *aff)
3961*38fd1498Szrj {
3962*38fd1498Szrj aff_tree aff_var;
3963*38fd1498Szrj
3964*38fd1498Szrj if (!get_computation_aff_1 (loop, at, use, cand, aff, &aff_var))
3965*38fd1498Szrj return false;
3966*38fd1498Szrj
3967*38fd1498Szrj aff_combination_add (aff, &aff_var);
3968*38fd1498Szrj return true;
3969*38fd1498Szrj }
3970*38fd1498Szrj
3971*38fd1498Szrj /* Return the type of USE. */
3972*38fd1498Szrj
3973*38fd1498Szrj static tree
get_use_type(struct iv_use * use)3974*38fd1498Szrj get_use_type (struct iv_use *use)
3975*38fd1498Szrj {
3976*38fd1498Szrj tree base_type = TREE_TYPE (use->iv->base);
3977*38fd1498Szrj tree type;
3978*38fd1498Szrj
3979*38fd1498Szrj if (use->type == USE_REF_ADDRESS)
3980*38fd1498Szrj {
3981*38fd1498Szrj /* The base_type may be a void pointer. Create a pointer type based on
3982*38fd1498Szrj the mem_ref instead. */
3983*38fd1498Szrj type = build_pointer_type (TREE_TYPE (*use->op_p));
3984*38fd1498Szrj gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3985*38fd1498Szrj == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3986*38fd1498Szrj }
3987*38fd1498Szrj else
3988*38fd1498Szrj type = base_type;
3989*38fd1498Szrj
3990*38fd1498Szrj return type;
3991*38fd1498Szrj }
3992*38fd1498Szrj
3993*38fd1498Szrj /* Determines the expression by that USE is expressed from induction variable
3994*38fd1498Szrj CAND at statement AT in LOOP. The computation is unshared. */
3995*38fd1498Szrj
3996*38fd1498Szrj static tree
get_computation_at(struct loop * loop,gimple * at,struct iv_use * use,struct iv_cand * cand)3997*38fd1498Szrj get_computation_at (struct loop *loop, gimple *at,
3998*38fd1498Szrj struct iv_use *use, struct iv_cand *cand)
3999*38fd1498Szrj {
4000*38fd1498Szrj aff_tree aff;
4001*38fd1498Szrj tree type = get_use_type (use);
4002*38fd1498Szrj
4003*38fd1498Szrj if (!get_computation_aff (loop, at, use, cand, &aff))
4004*38fd1498Szrj return NULL_TREE;
4005*38fd1498Szrj unshare_aff_combination (&aff);
4006*38fd1498Szrj return fold_convert (type, aff_combination_to_tree (&aff));
4007*38fd1498Szrj }
4008*38fd1498Szrj
4009*38fd1498Szrj /* Adjust the cost COST for being in loop setup rather than loop body.
4010*38fd1498Szrj If we're optimizing for space, the loop setup overhead is constant;
4011*38fd1498Szrj if we're optimizing for speed, amortize it over the per-iteration cost.
4012*38fd1498Szrj If ROUND_UP_P is true, the result is round up rather than to zero when
4013*38fd1498Szrj optimizing for speed. */
4014*38fd1498Szrj static unsigned
4015*38fd1498Szrj adjust_setup_cost (struct ivopts_data *data, unsigned cost,
4016*38fd1498Szrj bool round_up_p = false)
4017*38fd1498Szrj {
4018*38fd1498Szrj if (cost == INFTY)
4019*38fd1498Szrj return cost;
4020*38fd1498Szrj else if (optimize_loop_for_speed_p (data->current_loop))
4021*38fd1498Szrj {
4022*38fd1498Szrj HOST_WIDE_INT niters = avg_loop_niter (data->current_loop);
4023*38fd1498Szrj return ((HOST_WIDE_INT) cost + (round_up_p ? niters - 1 : 0)) / niters;
4024*38fd1498Szrj }
4025*38fd1498Szrj else
4026*38fd1498Szrj return cost;
4027*38fd1498Szrj }
4028*38fd1498Szrj
4029*38fd1498Szrj /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is the
4030*38fd1498Szrj EXPR operand holding the shift. COST0 and COST1 are the costs for
4031*38fd1498Szrj calculating the operands of EXPR. Returns true if successful, and returns
4032*38fd1498Szrj the cost in COST. */
4033*38fd1498Szrj
4034*38fd1498Szrj static bool
get_shiftadd_cost(tree expr,scalar_int_mode mode,comp_cost cost0,comp_cost cost1,tree mult,bool speed,comp_cost * cost)4035*38fd1498Szrj get_shiftadd_cost (tree expr, scalar_int_mode mode, comp_cost cost0,
4036*38fd1498Szrj comp_cost cost1, tree mult, bool speed, comp_cost *cost)
4037*38fd1498Szrj {
4038*38fd1498Szrj comp_cost res;
4039*38fd1498Szrj tree op1 = TREE_OPERAND (expr, 1);
4040*38fd1498Szrj tree cst = TREE_OPERAND (mult, 1);
4041*38fd1498Szrj tree multop = TREE_OPERAND (mult, 0);
4042*38fd1498Szrj int m = exact_log2 (int_cst_value (cst));
4043*38fd1498Szrj int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
4044*38fd1498Szrj int as_cost, sa_cost;
4045*38fd1498Szrj bool mult_in_op1;
4046*38fd1498Szrj
4047*38fd1498Szrj if (!(m >= 0 && m < maxm))
4048*38fd1498Szrj return false;
4049*38fd1498Szrj
4050*38fd1498Szrj STRIP_NOPS (op1);
4051*38fd1498Szrj mult_in_op1 = operand_equal_p (op1, mult, 0);
4052*38fd1498Szrj
4053*38fd1498Szrj as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
4054*38fd1498Szrj
4055*38fd1498Szrj /* If the target has a cheap shift-and-add or shift-and-sub instruction,
4056*38fd1498Szrj use that in preference to a shift insn followed by an add insn. */
4057*38fd1498Szrj sa_cost = (TREE_CODE (expr) != MINUS_EXPR
4058*38fd1498Szrj ? shiftadd_cost (speed, mode, m)
4059*38fd1498Szrj : (mult_in_op1
4060*38fd1498Szrj ? shiftsub1_cost (speed, mode, m)
4061*38fd1498Szrj : shiftsub0_cost (speed, mode, m)));
4062*38fd1498Szrj
4063*38fd1498Szrj res = comp_cost (MIN (as_cost, sa_cost), 0);
4064*38fd1498Szrj res += (mult_in_op1 ? cost0 : cost1);
4065*38fd1498Szrj
4066*38fd1498Szrj STRIP_NOPS (multop);
4067*38fd1498Szrj if (!is_gimple_val (multop))
4068*38fd1498Szrj res += force_expr_to_var_cost (multop, speed);
4069*38fd1498Szrj
4070*38fd1498Szrj *cost = res;
4071*38fd1498Szrj return true;
4072*38fd1498Szrj }
4073*38fd1498Szrj
4074*38fd1498Szrj /* Estimates cost of forcing expression EXPR into a variable. */
4075*38fd1498Szrj
static comp_cost
force_expr_to_var_cost (tree expr, bool speed)
{
  /* Lazily-initialized baseline costs, indexed by SPEED (0 = size,
     1 = speed).  */
  static bool costs_initialized = false;
  static unsigned integer_cost [2];
  static unsigned symbol_cost [2];
  static unsigned address_cost [2];
  tree op0, op1;
  comp_cost cost0, cost1, cost;
  machine_mode mode;
  scalar_int_mode int_mode;

  if (!costs_initialized)
    {
      tree type = build_pointer_type (integer_type_node);
      tree var, addr;
      rtx x;
      int i;

      /* Measure the cost of materializing an integer constant, a symbol
	 address, and a symbol plus offset, for both size and speed.  */
      var = create_tmp_var_raw (integer_type_node, "test_var");
      TREE_STATIC (var) = 1;
      x = produce_memory_decl_rtl (var, NULL);
      SET_DECL_RTL (var, x);

      addr = build1 (ADDR_EXPR, type, var);


      for (i = 0; i < 2; i++)
	{
	  integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
							     2000), i);

	  /* The "+ 1" biases symbol/address costs above a plain constant
	     of the same expansion cost.  */
	  symbol_cost[i] = computation_cost (addr, i) + 1;

	  address_cost[i]
	    = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
	      fprintf (dump_file, "  integer %d\n", (int) integer_cost[i]);
	      fprintf (dump_file, "  symbol %d\n", (int) symbol_cost[i]);
	      fprintf (dump_file, "  address %d\n", (int) address_cost[i]);
	      fprintf (dump_file, "  other %d\n", (int) target_spill_cost[i]);
	      fprintf (dump_file, "\n");
	    }
	}

      costs_initialized = true;
    }

  STRIP_NOPS (expr);

  /* A bare SSA name or declaration needs no computation.  */
  if (SSA_VAR_P (expr))
    return no_cost;

  if (is_gimple_min_invariant (expr))
    {
      if (poly_int_tree_p (expr))
	return comp_cost (integer_cost [speed], 0);

      if (TREE_CODE (expr) == ADDR_EXPR)
	{
	  tree obj = TREE_OPERAND (expr, 0);

	  if (VAR_P (obj)
	      || TREE_CODE (obj) == PARM_DECL
	      || TREE_CODE (obj) == RESULT_DECL)
	    return comp_cost (symbol_cost [speed], 0);
	}

      return comp_cost (address_cost [speed], 0);
    }

  /* Decompose supported binary/unary expressions; anything else is
     priced as a spill (arbitrary value, FIXME below).  */
  switch (TREE_CODE (expr))
    {
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
    case MULT_EXPR:
    case TRUNC_DIV_EXPR:
    case BIT_AND_EXPR:
    case BIT_IOR_EXPR:
    case LSHIFT_EXPR:
    case RSHIFT_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);
      STRIP_NOPS (op0);
      STRIP_NOPS (op1);
      break;

    CASE_CONVERT:
    case NEGATE_EXPR:
    case BIT_NOT_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      STRIP_NOPS (op0);
      op1 = NULL_TREE;
      break;

    default:
      /* Just an arbitrary value, FIXME.  */
      return comp_cost (target_spill_cost[speed], 0);
    }

  /* Recursively cost the operands; names and constants are free here
     (constants were handled above, at the top level).  */
  if (op0 == NULL_TREE
      || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
    cost0 = no_cost;
  else
    cost0 = force_expr_to_var_cost (op0, speed);

  if (op1 == NULL_TREE
      || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
    cost1 = no_cost;
  else
    cost1 = force_expr_to_var_cost (op1, speed);

  mode = TYPE_MODE (TREE_TYPE (expr));
  switch (TREE_CODE (expr))
    {
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
    case NEGATE_EXPR:
      cost = comp_cost (add_cost (speed, mode), 0);
      if (TREE_CODE (expr) != NEGATE_EXPR)
	{
	  /* Prefer a combined shift-add cost when one operand is a
	     multiplication by a power of two.  */
	  tree mult = NULL_TREE;
	  comp_cost sa_cost;
	  if (TREE_CODE (op1) == MULT_EXPR)
	    mult = op1;
	  else if (TREE_CODE (op0) == MULT_EXPR)
	    mult = op0;

	  if (mult != NULL_TREE
	      && is_a <scalar_int_mode> (mode, &int_mode)
	      && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
	      && get_shiftadd_cost (expr, int_mode, cost0, cost1, mult,
				    speed, &sa_cost))
	    return sa_cost;
	}
      break;

    CASE_CONVERT:
      {
	tree inner_mode, outer_mode;
	outer_mode = TREE_TYPE (expr);
	inner_mode = TREE_TYPE (op0);
	cost = comp_cost (convert_cost (TYPE_MODE (outer_mode),
					TYPE_MODE (inner_mode), speed), 0);
      }
      break;

    case MULT_EXPR:
      /* Multiplication by a constant can often be done with shifts and
	 adds; a general multiply is priced as a spill.  */
      if (cst_and_fits_in_hwi (op0))
	cost = comp_cost (mult_by_coeff_cost (int_cst_value (op0),
					      mode, speed), 0);
      else if (cst_and_fits_in_hwi (op1))
	cost = comp_cost (mult_by_coeff_cost (int_cst_value (op1),
					      mode, speed), 0);
      else
	return comp_cost (target_spill_cost [speed], 0);
      break;

    case TRUNC_DIV_EXPR:
      /* Division by power of two is usually cheap, so we allow it.  Forbid
	 anything else.  */
      if (integer_pow2p (TREE_OPERAND (expr, 1)))
	cost = comp_cost (add_cost (speed, mode), 0);
      else
	cost = comp_cost (target_spill_cost[speed], 0);
      break;

    case BIT_AND_EXPR:
    case BIT_IOR_EXPR:
    case BIT_NOT_EXPR:
    case LSHIFT_EXPR:
    case RSHIFT_EXPR:
      /* Logical and shift operations are priced like an add.  */
      cost = comp_cost (add_cost (speed, mode), 0);
      break;

    default:
      gcc_unreachable ();
    }

  cost += cost0;
  cost += cost1;
  return cost;
}
4263*38fd1498Szrj
4264*38fd1498Szrj /* Estimates cost of forcing EXPR into a variable. INV_VARS is a set of the
4265*38fd1498Szrj invariants the computation depends on. */
4266*38fd1498Szrj
4267*38fd1498Szrj static comp_cost
force_var_cost(struct ivopts_data * data,tree expr,bitmap * inv_vars)4268*38fd1498Szrj force_var_cost (struct ivopts_data *data, tree expr, bitmap *inv_vars)
4269*38fd1498Szrj {
4270*38fd1498Szrj if (!expr)
4271*38fd1498Szrj return no_cost;
4272*38fd1498Szrj
4273*38fd1498Szrj find_inv_vars (data, &expr, inv_vars);
4274*38fd1498Szrj return force_expr_to_var_cost (expr, data->speed);
4275*38fd1498Szrj }
4276*38fd1498Szrj
4277*38fd1498Szrj /* Returns cost of auto-modifying address expression in shape base + offset.
4278*38fd1498Szrj AINC_STEP is step size of the address IV. AINC_OFFSET is offset of the
4279*38fd1498Szrj address expression. The address expression has ADDR_MODE in addr space
4280*38fd1498Szrj AS. The memory access has MEM_MODE. SPEED means we are optimizing for
4281*38fd1498Szrj speed or size. */
4282*38fd1498Szrj
enum ainc_type
{
  AINC_PRE_INC,		/* Pre increment.  */
  AINC_PRE_DEC,		/* Pre decrement.  */
  AINC_POST_INC,	/* Post increment.  */
  AINC_POST_DEC,	/* Post decrement.  */
  AINC_NONE		/* Also the number of auto increment types.  */
};

/* Cached address cost for each auto-increment addressing form; entries
   are set to INFTY when the form is unsupported (see
   get_address_cost_ainc).  */
struct ainc_cost_data
{
  unsigned costs[AINC_NONE];
};
4296*38fd1498Szrj
static comp_cost
get_address_cost_ainc (poly_int64 ainc_step, poly_int64 ainc_offset,
		       machine_mode addr_mode, machine_mode mem_mode,
		       addr_space_t as, bool speed)
{
  /* Bail out early if the target supports no auto-modify addressing at
     all for this memory mode.  */
  if (!USE_LOAD_PRE_DECREMENT (mem_mode)
      && !USE_STORE_PRE_DECREMENT (mem_mode)
      && !USE_LOAD_POST_DECREMENT (mem_mode)
      && !USE_STORE_POST_DECREMENT (mem_mode)
      && !USE_LOAD_PRE_INCREMENT (mem_mode)
      && !USE_STORE_PRE_INCREMENT (mem_mode)
      && !USE_LOAD_POST_INCREMENT (mem_mode)
      && !USE_STORE_POST_INCREMENT (mem_mode))
    return infinite_cost;

  /* Costs are cached per (address space, memory mode) pair, lazily
     grown and lazily filled.  */
  static vec<ainc_cost_data *> ainc_cost_data_list;
  unsigned idx = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
  if (idx >= ainc_cost_data_list.length ())
    {
      unsigned nsize = ((unsigned) as + 1) * MAX_MACHINE_MODE;

      gcc_assert (nsize > idx);
      ainc_cost_data_list.safe_grow_cleared (nsize);
    }

  ainc_cost_data *data = ainc_cost_data_list[idx];
  if (data == NULL)
    {
      /* Probe each supported form with a fake base register and record
	 its address cost; unsupported forms stay at INFTY.  */
      rtx reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);

      data = (ainc_cost_data *) xcalloc (1, sizeof (*data));
      data->costs[AINC_PRE_DEC] = INFTY;
      data->costs[AINC_POST_DEC] = INFTY;
      data->costs[AINC_PRE_INC] = INFTY;
      data->costs[AINC_POST_INC] = INFTY;
      if (USE_LOAD_PRE_DECREMENT (mem_mode)
	  || USE_STORE_PRE_DECREMENT (mem_mode))
	{
	  rtx addr = gen_rtx_PRE_DEC (addr_mode, reg);

	  if (memory_address_addr_space_p (mem_mode, addr, as))
	    data->costs[AINC_PRE_DEC]
	      = address_cost (addr, mem_mode, as, speed);
	}
      if (USE_LOAD_POST_DECREMENT (mem_mode)
	  || USE_STORE_POST_DECREMENT (mem_mode))
	{
	  rtx addr = gen_rtx_POST_DEC (addr_mode, reg);

	  if (memory_address_addr_space_p (mem_mode, addr, as))
	    data->costs[AINC_POST_DEC]
	      = address_cost (addr, mem_mode, as, speed);
	}
      if (USE_LOAD_PRE_INCREMENT (mem_mode)
	  || USE_STORE_PRE_INCREMENT (mem_mode))
	{
	  rtx addr = gen_rtx_PRE_INC (addr_mode, reg);

	  if (memory_address_addr_space_p (mem_mode, addr, as))
	    data->costs[AINC_PRE_INC]
	      = address_cost (addr, mem_mode, as, speed);
	}
      if (USE_LOAD_POST_INCREMENT (mem_mode)
	  || USE_STORE_POST_INCREMENT (mem_mode))
	{
	  rtx addr = gen_rtx_POST_INC (addr_mode, reg);

	  if (memory_address_addr_space_p (mem_mode, addr, as))
	    data->costs[AINC_POST_INC]
	      = address_cost (addr, mem_mode, as, speed);
	}
      ainc_cost_data_list[idx] = data;
    }

  /* Match STEP/OFFSET against the shape of each form: post-modify needs
     a zero offset, pre-modify an offset equal to the step; the step's
     magnitude must equal the access size.  */
  poly_int64 msize = GET_MODE_SIZE (mem_mode);
  if (known_eq (ainc_offset, 0) && known_eq (msize, ainc_step))
    return comp_cost (data->costs[AINC_POST_INC], 0);
  if (known_eq (ainc_offset, 0) && known_eq (msize, -ainc_step))
    return comp_cost (data->costs[AINC_POST_DEC], 0);
  if (known_eq (ainc_offset, msize) && known_eq (msize, ainc_step))
    return comp_cost (data->costs[AINC_PRE_INC], 0);
  if (known_eq (ainc_offset, -msize) && known_eq (msize, -ainc_step))
    return comp_cost (data->costs[AINC_PRE_DEC], 0);

  return infinite_cost;
}
4383*38fd1498Szrj
4384*38fd1498Szrj /* Return cost of computing USE's address expression by using CAND.
4385*38fd1498Szrj AFF_INV and AFF_VAR represent invariant and variant parts of the
4386*38fd1498Szrj address expression, respectively. If AFF_INV is simple, store
4387*38fd1498Szrj the loop invariant variables on which it depends in INV_VARS;
4388*38fd1498Szrj if AFF_INV is complicated, handle it as a new invariant expression
4389*38fd1498Szrj and record it in INV_EXPR. RATIO is the multiple between the
4390*38fd1498Szrj steps of USE and CAND. If CAN_AUTOINC is non-NULL, store a boolean
4391*38fd1498Szrj value to it indicating if this is an auto-increment address. */
4392*38fd1498Szrj
4393*38fd1498Szrj static comp_cost
get_address_cost(struct ivopts_data * data,struct iv_use * use,struct iv_cand * cand,aff_tree * aff_inv,aff_tree * aff_var,HOST_WIDE_INT ratio,bitmap * inv_vars,iv_inv_expr_ent ** inv_expr,bool * can_autoinc,bool speed)4394*38fd1498Szrj get_address_cost (struct ivopts_data *data, struct iv_use *use,
4395*38fd1498Szrj struct iv_cand *cand, aff_tree *aff_inv,
4396*38fd1498Szrj aff_tree *aff_var, HOST_WIDE_INT ratio,
4397*38fd1498Szrj bitmap *inv_vars, iv_inv_expr_ent **inv_expr,
4398*38fd1498Szrj bool *can_autoinc, bool speed)
4399*38fd1498Szrj {
4400*38fd1498Szrj rtx addr;
4401*38fd1498Szrj bool simple_inv = true;
4402*38fd1498Szrj tree comp_inv = NULL_TREE, type = aff_var->type;
4403*38fd1498Szrj comp_cost var_cost = no_cost, cost = no_cost;
4404*38fd1498Szrj struct mem_address parts = {NULL_TREE, integer_one_node,
4405*38fd1498Szrj NULL_TREE, NULL_TREE, NULL_TREE};
4406*38fd1498Szrj machine_mode addr_mode = TYPE_MODE (type);
4407*38fd1498Szrj machine_mode mem_mode = TYPE_MODE (use->mem_type);
4408*38fd1498Szrj addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
4409*38fd1498Szrj /* Only true if ratio != 1. */
4410*38fd1498Szrj bool ok_with_ratio_p = false;
4411*38fd1498Szrj bool ok_without_ratio_p = false;
4412*38fd1498Szrj
4413*38fd1498Szrj if (!aff_combination_const_p (aff_inv))
4414*38fd1498Szrj {
4415*38fd1498Szrj parts.index = integer_one_node;
4416*38fd1498Szrj /* Addressing mode "base + index". */
4417*38fd1498Szrj ok_without_ratio_p = valid_mem_ref_p (mem_mode, as, &parts);
4418*38fd1498Szrj if (ratio != 1)
4419*38fd1498Szrj {
4420*38fd1498Szrj parts.step = wide_int_to_tree (type, ratio);
4421*38fd1498Szrj /* Addressing mode "base + index << scale". */
4422*38fd1498Szrj ok_with_ratio_p = valid_mem_ref_p (mem_mode, as, &parts);
4423*38fd1498Szrj if (!ok_with_ratio_p)
4424*38fd1498Szrj parts.step = NULL_TREE;
4425*38fd1498Szrj }
4426*38fd1498Szrj if (ok_with_ratio_p || ok_without_ratio_p)
4427*38fd1498Szrj {
4428*38fd1498Szrj if (maybe_ne (aff_inv->offset, 0))
4429*38fd1498Szrj {
4430*38fd1498Szrj parts.offset = wide_int_to_tree (sizetype, aff_inv->offset);
4431*38fd1498Szrj /* Addressing mode "base + index [<< scale] + offset". */
4432*38fd1498Szrj if (!valid_mem_ref_p (mem_mode, as, &parts))
4433*38fd1498Szrj parts.offset = NULL_TREE;
4434*38fd1498Szrj else
4435*38fd1498Szrj aff_inv->offset = 0;
4436*38fd1498Szrj }
4437*38fd1498Szrj
4438*38fd1498Szrj move_fixed_address_to_symbol (&parts, aff_inv);
4439*38fd1498Szrj /* Base is fixed address and is moved to symbol part. */
4440*38fd1498Szrj if (parts.symbol != NULL_TREE && aff_combination_zero_p (aff_inv))
4441*38fd1498Szrj parts.base = NULL_TREE;
4442*38fd1498Szrj
4443*38fd1498Szrj /* Addressing mode "symbol + base + index [<< scale] [+ offset]". */
4444*38fd1498Szrj if (parts.symbol != NULL_TREE
4445*38fd1498Szrj && !valid_mem_ref_p (mem_mode, as, &parts))
4446*38fd1498Szrj {
4447*38fd1498Szrj aff_combination_add_elt (aff_inv, parts.symbol, 1);
4448*38fd1498Szrj parts.symbol = NULL_TREE;
4449*38fd1498Szrj /* Reset SIMPLE_INV since symbol address needs to be computed
4450*38fd1498Szrj outside of address expression in this case. */
4451*38fd1498Szrj simple_inv = false;
4452*38fd1498Szrj /* Symbol part is moved back to base part, it can't be NULL. */
4453*38fd1498Szrj parts.base = integer_one_node;
4454*38fd1498Szrj }
4455*38fd1498Szrj }
4456*38fd1498Szrj else
4457*38fd1498Szrj parts.index = NULL_TREE;
4458*38fd1498Szrj }
4459*38fd1498Szrj else
4460*38fd1498Szrj {
4461*38fd1498Szrj poly_int64 ainc_step;
4462*38fd1498Szrj if (can_autoinc
4463*38fd1498Szrj && ratio == 1
4464*38fd1498Szrj && ptrdiff_tree_p (cand->iv->step, &ainc_step))
4465*38fd1498Szrj {
4466*38fd1498Szrj poly_int64 ainc_offset = (aff_inv->offset).force_shwi ();
4467*38fd1498Szrj
4468*38fd1498Szrj if (stmt_after_increment (data->current_loop, cand, use->stmt))
4469*38fd1498Szrj ainc_offset += ainc_step;
4470*38fd1498Szrj cost = get_address_cost_ainc (ainc_step, ainc_offset,
4471*38fd1498Szrj addr_mode, mem_mode, as, speed);
4472*38fd1498Szrj if (!cost.infinite_cost_p ())
4473*38fd1498Szrj {
4474*38fd1498Szrj *can_autoinc = true;
4475*38fd1498Szrj return cost;
4476*38fd1498Szrj }
4477*38fd1498Szrj cost = no_cost;
4478*38fd1498Szrj }
4479*38fd1498Szrj if (!aff_combination_zero_p (aff_inv))
4480*38fd1498Szrj {
4481*38fd1498Szrj parts.offset = wide_int_to_tree (sizetype, aff_inv->offset);
4482*38fd1498Szrj /* Addressing mode "base + offset". */
4483*38fd1498Szrj if (!valid_mem_ref_p (mem_mode, as, &parts))
4484*38fd1498Szrj parts.offset = NULL_TREE;
4485*38fd1498Szrj else
4486*38fd1498Szrj aff_inv->offset = 0;
4487*38fd1498Szrj }
4488*38fd1498Szrj }
4489*38fd1498Szrj
4490*38fd1498Szrj if (simple_inv)
4491*38fd1498Szrj simple_inv = (aff_inv == NULL
4492*38fd1498Szrj || aff_combination_const_p (aff_inv)
4493*38fd1498Szrj || aff_combination_singleton_var_p (aff_inv));
4494*38fd1498Szrj if (!aff_combination_zero_p (aff_inv))
4495*38fd1498Szrj comp_inv = aff_combination_to_tree (aff_inv);
4496*38fd1498Szrj if (comp_inv != NULL_TREE)
4497*38fd1498Szrj cost = force_var_cost (data, comp_inv, inv_vars);
4498*38fd1498Szrj if (ratio != 1 && parts.step == NULL_TREE)
4499*38fd1498Szrj var_cost += mult_by_coeff_cost (ratio, addr_mode, speed);
4500*38fd1498Szrj if (comp_inv != NULL_TREE && parts.index == NULL_TREE)
4501*38fd1498Szrj var_cost += add_cost (speed, addr_mode);
4502*38fd1498Szrj
4503*38fd1498Szrj if (comp_inv && inv_expr && !simple_inv)
4504*38fd1498Szrj {
4505*38fd1498Szrj *inv_expr = get_loop_invariant_expr (data, comp_inv);
4506*38fd1498Szrj /* Clear depends on. */
4507*38fd1498Szrj if (*inv_expr != NULL && inv_vars && *inv_vars)
4508*38fd1498Szrj bitmap_clear (*inv_vars);
4509*38fd1498Szrj
4510*38fd1498Szrj /* Cost of small invariant expression adjusted against loop niters
4511*38fd1498Szrj is usually zero, which makes it difficult to be differentiated
4512*38fd1498Szrj from candidate based on loop invariant variables. Secondly, the
4513*38fd1498Szrj generated invariant expression may not be hoisted out of loop by
4514*38fd1498Szrj following pass. We penalize the cost by rounding up in order to
4515*38fd1498Szrj neutralize such effects. */
4516*38fd1498Szrj cost.cost = adjust_setup_cost (data, cost.cost, true);
4517*38fd1498Szrj cost.scratch = cost.cost;
4518*38fd1498Szrj }
4519*38fd1498Szrj
4520*38fd1498Szrj cost += var_cost;
4521*38fd1498Szrj addr = addr_for_mem_ref (&parts, as, false);
4522*38fd1498Szrj gcc_assert (memory_address_addr_space_p (mem_mode, addr, as));
4523*38fd1498Szrj cost += address_cost (addr, mem_mode, as, speed);
4524*38fd1498Szrj
4525*38fd1498Szrj if (parts.symbol != NULL_TREE)
4526*38fd1498Szrj cost.complexity += 1;
4527*38fd1498Szrj /* Don't increase the complexity of adding a scaled index if it's
4528*38fd1498Szrj the only kind of index that the target allows. */
4529*38fd1498Szrj if (parts.step != NULL_TREE && ok_without_ratio_p)
4530*38fd1498Szrj cost.complexity += 1;
4531*38fd1498Szrj if (parts.base != NULL_TREE && parts.index != NULL_TREE)
4532*38fd1498Szrj cost.complexity += 1;
4533*38fd1498Szrj if (parts.offset != NULL_TREE && !integer_zerop (parts.offset))
4534*38fd1498Szrj cost.complexity += 1;
4535*38fd1498Szrj
4536*38fd1498Szrj return cost;
4537*38fd1498Szrj }
4538*38fd1498Szrj
4539*38fd1498Szrj /* Scale (multiply) the computed COST (except the scratch part that should
4540*38fd1498Szrj be hoisted out of the loop) by header->frequency / AT->frequency, which
4541*38fd1498Szrj makes the expected cost more accurate. */
4542*38fd1498Szrj
4543*38fd1498Szrj static comp_cost
get_scaled_computation_cost_at(ivopts_data * data,gimple * at,comp_cost cost)4544*38fd1498Szrj get_scaled_computation_cost_at (ivopts_data *data, gimple *at, comp_cost cost)
4545*38fd1498Szrj {
4546*38fd1498Szrj int loop_freq = data->current_loop->header->count.to_frequency (cfun);
4547*38fd1498Szrj int bb_freq = gimple_bb (at)->count.to_frequency (cfun);
4548*38fd1498Szrj if (loop_freq != 0)
4549*38fd1498Szrj {
4550*38fd1498Szrj gcc_assert (cost.scratch <= cost.cost);
4551*38fd1498Szrj int scaled_cost
4552*38fd1498Szrj = cost.scratch + (cost.cost - cost.scratch) * bb_freq / loop_freq;
4553*38fd1498Szrj
4554*38fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
4555*38fd1498Szrj fprintf (dump_file, "Scaling cost based on bb prob "
4556*38fd1498Szrj "by %2.2f: %d (scratch: %d) -> %d (%d/%d)\n",
4557*38fd1498Szrj 1.0f * bb_freq / loop_freq, cost.cost,
4558*38fd1498Szrj cost.scratch, scaled_cost, bb_freq, loop_freq);
4559*38fd1498Szrj
4560*38fd1498Szrj cost.cost = scaled_cost;
4561*38fd1498Szrj }
4562*38fd1498Szrj
4563*38fd1498Szrj return cost;
4564*38fd1498Szrj }
4565*38fd1498Szrj
4566*38fd1498Szrj /* Determines the cost of the computation by that USE is expressed
4567*38fd1498Szrj from induction variable CAND. If ADDRESS_P is true, we just need
4568*38fd1498Szrj to create an address from it, otherwise we want to get it into
4569*38fd1498Szrj register. A set of invariants we depend on is stored in INV_VARS.
4570*38fd1498Szrj If CAN_AUTOINC is nonnull, use it to record whether autoinc
4571*38fd1498Szrj addressing is likely. If INV_EXPR is nonnull, record invariant
4572*38fd1498Szrj expr entry in it. */
4573*38fd1498Szrj
4574*38fd1498Szrj static comp_cost
get_computation_cost(struct ivopts_data * data,struct iv_use * use,struct iv_cand * cand,bool address_p,bitmap * inv_vars,bool * can_autoinc,iv_inv_expr_ent ** inv_expr)4575*38fd1498Szrj get_computation_cost (struct ivopts_data *data, struct iv_use *use,
4576*38fd1498Szrj struct iv_cand *cand, bool address_p, bitmap *inv_vars,
4577*38fd1498Szrj bool *can_autoinc, iv_inv_expr_ent **inv_expr)
4578*38fd1498Szrj {
4579*38fd1498Szrj gimple *at = use->stmt;
4580*38fd1498Szrj tree ubase = use->iv->base, cbase = cand->iv->base;
4581*38fd1498Szrj tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
4582*38fd1498Szrj tree comp_inv = NULL_TREE;
4583*38fd1498Szrj HOST_WIDE_INT ratio, aratio;
4584*38fd1498Szrj comp_cost cost;
4585*38fd1498Szrj widest_int rat;
4586*38fd1498Szrj aff_tree aff_inv, aff_var;
4587*38fd1498Szrj bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4588*38fd1498Szrj
4589*38fd1498Szrj if (inv_vars)
4590*38fd1498Szrj *inv_vars = NULL;
4591*38fd1498Szrj if (can_autoinc)
4592*38fd1498Szrj *can_autoinc = false;
4593*38fd1498Szrj if (inv_expr)
4594*38fd1498Szrj *inv_expr = NULL;
4595*38fd1498Szrj
4596*38fd1498Szrj /* Check if we have enough precision to express the values of use. */
4597*38fd1498Szrj if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4598*38fd1498Szrj return infinite_cost;
4599*38fd1498Szrj
4600*38fd1498Szrj if (address_p
4601*38fd1498Szrj || (use->iv->base_object
4602*38fd1498Szrj && cand->iv->base_object
4603*38fd1498Szrj && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4604*38fd1498Szrj && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4605*38fd1498Szrj {
4606*38fd1498Szrj /* Do not try to express address of an object with computation based
4607*38fd1498Szrj on address of a different object. This may cause problems in rtl
4608*38fd1498Szrj level alias analysis (that does not expect this to be happening,
4609*38fd1498Szrj as this is illegal in C), and would be unlikely to be useful
4610*38fd1498Szrj anyway. */
4611*38fd1498Szrj if (use->iv->base_object
4612*38fd1498Szrj && cand->iv->base_object
4613*38fd1498Szrj && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4614*38fd1498Szrj return infinite_cost;
4615*38fd1498Szrj }
4616*38fd1498Szrj
4617*38fd1498Szrj if (!get_computation_aff_1 (data->current_loop, at, use,
4618*38fd1498Szrj cand, &aff_inv, &aff_var, &rat)
4619*38fd1498Szrj || !wi::fits_shwi_p (rat))
4620*38fd1498Szrj return infinite_cost;
4621*38fd1498Szrj
4622*38fd1498Szrj ratio = rat.to_shwi ();
4623*38fd1498Szrj if (address_p)
4624*38fd1498Szrj {
4625*38fd1498Szrj cost = get_address_cost (data, use, cand, &aff_inv, &aff_var, ratio,
4626*38fd1498Szrj inv_vars, inv_expr, can_autoinc, speed);
4627*38fd1498Szrj return get_scaled_computation_cost_at (data, at, cost);
4628*38fd1498Szrj }
4629*38fd1498Szrj
4630*38fd1498Szrj bool simple_inv = (aff_combination_const_p (&aff_inv)
4631*38fd1498Szrj || aff_combination_singleton_var_p (&aff_inv));
4632*38fd1498Szrj tree signed_type = signed_type_for (aff_combination_type (&aff_inv));
4633*38fd1498Szrj aff_combination_convert (&aff_inv, signed_type);
4634*38fd1498Szrj if (!aff_combination_zero_p (&aff_inv))
4635*38fd1498Szrj comp_inv = aff_combination_to_tree (&aff_inv);
4636*38fd1498Szrj
4637*38fd1498Szrj cost = force_var_cost (data, comp_inv, inv_vars);
4638*38fd1498Szrj if (comp_inv && inv_expr && !simple_inv)
4639*38fd1498Szrj {
4640*38fd1498Szrj *inv_expr = get_loop_invariant_expr (data, comp_inv);
4641*38fd1498Szrj /* Clear depends on. */
4642*38fd1498Szrj if (*inv_expr != NULL && inv_vars && *inv_vars)
4643*38fd1498Szrj bitmap_clear (*inv_vars);
4644*38fd1498Szrj
4645*38fd1498Szrj cost.cost = adjust_setup_cost (data, cost.cost);
4646*38fd1498Szrj /* Record setup cost in scratch field. */
4647*38fd1498Szrj cost.scratch = cost.cost;
4648*38fd1498Szrj }
4649*38fd1498Szrj /* Cost of constant integer can be covered when adding invariant part to
4650*38fd1498Szrj variant part. */
4651*38fd1498Szrj else if (comp_inv && CONSTANT_CLASS_P (comp_inv))
4652*38fd1498Szrj cost = no_cost;
4653*38fd1498Szrj
4654*38fd1498Szrj /* Need type narrowing to represent use with cand. */
4655*38fd1498Szrj if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4656*38fd1498Szrj {
4657*38fd1498Szrj machine_mode outer_mode = TYPE_MODE (utype);
4658*38fd1498Szrj machine_mode inner_mode = TYPE_MODE (ctype);
4659*38fd1498Szrj cost += comp_cost (convert_cost (outer_mode, inner_mode, speed), 0);
4660*38fd1498Szrj }
4661*38fd1498Szrj
4662*38fd1498Szrj /* Turn a + i * (-c) into a - i * c. */
4663*38fd1498Szrj if (ratio < 0 && comp_inv && !integer_zerop (comp_inv))
4664*38fd1498Szrj aratio = -ratio;
4665*38fd1498Szrj else
4666*38fd1498Szrj aratio = ratio;
4667*38fd1498Szrj
4668*38fd1498Szrj if (ratio != 1)
4669*38fd1498Szrj cost += mult_by_coeff_cost (aratio, TYPE_MODE (utype), speed);
4670*38fd1498Szrj
4671*38fd1498Szrj /* TODO: We may also need to check if we can compute a + i * 4 in one
4672*38fd1498Szrj instruction. */
4673*38fd1498Szrj /* Need to add up the invariant and variant parts. */
4674*38fd1498Szrj if (comp_inv && !integer_zerop (comp_inv))
4675*38fd1498Szrj cost += add_cost (speed, TYPE_MODE (utype));
4676*38fd1498Szrj
4677*38fd1498Szrj return get_scaled_computation_cost_at (data, at, cost);
4678*38fd1498Szrj }
4679*38fd1498Szrj
4680*38fd1498Szrj /* Determines cost of computing the use in GROUP with CAND in a generic
4681*38fd1498Szrj expression. */
4682*38fd1498Szrj
4683*38fd1498Szrj static bool
determine_group_iv_cost_generic(struct ivopts_data * data,struct iv_group * group,struct iv_cand * cand)4684*38fd1498Szrj determine_group_iv_cost_generic (struct ivopts_data *data,
4685*38fd1498Szrj struct iv_group *group, struct iv_cand *cand)
4686*38fd1498Szrj {
4687*38fd1498Szrj comp_cost cost;
4688*38fd1498Szrj iv_inv_expr_ent *inv_expr = NULL;
4689*38fd1498Szrj bitmap inv_vars = NULL, inv_exprs = NULL;
4690*38fd1498Szrj struct iv_use *use = group->vuses[0];
4691*38fd1498Szrj
4692*38fd1498Szrj /* The simple case first -- if we need to express value of the preserved
4693*38fd1498Szrj original biv, the cost is 0. This also prevents us from counting the
4694*38fd1498Szrj cost of increment twice -- once at this use and once in the cost of
4695*38fd1498Szrj the candidate. */
4696*38fd1498Szrj if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
4697*38fd1498Szrj cost = no_cost;
4698*38fd1498Szrj else
4699*38fd1498Szrj cost = get_computation_cost (data, use, cand, false,
4700*38fd1498Szrj &inv_vars, NULL, &inv_expr);
4701*38fd1498Szrj
4702*38fd1498Szrj if (inv_expr)
4703*38fd1498Szrj {
4704*38fd1498Szrj inv_exprs = BITMAP_ALLOC (NULL);
4705*38fd1498Szrj bitmap_set_bit (inv_exprs, inv_expr->id);
4706*38fd1498Szrj }
4707*38fd1498Szrj set_group_iv_cost (data, group, cand, cost, inv_vars,
4708*38fd1498Szrj NULL_TREE, ERROR_MARK, inv_exprs);
4709*38fd1498Szrj return !cost.infinite_cost_p ();
4710*38fd1498Szrj }
4711*38fd1498Szrj
4712*38fd1498Szrj /* Determines cost of computing uses in GROUP with CAND in addresses. */
4713*38fd1498Szrj
4714*38fd1498Szrj static bool
determine_group_iv_cost_address(struct ivopts_data * data,struct iv_group * group,struct iv_cand * cand)4715*38fd1498Szrj determine_group_iv_cost_address (struct ivopts_data *data,
4716*38fd1498Szrj struct iv_group *group, struct iv_cand *cand)
4717*38fd1498Szrj {
4718*38fd1498Szrj unsigned i;
4719*38fd1498Szrj bitmap inv_vars = NULL, inv_exprs = NULL;
4720*38fd1498Szrj bool can_autoinc;
4721*38fd1498Szrj iv_inv_expr_ent *inv_expr = NULL;
4722*38fd1498Szrj struct iv_use *use = group->vuses[0];
4723*38fd1498Szrj comp_cost sum_cost = no_cost, cost;
4724*38fd1498Szrj
4725*38fd1498Szrj cost = get_computation_cost (data, use, cand, true,
4726*38fd1498Szrj &inv_vars, &can_autoinc, &inv_expr);
4727*38fd1498Szrj
4728*38fd1498Szrj if (inv_expr)
4729*38fd1498Szrj {
4730*38fd1498Szrj inv_exprs = BITMAP_ALLOC (NULL);
4731*38fd1498Szrj bitmap_set_bit (inv_exprs, inv_expr->id);
4732*38fd1498Szrj }
4733*38fd1498Szrj sum_cost = cost;
4734*38fd1498Szrj if (!sum_cost.infinite_cost_p () && cand->ainc_use == use)
4735*38fd1498Szrj {
4736*38fd1498Szrj if (can_autoinc)
4737*38fd1498Szrj sum_cost -= cand->cost_step;
4738*38fd1498Szrj /* If we generated the candidate solely for exploiting autoincrement
4739*38fd1498Szrj opportunities, and it turns out it can't be used, set the cost to
4740*38fd1498Szrj infinity to make sure we ignore it. */
4741*38fd1498Szrj else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4742*38fd1498Szrj sum_cost = infinite_cost;
4743*38fd1498Szrj }
4744*38fd1498Szrj
4745*38fd1498Szrj /* Uses in a group can share setup code, so only add setup cost once. */
4746*38fd1498Szrj cost -= cost.scratch;
4747*38fd1498Szrj /* Compute and add costs for rest uses of this group. */
4748*38fd1498Szrj for (i = 1; i < group->vuses.length () && !sum_cost.infinite_cost_p (); i++)
4749*38fd1498Szrj {
4750*38fd1498Szrj struct iv_use *next = group->vuses[i];
4751*38fd1498Szrj
4752*38fd1498Szrj /* TODO: We could skip computing cost for sub iv_use when it has the
4753*38fd1498Szrj same cost as the first iv_use, but the cost really depends on the
4754*38fd1498Szrj offset and where the iv_use is. */
4755*38fd1498Szrj cost = get_computation_cost (data, next, cand, true,
4756*38fd1498Szrj NULL, &can_autoinc, &inv_expr);
4757*38fd1498Szrj if (inv_expr)
4758*38fd1498Szrj {
4759*38fd1498Szrj if (!inv_exprs)
4760*38fd1498Szrj inv_exprs = BITMAP_ALLOC (NULL);
4761*38fd1498Szrj
4762*38fd1498Szrj bitmap_set_bit (inv_exprs, inv_expr->id);
4763*38fd1498Szrj }
4764*38fd1498Szrj sum_cost += cost;
4765*38fd1498Szrj }
4766*38fd1498Szrj set_group_iv_cost (data, group, cand, sum_cost, inv_vars,
4767*38fd1498Szrj NULL_TREE, ERROR_MARK, inv_exprs);
4768*38fd1498Szrj
4769*38fd1498Szrj return !sum_cost.infinite_cost_p ();
4770*38fd1498Szrj }
4771*38fd1498Szrj
4772*38fd1498Szrj /* Computes value of candidate CAND at position AT in iteration NITER, and
4773*38fd1498Szrj stores it to VAL. */
4774*38fd1498Szrj
4775*38fd1498Szrj static void
cand_value_at(struct loop * loop,struct iv_cand * cand,gimple * at,tree niter,aff_tree * val)4776*38fd1498Szrj cand_value_at (struct loop *loop, struct iv_cand *cand, gimple *at, tree niter,
4777*38fd1498Szrj aff_tree *val)
4778*38fd1498Szrj {
4779*38fd1498Szrj aff_tree step, delta, nit;
4780*38fd1498Szrj struct iv *iv = cand->iv;
4781*38fd1498Szrj tree type = TREE_TYPE (iv->base);
4782*38fd1498Szrj tree steptype;
4783*38fd1498Szrj if (POINTER_TYPE_P (type))
4784*38fd1498Szrj steptype = sizetype;
4785*38fd1498Szrj else
4786*38fd1498Szrj steptype = unsigned_type_for (type);
4787*38fd1498Szrj
4788*38fd1498Szrj tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
4789*38fd1498Szrj aff_combination_convert (&step, steptype);
4790*38fd1498Szrj tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4791*38fd1498Szrj aff_combination_convert (&nit, steptype);
4792*38fd1498Szrj aff_combination_mult (&nit, &step, &delta);
4793*38fd1498Szrj if (stmt_after_increment (loop, cand, at))
4794*38fd1498Szrj aff_combination_add (&delta, &step);
4795*38fd1498Szrj
4796*38fd1498Szrj tree_to_aff_combination (iv->base, type, val);
4797*38fd1498Szrj if (!POINTER_TYPE_P (type))
4798*38fd1498Szrj aff_combination_convert (val, steptype);
4799*38fd1498Szrj aff_combination_add (val, &delta);
4800*38fd1498Szrj }
4801*38fd1498Szrj
4802*38fd1498Szrj /* Returns period of induction variable iv. */
4803*38fd1498Szrj
4804*38fd1498Szrj static tree
iv_period(struct iv * iv)4805*38fd1498Szrj iv_period (struct iv *iv)
4806*38fd1498Szrj {
4807*38fd1498Szrj tree step = iv->step, period, type;
4808*38fd1498Szrj tree pow2div;
4809*38fd1498Szrj
4810*38fd1498Szrj gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4811*38fd1498Szrj
4812*38fd1498Szrj type = unsigned_type_for (TREE_TYPE (step));
4813*38fd1498Szrj /* Period of the iv is lcm (step, type_range)/step -1,
4814*38fd1498Szrj i.e., N*type_range/step - 1. Since type range is power
4815*38fd1498Szrj of two, N == (step >> num_of_ending_zeros_binary (step),
4816*38fd1498Szrj so the final result is
4817*38fd1498Szrj
4818*38fd1498Szrj (type_range >> num_of_ending_zeros_binary (step)) - 1
4819*38fd1498Szrj
4820*38fd1498Szrj */
4821*38fd1498Szrj pow2div = num_ending_zeros (step);
4822*38fd1498Szrj
4823*38fd1498Szrj period = build_low_bits_mask (type,
4824*38fd1498Szrj (TYPE_PRECISION (type)
4825*38fd1498Szrj - tree_to_uhwi (pow2div)));
4826*38fd1498Szrj
4827*38fd1498Szrj return period;
4828*38fd1498Szrj }
4829*38fd1498Szrj
4830*38fd1498Szrj /* Returns the comparison operator used when eliminating the iv USE. */
4831*38fd1498Szrj
4832*38fd1498Szrj static enum tree_code
iv_elimination_compare(struct ivopts_data * data,struct iv_use * use)4833*38fd1498Szrj iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4834*38fd1498Szrj {
4835*38fd1498Szrj struct loop *loop = data->current_loop;
4836*38fd1498Szrj basic_block ex_bb;
4837*38fd1498Szrj edge exit;
4838*38fd1498Szrj
4839*38fd1498Szrj ex_bb = gimple_bb (use->stmt);
4840*38fd1498Szrj exit = EDGE_SUCC (ex_bb, 0);
4841*38fd1498Szrj if (flow_bb_inside_loop_p (loop, exit->dest))
4842*38fd1498Szrj exit = EDGE_SUCC (ex_bb, 1);
4843*38fd1498Szrj
4844*38fd1498Szrj return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4845*38fd1498Szrj }
4846*38fd1498Szrj
4847*38fd1498Szrj /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
4848*38fd1498Szrj we only detect the situation that BASE = SOMETHING + OFFSET, where the
4849*38fd1498Szrj calculation is performed in non-wrapping type.
4850*38fd1498Szrj
4851*38fd1498Szrj TODO: More generally, we could test for the situation that
4852*38fd1498Szrj BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4853*38fd1498Szrj This would require knowing the sign of OFFSET. */
4854*38fd1498Szrj
4855*38fd1498Szrj static bool
difference_cannot_overflow_p(struct ivopts_data * data,tree base,tree offset)4856*38fd1498Szrj difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
4857*38fd1498Szrj {
4858*38fd1498Szrj enum tree_code code;
4859*38fd1498Szrj tree e1, e2;
4860*38fd1498Szrj aff_tree aff_e1, aff_e2, aff_offset;
4861*38fd1498Szrj
4862*38fd1498Szrj if (!nowrap_type_p (TREE_TYPE (base)))
4863*38fd1498Szrj return false;
4864*38fd1498Szrj
4865*38fd1498Szrj base = expand_simple_operations (base);
4866*38fd1498Szrj
4867*38fd1498Szrj if (TREE_CODE (base) == SSA_NAME)
4868*38fd1498Szrj {
4869*38fd1498Szrj gimple *stmt = SSA_NAME_DEF_STMT (base);
4870*38fd1498Szrj
4871*38fd1498Szrj if (gimple_code (stmt) != GIMPLE_ASSIGN)
4872*38fd1498Szrj return false;
4873*38fd1498Szrj
4874*38fd1498Szrj code = gimple_assign_rhs_code (stmt);
4875*38fd1498Szrj if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4876*38fd1498Szrj return false;
4877*38fd1498Szrj
4878*38fd1498Szrj e1 = gimple_assign_rhs1 (stmt);
4879*38fd1498Szrj e2 = gimple_assign_rhs2 (stmt);
4880*38fd1498Szrj }
4881*38fd1498Szrj else
4882*38fd1498Szrj {
4883*38fd1498Szrj code = TREE_CODE (base);
4884*38fd1498Szrj if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4885*38fd1498Szrj return false;
4886*38fd1498Szrj e1 = TREE_OPERAND (base, 0);
4887*38fd1498Szrj e2 = TREE_OPERAND (base, 1);
4888*38fd1498Szrj }
4889*38fd1498Szrj
4890*38fd1498Szrj /* Use affine expansion as deeper inspection to prove the equality. */
4891*38fd1498Szrj tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
4892*38fd1498Szrj &aff_e2, &data->name_expansion_cache);
4893*38fd1498Szrj tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
4894*38fd1498Szrj &aff_offset, &data->name_expansion_cache);
4895*38fd1498Szrj aff_combination_scale (&aff_offset, -1);
4896*38fd1498Szrj switch (code)
4897*38fd1498Szrj {
4898*38fd1498Szrj case PLUS_EXPR:
4899*38fd1498Szrj aff_combination_add (&aff_e2, &aff_offset);
4900*38fd1498Szrj if (aff_combination_zero_p (&aff_e2))
4901*38fd1498Szrj return true;
4902*38fd1498Szrj
4903*38fd1498Szrj tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
4904*38fd1498Szrj &aff_e1, &data->name_expansion_cache);
4905*38fd1498Szrj aff_combination_add (&aff_e1, &aff_offset);
4906*38fd1498Szrj return aff_combination_zero_p (&aff_e1);
4907*38fd1498Szrj
4908*38fd1498Szrj case POINTER_PLUS_EXPR:
4909*38fd1498Szrj aff_combination_add (&aff_e2, &aff_offset);
4910*38fd1498Szrj return aff_combination_zero_p (&aff_e2);
4911*38fd1498Szrj
4912*38fd1498Szrj default:
4913*38fd1498Szrj return false;
4914*38fd1498Szrj }
4915*38fd1498Szrj }
4916*38fd1498Szrj
4917*38fd1498Szrj /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4918*38fd1498Szrj comparison with CAND. NITER describes the number of iterations of
4919*38fd1498Szrj the loops. If successful, the comparison in COMP_P is altered accordingly.
4920*38fd1498Szrj
4921*38fd1498Szrj We aim to handle the following situation:
4922*38fd1498Szrj
4923*38fd1498Szrj sometype *base, *p;
4924*38fd1498Szrj int a, b, i;
4925*38fd1498Szrj
4926*38fd1498Szrj i = a;
4927*38fd1498Szrj p = p_0 = base + a;
4928*38fd1498Szrj
4929*38fd1498Szrj do
4930*38fd1498Szrj {
4931*38fd1498Szrj bla (*p);
4932*38fd1498Szrj p++;
4933*38fd1498Szrj i++;
4934*38fd1498Szrj }
4935*38fd1498Szrj while (i < b);
4936*38fd1498Szrj
4937*38fd1498Szrj Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4938*38fd1498Szrj We aim to optimize this to
4939*38fd1498Szrj
4940*38fd1498Szrj p = p_0 = base + a;
4941*38fd1498Szrj do
4942*38fd1498Szrj {
4943*38fd1498Szrj bla (*p);
4944*38fd1498Szrj p++;
4945*38fd1498Szrj }
4946*38fd1498Szrj while (p < p_0 - a + b);
4947*38fd1498Szrj
4948*38fd1498Szrj This preserves the correctness, since the pointer arithmetics does not
4949*38fd1498Szrj overflow. More precisely:
4950*38fd1498Szrj
4951*38fd1498Szrj 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4952*38fd1498Szrj overflow in computing it or the values of p.
4953*38fd1498Szrj 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4954*38fd1498Szrj overflow. To prove this, we use the fact that p_0 = base + a. */
4955*38fd1498Szrj
4956*38fd1498Szrj static bool
iv_elimination_compare_lt(struct ivopts_data * data,struct iv_cand * cand,enum tree_code * comp_p,struct tree_niter_desc * niter)4957*38fd1498Szrj iv_elimination_compare_lt (struct ivopts_data *data,
4958*38fd1498Szrj struct iv_cand *cand, enum tree_code *comp_p,
4959*38fd1498Szrj struct tree_niter_desc *niter)
4960*38fd1498Szrj {
4961*38fd1498Szrj tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
4962*38fd1498Szrj struct aff_tree nit, tmpa, tmpb;
4963*38fd1498Szrj enum tree_code comp;
4964*38fd1498Szrj HOST_WIDE_INT step;
4965*38fd1498Szrj
4966*38fd1498Szrj /* We need to know that the candidate induction variable does not overflow.
4967*38fd1498Szrj While more complex analysis may be used to prove this, for now just
4968*38fd1498Szrj check that the variable appears in the original program and that it
4969*38fd1498Szrj is computed in a type that guarantees no overflows. */
4970*38fd1498Szrj cand_type = TREE_TYPE (cand->iv->base);
4971*38fd1498Szrj if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
4972*38fd1498Szrj return false;
4973*38fd1498Szrj
4974*38fd1498Szrj /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4975*38fd1498Szrj the calculation of the BOUND could overflow, making the comparison
4976*38fd1498Szrj invalid. */
4977*38fd1498Szrj if (!data->loop_single_exit_p)
4978*38fd1498Szrj return false;
4979*38fd1498Szrj
4980*38fd1498Szrj /* We need to be able to decide whether candidate is increasing or decreasing
4981*38fd1498Szrj in order to choose the right comparison operator. */
4982*38fd1498Szrj if (!cst_and_fits_in_hwi (cand->iv->step))
4983*38fd1498Szrj return false;
4984*38fd1498Szrj step = int_cst_value (cand->iv->step);
4985*38fd1498Szrj
4986*38fd1498Szrj /* Check that the number of iterations matches the expected pattern:
4987*38fd1498Szrj a + 1 > b ? 0 : b - a - 1. */
4988*38fd1498Szrj mbz = niter->may_be_zero;
4989*38fd1498Szrj if (TREE_CODE (mbz) == GT_EXPR)
4990*38fd1498Szrj {
4991*38fd1498Szrj /* Handle a + 1 > b. */
4992*38fd1498Szrj tree op0 = TREE_OPERAND (mbz, 0);
4993*38fd1498Szrj if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
4994*38fd1498Szrj {
4995*38fd1498Szrj a = TREE_OPERAND (op0, 0);
4996*38fd1498Szrj b = TREE_OPERAND (mbz, 1);
4997*38fd1498Szrj }
4998*38fd1498Szrj else
4999*38fd1498Szrj return false;
5000*38fd1498Szrj }
5001*38fd1498Szrj else if (TREE_CODE (mbz) == LT_EXPR)
5002*38fd1498Szrj {
5003*38fd1498Szrj tree op1 = TREE_OPERAND (mbz, 1);
5004*38fd1498Szrj
5005*38fd1498Szrj /* Handle b < a + 1. */
5006*38fd1498Szrj if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
5007*38fd1498Szrj {
5008*38fd1498Szrj a = TREE_OPERAND (op1, 0);
5009*38fd1498Szrj b = TREE_OPERAND (mbz, 0);
5010*38fd1498Szrj }
5011*38fd1498Szrj else
5012*38fd1498Szrj return false;
5013*38fd1498Szrj }
5014*38fd1498Szrj else
5015*38fd1498Szrj return false;
5016*38fd1498Szrj
5017*38fd1498Szrj /* Expected number of iterations is B - A - 1. Check that it matches
5018*38fd1498Szrj the actual number, i.e., that B - A - NITER = 1. */
5019*38fd1498Szrj tree_to_aff_combination (niter->niter, nit_type, &nit);
5020*38fd1498Szrj tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
5021*38fd1498Szrj tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
5022*38fd1498Szrj aff_combination_scale (&nit, -1);
5023*38fd1498Szrj aff_combination_scale (&tmpa, -1);
5024*38fd1498Szrj aff_combination_add (&tmpb, &tmpa);
5025*38fd1498Szrj aff_combination_add (&tmpb, &nit);
5026*38fd1498Szrj if (tmpb.n != 0 || maybe_ne (tmpb.offset, 1))
5027*38fd1498Szrj return false;
5028*38fd1498Szrj
5029*38fd1498Szrj /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
5030*38fd1498Szrj overflow. */
5031*38fd1498Szrj offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
5032*38fd1498Szrj cand->iv->step,
5033*38fd1498Szrj fold_convert (TREE_TYPE (cand->iv->step), a));
5034*38fd1498Szrj if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
5035*38fd1498Szrj return false;
5036*38fd1498Szrj
5037*38fd1498Szrj /* Determine the new comparison operator. */
5038*38fd1498Szrj comp = step < 0 ? GT_EXPR : LT_EXPR;
5039*38fd1498Szrj if (*comp_p == NE_EXPR)
5040*38fd1498Szrj *comp_p = comp;
5041*38fd1498Szrj else if (*comp_p == EQ_EXPR)
5042*38fd1498Szrj *comp_p = invert_tree_comparison (comp, false);
5043*38fd1498Szrj else
5044*38fd1498Szrj gcc_unreachable ();
5045*38fd1498Szrj
5046*38fd1498Szrj return true;
5047*38fd1498Szrj }
5048*38fd1498Szrj
/* Check whether it is possible to express the condition in USE by comparison
   of candidate CAND.  If so, store the value compared with to BOUND, and the
   comparison operator to COMP.  Returns false when the exit condition cannot
   be safely rewritten in terms of CAND.  */

static bool
may_eliminate_iv (struct ivopts_data *data,
		  struct iv_use *use, struct iv_cand *cand, tree *bound,
		  enum tree_code *comp)
{
  basic_block ex_bb;
  edge exit;
  tree period;
  struct loop *loop = data->current_loop;
  aff_tree bnd;
  struct tree_niter_desc *desc = NULL;

  /* NOTE(review): iv_period below presumably requires a constant step;
     only constant-step candidates are considered here.  */
  if (TREE_CODE (cand->iv->step) != INTEGER_CST)
    return false;

  /* For now works only for exits that dominate the loop latch.
     TODO: extend to other conditions inside loop body.  */
  ex_bb = gimple_bb (use->stmt);
  if (use->stmt != last_stmt (ex_bb)
      || gimple_code (use->stmt) != GIMPLE_COND
      || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
    return false;

  /* Locate the edge leaving the loop; the condition block has two
     successors, and at most one may remain inside the loop.  */
  exit = EDGE_SUCC (ex_bb, 0);
  if (flow_bb_inside_loop_p (loop, exit->dest))
    exit = EDGE_SUCC (ex_bb, 1);
  if (flow_bb_inside_loop_p (loop, exit->dest))
    return false;

  /* Without a niter description for this exit we cannot reason about
     the value of CAND at loop exit.  */
  desc = niter_for_exit (data, exit);
  if (!desc)
    return false;

  /* Determine whether we can use the variable to test the exit condition.
     This is the case iff the period of the induction variable is greater
     than the number of iterations for which the exit condition is true.  */
  period = iv_period (cand->iv);

  /* If the number of iterations is constant, compare against it directly.  */
  if (TREE_CODE (desc->niter) == INTEGER_CST)
    {
      /* See cand_value_at.  */
      if (stmt_after_increment (loop, cand, use->stmt))
	{
	  /* The use sees the incremented value, so the candidate wraps one
	     step earlier: require NITER < PERIOD strictly.  */
	  if (!tree_int_cst_lt (desc->niter, period))
	    return false;
	}
      else
	{
	  /* NITER <= PERIOD suffices when the use precedes the increment.  */
	  if (tree_int_cst_lt (period, desc->niter))
	    return false;
	}
    }

  /* If not, and if this is the only possible exit of the loop, see whether
     we can get a conservative estimate on the number of iterations of the
     entire loop and compare against that instead.  */
  else
    {
      widest_int period_value, max_niter;

      max_niter = desc->max;
      if (stmt_after_increment (loop, cand, use->stmt))
	max_niter += 1;
      period_value = wi::to_widest (period);
      if (wi::gtu_p (max_niter, period_value))
	{
	  /* See if we can take advantage of inferred loop bound
	     information.  */
	  if (data->loop_single_exit_p)
	    {
	      if (!max_loop_iterations (loop, &max_niter))
		return false;
	      /* The loop bound is already adjusted by adding 1.  */
	      if (wi::gtu_p (max_niter, period_value))
		return false;
	    }
	  else
	    return false;
	}
    }

  /* Value of CAND after NITER iterations, as an affine combination.  */
  cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);

  *bound = fold_convert (TREE_TYPE (cand->iv->base),
			 aff_combination_to_tree (&bnd));
  *comp = iv_elimination_compare (data, use);

  /* It is unlikely that computing the number of iterations using division
     would be more profitable than keeping the original induction variable.  */
  if (expression_expensive_p (*bound))
    return false;

  /* Sometimes, it is possible to handle the situation that the number of
     iterations may be zero unless additional assumptions by using <
     instead of != in the exit condition.

     TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
	   base the exit condition on it.  However, that is often too
	   expensive.  */
  if (!integer_zerop (desc->may_be_zero))
    return iv_elimination_compare_lt (data, cand, comp, desc);

  return true;
}
5158*38fd1498Szrj
5159*38fd1498Szrj /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
5160*38fd1498Szrj be copied, if it is used in the loop body and DATA->body_includes_call. */
5161*38fd1498Szrj
5162*38fd1498Szrj static int
parm_decl_cost(struct ivopts_data * data,tree bound)5163*38fd1498Szrj parm_decl_cost (struct ivopts_data *data, tree bound)
5164*38fd1498Szrj {
5165*38fd1498Szrj tree sbound = bound;
5166*38fd1498Szrj STRIP_NOPS (sbound);
5167*38fd1498Szrj
5168*38fd1498Szrj if (TREE_CODE (sbound) == SSA_NAME
5169*38fd1498Szrj && SSA_NAME_IS_DEFAULT_DEF (sbound)
5170*38fd1498Szrj && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
5171*38fd1498Szrj && data->body_includes_call)
5172*38fd1498Szrj return COSTS_N_INSNS (1);
5173*38fd1498Szrj
5174*38fd1498Szrj return 0;
5175*38fd1498Szrj }
5176*38fd1498Szrj
/* Determines cost of computing the use in GROUP with CAND in a condition.
   Records the result via set_group_iv_cost; returns true iff the resulting
   cost is finite, i.e. the use can be represented with CAND.  */

static bool
determine_group_iv_cost_cond (struct ivopts_data *data,
			      struct iv_group *group, struct iv_cand *cand)
{
  tree bound = NULL_TREE;
  struct iv *cmp_iv;
  bitmap inv_exprs = NULL;
  bitmap inv_vars_elim = NULL, inv_vars_express = NULL, inv_vars;
  comp_cost elim_cost = infinite_cost, express_cost, cost, bound_cost;
  enum comp_iv_rewrite rewrite_type;
  iv_inv_expr_ent *inv_expr_elim = NULL, *inv_expr_express = NULL, *inv_expr;
  tree *control_var, *bound_cst;
  enum tree_code comp = ERROR_MARK;
  /* Condition uses are grouped with a single representative use.  */
  struct iv_use *use = group->vuses[0];

  /* Extract condition operands.  */
  rewrite_type = extract_cond_operands (data, use->stmt, &control_var,
					&bound_cst, NULL, &cmp_iv);
  gcc_assert (rewrite_type != COMP_IV_NA);

  /* Try iv elimination: replace the condition by a comparison of CAND
     against a precomputed BOUND.  */
  if (rewrite_type == COMP_IV_ELIM
      && may_eliminate_iv (data, use, cand, &bound, &comp))
    {
      elim_cost = force_var_cost (data, bound, &inv_vars_elim);
      if (elim_cost.cost == 0)
	elim_cost.cost = parm_decl_cost (data, bound);
      else if (TREE_CODE (bound) == INTEGER_CST)
	elim_cost.cost = 0;
      /* If we replace a loop condition 'i < n' with 'p < base + n',
	 inv_vars_elim will have 'base' and 'n' set, which implies that both
	 'base' and 'n' will be live during the loop.  More likely,
	 'base + n' will be loop invariant, resulting in only one live value
	 during the loop.  So in that case we clear inv_vars_elim and set
	 inv_expr_elim instead.  */
      if (inv_vars_elim && bitmap_count_bits (inv_vars_elim) > 1)
	{
	  inv_expr_elim = get_loop_invariant_expr (data, bound);
	  bitmap_clear (inv_vars_elim);
	}
      /* The bound is a loop invariant, so it will be only computed
	 once.  */
      elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
    }

  /* When the condition is a comparison of the candidate IV against
     zero, prefer this IV.

     TODO: The constant that we're subtracting from the cost should
     be target-dependent.  This information should be added to the
     target costs for each backend.  */
  if (!elim_cost.infinite_cost_p () /* Do not try to decrease infinite! */
      && integer_zerop (*bound_cst)
      && (operand_equal_p (*control_var, cand->var_after, 0)
	  || operand_equal_p (*control_var, cand->var_before, 0)))
    elim_cost -= 1;

  /* Alternative: keep the comparison and express the control variable
     in terms of CAND.  */
  express_cost = get_computation_cost (data, use, cand, false,
				       &inv_vars_express, NULL,
				       &inv_expr_express);
  if (cmp_iv != NULL)
    find_inv_vars (data, &cmp_iv->base, &inv_vars_express);

  /* Count the cost of the original bound as well.  */
  bound_cost = force_var_cost (data, *bound_cst, NULL);
  if (bound_cost.cost == 0)
    bound_cost.cost = parm_decl_cost (data, *bound_cst);
  else if (TREE_CODE (*bound_cst) == INTEGER_CST)
    bound_cost.cost = 0;
  express_cost += bound_cost;

  /* Choose the better approach, preferring the eliminated IV.  The chosen
     inv_vars bitmap is transferred (its source pointer is nulled so it is
     not freed below).  */
  if (elim_cost <= express_cost)
    {
      cost = elim_cost;
      inv_vars = inv_vars_elim;
      inv_vars_elim = NULL;
      inv_expr = inv_expr_elim;
    }
  else
    {
      cost = express_cost;
      inv_vars = inv_vars_express;
      inv_vars_express = NULL;
      /* No elimination: record neither a bound nor a comparison code.  */
      bound = NULL_TREE;
      comp = ERROR_MARK;
      inv_expr = inv_expr_express;
    }

  if (inv_expr)
    {
      inv_exprs = BITMAP_ALLOC (NULL);
      bitmap_set_bit (inv_exprs, inv_expr->id);
    }
  set_group_iv_cost (data, group, cand, cost,
		     inv_vars, bound, comp, inv_exprs);

  /* Free whichever candidate bitmap was not transferred above.  */
  if (inv_vars_elim)
    BITMAP_FREE (inv_vars_elim);
  if (inv_vars_express)
    BITMAP_FREE (inv_vars_express);

  return !cost.infinite_cost_p ();
}
5283*38fd1498Szrj
5284*38fd1498Szrj /* Determines cost of computing uses in GROUP with CAND. Returns false
5285*38fd1498Szrj if USE cannot be represented with CAND. */
5286*38fd1498Szrj
5287*38fd1498Szrj static bool
determine_group_iv_cost(struct ivopts_data * data,struct iv_group * group,struct iv_cand * cand)5288*38fd1498Szrj determine_group_iv_cost (struct ivopts_data *data,
5289*38fd1498Szrj struct iv_group *group, struct iv_cand *cand)
5290*38fd1498Szrj {
5291*38fd1498Szrj switch (group->type)
5292*38fd1498Szrj {
5293*38fd1498Szrj case USE_NONLINEAR_EXPR:
5294*38fd1498Szrj return determine_group_iv_cost_generic (data, group, cand);
5295*38fd1498Szrj
5296*38fd1498Szrj case USE_REF_ADDRESS:
5297*38fd1498Szrj case USE_PTR_ADDRESS:
5298*38fd1498Szrj return determine_group_iv_cost_address (data, group, cand);
5299*38fd1498Szrj
5300*38fd1498Szrj case USE_COMPARE:
5301*38fd1498Szrj return determine_group_iv_cost_cond (data, group, cand);
5302*38fd1498Szrj
5303*38fd1498Szrj default:
5304*38fd1498Szrj gcc_unreachable ();
5305*38fd1498Szrj }
5306*38fd1498Szrj }
5307*38fd1498Szrj
5308*38fd1498Szrj /* Return true if get_computation_cost indicates that autoincrement is
5309*38fd1498Szrj a possibility for the pair of USE and CAND, false otherwise. */
5310*38fd1498Szrj
5311*38fd1498Szrj static bool
autoinc_possible_for_pair(struct ivopts_data * data,struct iv_use * use,struct iv_cand * cand)5312*38fd1498Szrj autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
5313*38fd1498Szrj struct iv_cand *cand)
5314*38fd1498Szrj {
5315*38fd1498Szrj if (!address_p (use->type))
5316*38fd1498Szrj return false;
5317*38fd1498Szrj
5318*38fd1498Szrj bool can_autoinc = false;
5319*38fd1498Szrj get_computation_cost (data, use, cand, true, NULL, &can_autoinc, NULL);
5320*38fd1498Szrj return can_autoinc;
5321*38fd1498Szrj }
5322*38fd1498Szrj
5323*38fd1498Szrj /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5324*38fd1498Szrj use that allows autoincrement, and set their AINC_USE if possible. */
5325*38fd1498Szrj
5326*38fd1498Szrj static void
set_autoinc_for_original_candidates(struct ivopts_data * data)5327*38fd1498Szrj set_autoinc_for_original_candidates (struct ivopts_data *data)
5328*38fd1498Szrj {
5329*38fd1498Szrj unsigned i, j;
5330*38fd1498Szrj
5331*38fd1498Szrj for (i = 0; i < data->vcands.length (); i++)
5332*38fd1498Szrj {
5333*38fd1498Szrj struct iv_cand *cand = data->vcands[i];
5334*38fd1498Szrj struct iv_use *closest_before = NULL;
5335*38fd1498Szrj struct iv_use *closest_after = NULL;
5336*38fd1498Szrj if (cand->pos != IP_ORIGINAL)
5337*38fd1498Szrj continue;
5338*38fd1498Szrj
5339*38fd1498Szrj for (j = 0; j < data->vgroups.length (); j++)
5340*38fd1498Szrj {
5341*38fd1498Szrj struct iv_group *group = data->vgroups[j];
5342*38fd1498Szrj struct iv_use *use = group->vuses[0];
5343*38fd1498Szrj unsigned uid = gimple_uid (use->stmt);
5344*38fd1498Szrj
5345*38fd1498Szrj if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
5346*38fd1498Szrj continue;
5347*38fd1498Szrj
5348*38fd1498Szrj if (uid < gimple_uid (cand->incremented_at)
5349*38fd1498Szrj && (closest_before == NULL
5350*38fd1498Szrj || uid > gimple_uid (closest_before->stmt)))
5351*38fd1498Szrj closest_before = use;
5352*38fd1498Szrj
5353*38fd1498Szrj if (uid > gimple_uid (cand->incremented_at)
5354*38fd1498Szrj && (closest_after == NULL
5355*38fd1498Szrj || uid < gimple_uid (closest_after->stmt)))
5356*38fd1498Szrj closest_after = use;
5357*38fd1498Szrj }
5358*38fd1498Szrj
5359*38fd1498Szrj if (closest_before != NULL
5360*38fd1498Szrj && autoinc_possible_for_pair (data, closest_before, cand))
5361*38fd1498Szrj cand->ainc_use = closest_before;
5362*38fd1498Szrj else if (closest_after != NULL
5363*38fd1498Szrj && autoinc_possible_for_pair (data, closest_after, cand))
5364*38fd1498Szrj cand->ainc_use = closest_after;
5365*38fd1498Szrj }
5366*38fd1498Szrj }
5367*38fd1498Szrj
5368*38fd1498Szrj /* Relate compare use with all candidates. */
5369*38fd1498Szrj
5370*38fd1498Szrj static void
relate_compare_use_with_all_cands(struct ivopts_data * data)5371*38fd1498Szrj relate_compare_use_with_all_cands (struct ivopts_data *data)
5372*38fd1498Szrj {
5373*38fd1498Szrj unsigned i, count = data->vcands.length ();
5374*38fd1498Szrj for (i = 0; i < data->vgroups.length (); i++)
5375*38fd1498Szrj {
5376*38fd1498Szrj struct iv_group *group = data->vgroups[i];
5377*38fd1498Szrj
5378*38fd1498Szrj if (group->type == USE_COMPARE)
5379*38fd1498Szrj bitmap_set_range (group->related_cands, 0, count);
5380*38fd1498Szrj }
5381*38fd1498Szrj }
5382*38fd1498Szrj
5383*38fd1498Szrj /* Finds the candidates for the induction variables. */
5384*38fd1498Szrj
5385*38fd1498Szrj static void
find_iv_candidates(struct ivopts_data * data)5386*38fd1498Szrj find_iv_candidates (struct ivopts_data *data)
5387*38fd1498Szrj {
5388*38fd1498Szrj /* Add commonly used ivs. */
5389*38fd1498Szrj add_standard_iv_candidates (data);
5390*38fd1498Szrj
5391*38fd1498Szrj /* Add old induction variables. */
5392*38fd1498Szrj add_iv_candidate_for_bivs (data);
5393*38fd1498Szrj
5394*38fd1498Szrj /* Add induction variables derived from uses. */
5395*38fd1498Szrj add_iv_candidate_for_groups (data);
5396*38fd1498Szrj
5397*38fd1498Szrj set_autoinc_for_original_candidates (data);
5398*38fd1498Szrj
5399*38fd1498Szrj /* Record the important candidates. */
5400*38fd1498Szrj record_important_candidates (data);
5401*38fd1498Szrj
5402*38fd1498Szrj /* Relate compare iv_use with all candidates. */
5403*38fd1498Szrj if (!data->consider_all_candidates)
5404*38fd1498Szrj relate_compare_use_with_all_cands (data);
5405*38fd1498Szrj
5406*38fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
5407*38fd1498Szrj {
5408*38fd1498Szrj unsigned i;
5409*38fd1498Szrj
5410*38fd1498Szrj fprintf (dump_file, "\n<Important Candidates>:\t");
5411*38fd1498Szrj for (i = 0; i < data->vcands.length (); i++)
5412*38fd1498Szrj if (data->vcands[i]->important)
5413*38fd1498Szrj fprintf (dump_file, " %d,", data->vcands[i]->id);
5414*38fd1498Szrj fprintf (dump_file, "\n");
5415*38fd1498Szrj
5416*38fd1498Szrj fprintf (dump_file, "\n<Group, Cand> Related:\n");
5417*38fd1498Szrj for (i = 0; i < data->vgroups.length (); i++)
5418*38fd1498Szrj {
5419*38fd1498Szrj struct iv_group *group = data->vgroups[i];
5420*38fd1498Szrj
5421*38fd1498Szrj if (group->related_cands)
5422*38fd1498Szrj {
5423*38fd1498Szrj fprintf (dump_file, " Group %d:\t", group->id);
5424*38fd1498Szrj dump_bitmap (dump_file, group->related_cands);
5425*38fd1498Szrj }
5426*38fd1498Szrj }
5427*38fd1498Szrj fprintf (dump_file, "\n");
5428*38fd1498Szrj }
5429*38fd1498Szrj }
5430*38fd1498Szrj
/* Determines costs of computing use of iv with an iv candidate.  Fills the
   per-group cost maps and, for the restricted search, prunes candidates with
   infinite cost from each group's related_cands.  */

static void
determine_group_iv_costs (struct ivopts_data *data)
{
  unsigned i, j;
  struct iv_cand *cand;
  struct iv_group *group;
  /* Scratch bitmap collecting candidates to prune, reused per group.  */
  bitmap to_clear = BITMAP_ALLOC (NULL);

  alloc_use_cost_map (data);

  /* Fill the <group, candidate> cost matrix.  */
  for (i = 0; i < data->vgroups.length (); i++)
    {
      group = data->vgroups[i];

      if (data->consider_all_candidates)
	{
	  for (j = 0; j < data->vcands.length (); j++)
	    {
	      cand = data->vcands[j];
	      determine_group_iv_cost (data, group, cand);
	    }
	}
      else
	{
	  bitmap_iterator bi;

	  EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, j, bi)
	    {
	      cand = data->vcands[j];
	      if (!determine_group_iv_cost (data, group, cand))
		bitmap_set_bit (to_clear, j);
	    }

	  /* Remove the candidates for that the cost is infinite from
	     the list of related candidates.  */
	  bitmap_and_compl_into (group->related_cands, to_clear);
	  bitmap_clear (to_clear);
	}
    }

  BITMAP_FREE (to_clear);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      bitmap_iterator bi;

      /* Dump invariant variables.  */
      fprintf (dump_file, "\n<Invariant Vars>:\n");
      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  struct version_info *info = ver_info (data, i);
	  if (info->inv_id)
	    {
	      fprintf (dump_file, "Inv %d:\t", info->inv_id);
	      print_generic_expr (dump_file, info->name, TDF_SLIM);
	      fprintf (dump_file, "%s\n",
		       info->has_nonlin_use ? "" : "\t(eliminable)");
	    }
	}

      /* Dump invariant expressions.  */
      fprintf (dump_file, "\n<Invariant Expressions>:\n");
      auto_vec <iv_inv_expr_ent *> list (data->inv_expr_tab->elements ());

      for (hash_table<iv_inv_expr_hasher>::iterator it
	   = data->inv_expr_tab->begin (); it != data->inv_expr_tab->end ();
	   ++it)
	list.safe_push (*it);

      /* Sort so the dump output is stable across runs.  */
      list.qsort (sort_iv_inv_expr_ent);

      for (i = 0; i < list.length (); ++i)
	{
	  fprintf (dump_file, "inv_expr %d: \t", list[i]->id);
	  print_generic_expr (dump_file, list[i]->expr, TDF_SLIM);
	  fprintf (dump_file, "\n");
	}

      fprintf (dump_file, "\n<Group-candidate Costs>:\n");

      for (i = 0; i < data->vgroups.length (); i++)
	{
	  group = data->vgroups[i];

	  fprintf (dump_file, "Group %d:\n", i);
	  fprintf (dump_file, " cand\tcost\tcompl.\tinv.expr.\tinv.vars\n");
	  for (j = 0; j < group->n_map_members; j++)
	    {
	      /* Skip unset entries and infinite (unrepresentable) pairs.  */
	      if (!group->cost_map[j].cand
		  || group->cost_map[j].cost.infinite_cost_p ())
		continue;

	      fprintf (dump_file, " %d\t%d\t%d\t",
		       group->cost_map[j].cand->id,
		       group->cost_map[j].cost.cost,
		       group->cost_map[j].cost.complexity);
	      if (!group->cost_map[j].inv_exprs
		  || bitmap_empty_p (group->cost_map[j].inv_exprs))
		fprintf (dump_file, "NIL;\t");
	      else
		bitmap_print (dump_file,
			      group->cost_map[j].inv_exprs, "", ";\t");
	      if (!group->cost_map[j].inv_vars
		  || bitmap_empty_p (group->cost_map[j].inv_vars))
		fprintf (dump_file, "NIL;\n");
	      else
		bitmap_print (dump_file,
			      group->cost_map[j].inv_vars, "", "\n");
	    }

	  fprintf (dump_file, "\n");
	}
      fprintf (dump_file, "\n");
    }
}
5548*38fd1498Szrj
5549*38fd1498Szrj /* Determines cost of the candidate CAND. */
5550*38fd1498Szrj
5551*38fd1498Szrj static void
determine_iv_cost(struct ivopts_data * data,struct iv_cand * cand)5552*38fd1498Szrj determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5553*38fd1498Szrj {
5554*38fd1498Szrj comp_cost cost_base;
5555*38fd1498Szrj unsigned cost, cost_step;
5556*38fd1498Szrj tree base;
5557*38fd1498Szrj
5558*38fd1498Szrj gcc_assert (cand->iv != NULL);
5559*38fd1498Szrj
5560*38fd1498Szrj /* There are two costs associated with the candidate -- its increment
5561*38fd1498Szrj and its initialization. The second is almost negligible for any loop
5562*38fd1498Szrj that rolls enough, so we take it just very little into account. */
5563*38fd1498Szrj
5564*38fd1498Szrj base = cand->iv->base;
5565*38fd1498Szrj cost_base = force_var_cost (data, base, NULL);
5566*38fd1498Szrj /* It will be exceptional that the iv register happens to be initialized with
5567*38fd1498Szrj the proper value at no cost. In general, there will at least be a regcopy
5568*38fd1498Szrj or a const set. */
5569*38fd1498Szrj if (cost_base.cost == 0)
5570*38fd1498Szrj cost_base.cost = COSTS_N_INSNS (1);
5571*38fd1498Szrj cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5572*38fd1498Szrj
5573*38fd1498Szrj cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5574*38fd1498Szrj
5575*38fd1498Szrj /* Prefer the original ivs unless we may gain something by replacing it.
5576*38fd1498Szrj The reason is to make debugging simpler; so this is not relevant for
5577*38fd1498Szrj artificial ivs created by other optimization passes. */
5578*38fd1498Szrj if (cand->pos != IP_ORIGINAL
5579*38fd1498Szrj || !SSA_NAME_VAR (cand->var_before)
5580*38fd1498Szrj || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5581*38fd1498Szrj cost++;
5582*38fd1498Szrj
5583*38fd1498Szrj /* Prefer not to insert statements into latch unless there are some
5584*38fd1498Szrj already (so that we do not create unnecessary jumps). */
5585*38fd1498Szrj if (cand->pos == IP_END
5586*38fd1498Szrj && empty_block_p (ip_end_pos (data->current_loop)))
5587*38fd1498Szrj cost++;
5588*38fd1498Szrj
5589*38fd1498Szrj cand->cost = cost;
5590*38fd1498Szrj cand->cost_step = cost_step;
5591*38fd1498Szrj }
5592*38fd1498Szrj
5593*38fd1498Szrj /* Determines costs of computation of the candidates. */
5594*38fd1498Szrj
5595*38fd1498Szrj static void
determine_iv_costs(struct ivopts_data * data)5596*38fd1498Szrj determine_iv_costs (struct ivopts_data *data)
5597*38fd1498Szrj {
5598*38fd1498Szrj unsigned i;
5599*38fd1498Szrj
5600*38fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
5601*38fd1498Szrj {
5602*38fd1498Szrj fprintf (dump_file, "<Candidate Costs>:\n");
5603*38fd1498Szrj fprintf (dump_file, " cand\tcost\n");
5604*38fd1498Szrj }
5605*38fd1498Szrj
5606*38fd1498Szrj for (i = 0; i < data->vcands.length (); i++)
5607*38fd1498Szrj {
5608*38fd1498Szrj struct iv_cand *cand = data->vcands[i];
5609*38fd1498Szrj
5610*38fd1498Szrj determine_iv_cost (data, cand);
5611*38fd1498Szrj
5612*38fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
5613*38fd1498Szrj fprintf (dump_file, " %d\t%d\n", i, cand->cost);
5614*38fd1498Szrj }
5615*38fd1498Szrj
5616*38fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
5617*38fd1498Szrj fprintf (dump_file, "\n");
5618*38fd1498Szrj }
5619*38fd1498Szrj
5620*38fd1498Szrj /* Estimate register pressure for loop having N_INVS invariants and N_CANDS
5621*38fd1498Szrj induction variables. Note N_INVS includes both invariant variables and
5622*38fd1498Szrj invariant expressions. */
5623*38fd1498Szrj
5624*38fd1498Szrj static unsigned
ivopts_estimate_reg_pressure(struct ivopts_data * data,unsigned n_invs,unsigned n_cands)5625*38fd1498Szrj ivopts_estimate_reg_pressure (struct ivopts_data *data, unsigned n_invs,
5626*38fd1498Szrj unsigned n_cands)
5627*38fd1498Szrj {
5628*38fd1498Szrj unsigned cost;
5629*38fd1498Szrj unsigned n_old = data->regs_used, n_new = n_invs + n_cands;
5630*38fd1498Szrj unsigned regs_needed = n_new + n_old, available_regs = target_avail_regs;
5631*38fd1498Szrj bool speed = data->speed;
5632*38fd1498Szrj
5633*38fd1498Szrj /* If there is a call in the loop body, the call-clobbered registers
5634*38fd1498Szrj are not available for loop invariants. */
5635*38fd1498Szrj if (data->body_includes_call)
5636*38fd1498Szrj available_regs = available_regs - target_clobbered_regs;
5637*38fd1498Szrj
5638*38fd1498Szrj /* If we have enough registers. */
5639*38fd1498Szrj if (regs_needed + target_res_regs < available_regs)
5640*38fd1498Szrj cost = n_new;
5641*38fd1498Szrj /* If close to running out of registers, try to preserve them. */
5642*38fd1498Szrj else if (regs_needed <= available_regs)
5643*38fd1498Szrj cost = target_reg_cost [speed] * regs_needed;
5644*38fd1498Szrj /* If we run out of available registers but the number of candidates
5645*38fd1498Szrj does not, we penalize extra registers using target_spill_cost. */
5646*38fd1498Szrj else if (n_cands <= available_regs)
5647*38fd1498Szrj cost = target_reg_cost [speed] * available_regs
5648*38fd1498Szrj + target_spill_cost [speed] * (regs_needed - available_regs);
5649*38fd1498Szrj /* If the number of candidates runs out available registers, we penalize
5650*38fd1498Szrj extra candidate registers using target_spill_cost * 2. Because it is
5651*38fd1498Szrj more expensive to spill induction variable than invariant. */
5652*38fd1498Szrj else
5653*38fd1498Szrj cost = target_reg_cost [speed] * available_regs
5654*38fd1498Szrj + target_spill_cost [speed] * (n_cands - available_regs) * 2
5655*38fd1498Szrj + target_spill_cost [speed] * (regs_needed - n_cands);
5656*38fd1498Szrj
5657*38fd1498Szrj /* Finally, add the number of candidates, so that we prefer eliminating
5658*38fd1498Szrj induction variables if possible. */
5659*38fd1498Szrj return cost + n_cands;
5660*38fd1498Szrj }
5661*38fd1498Szrj
/* For each size of the induction variable set determine the penalty.  */

static void
determine_set_costs (struct ivopts_data *data)
{
  unsigned j, n;
  gphi *phi;
  gphi_iterator psi;
  tree op;
  struct loop *loop = data->current_loop;
  bitmap_iterator bi;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "<Global Costs>:\n");
      fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
      fprintf (dump_file, "  target_clobbered_regs %d\n", target_clobbered_regs);
      fprintf (dump_file, "  target_reg_cost %d\n", target_reg_cost[data->speed]);
      fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost[data->speed]);
    }

  n = 0;
  /* Count loop-header PHI results that occupy a register throughout the
     loop but are not induction variables (those are accounted for by the
     candidate set itself).  */
  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();
      op = PHI_RESULT (phi);

      /* Virtual operands do not occupy hard registers.  */
      if (virtual_operand_p (op))
	continue;

      /* Induction variables are handled by the candidate selection.  */
      if (get_iv (data, op))
	continue;

      /* Only pointer and integral values are expected to live in
	 general registers.  */
      if (!POINTER_TYPE_P (TREE_TYPE (op))
	  && !INTEGRAL_TYPE_P (TREE_TYPE (op)))
	continue;

      n++;
    }

  /* Loop invariants with non-linear uses also need a register each.  */
  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info = ver_info (data, j);

      if (info->inv_id && info->has_nonlin_use)
	n++;
    }

  data->regs_used = n;
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "  regs_used %d\n", n);

  /* Dump the register-pressure penalty for every plausible set size.  */
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "  cost for size:\n");
      fprintf (dump_file, "  ivs\tcost\n");
      for (j = 0; j <= 2 * target_avail_regs; j++)
	fprintf (dump_file, "  %d\t%d\n", j,
		 ivopts_estimate_reg_pressure (data, 0, j));
      fprintf (dump_file, "\n");
    }
}
5724*38fd1498Szrj
5725*38fd1498Szrj /* Returns true if A is a cheaper cost pair than B. */
5726*38fd1498Szrj
5727*38fd1498Szrj static bool
cheaper_cost_pair(struct cost_pair * a,struct cost_pair * b)5728*38fd1498Szrj cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5729*38fd1498Szrj {
5730*38fd1498Szrj if (!a)
5731*38fd1498Szrj return false;
5732*38fd1498Szrj
5733*38fd1498Szrj if (!b)
5734*38fd1498Szrj return true;
5735*38fd1498Szrj
5736*38fd1498Szrj if (a->cost < b->cost)
5737*38fd1498Szrj return true;
5738*38fd1498Szrj
5739*38fd1498Szrj if (b->cost < a->cost)
5740*38fd1498Szrj return false;
5741*38fd1498Szrj
5742*38fd1498Szrj /* In case the costs are the same, prefer the cheaper candidate. */
5743*38fd1498Szrj if (a->cand->cost < b->cand->cost)
5744*38fd1498Szrj return true;
5745*38fd1498Szrj
5746*38fd1498Szrj return false;
5747*38fd1498Szrj }
5748*38fd1498Szrj
5749*38fd1498Szrj /* Compare if A is a more expensive cost pair than B. Return 1, 0 and -1
5750*38fd1498Szrj for more expensive, equal and cheaper respectively. */
5751*38fd1498Szrj
5752*38fd1498Szrj static int
compare_cost_pair(struct cost_pair * a,struct cost_pair * b)5753*38fd1498Szrj compare_cost_pair (struct cost_pair *a, struct cost_pair *b)
5754*38fd1498Szrj {
5755*38fd1498Szrj if (cheaper_cost_pair (a, b))
5756*38fd1498Szrj return -1;
5757*38fd1498Szrj if (cheaper_cost_pair (b, a))
5758*38fd1498Szrj return 1;
5759*38fd1498Szrj
5760*38fd1498Szrj return 0;
5761*38fd1498Szrj }
5762*38fd1498Szrj
5763*38fd1498Szrj /* Returns candidate by that USE is expressed in IVS. */
5764*38fd1498Szrj
5765*38fd1498Szrj static struct cost_pair *
iv_ca_cand_for_group(struct iv_ca * ivs,struct iv_group * group)5766*38fd1498Szrj iv_ca_cand_for_group (struct iv_ca *ivs, struct iv_group *group)
5767*38fd1498Szrj {
5768*38fd1498Szrj return ivs->cand_for_group[group->id];
5769*38fd1498Szrj }
5770*38fd1498Szrj
5771*38fd1498Szrj /* Computes the cost field of IVS structure. */
5772*38fd1498Szrj
5773*38fd1498Szrj static void
iv_ca_recount_cost(struct ivopts_data * data,struct iv_ca * ivs)5774*38fd1498Szrj iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5775*38fd1498Szrj {
5776*38fd1498Szrj comp_cost cost = ivs->cand_use_cost;
5777*38fd1498Szrj
5778*38fd1498Szrj cost += ivs->cand_cost;
5779*38fd1498Szrj cost += ivopts_estimate_reg_pressure (data, ivs->n_invs, ivs->n_cands);
5780*38fd1498Szrj ivs->cost = cost;
5781*38fd1498Szrj }
5782*38fd1498Szrj
5783*38fd1498Szrj /* Remove use of invariants in set INVS by decreasing counter in N_INV_USES
5784*38fd1498Szrj and IVS. */
5785*38fd1498Szrj
5786*38fd1498Szrj static void
iv_ca_set_remove_invs(struct iv_ca * ivs,bitmap invs,unsigned * n_inv_uses)5787*38fd1498Szrj iv_ca_set_remove_invs (struct iv_ca *ivs, bitmap invs, unsigned *n_inv_uses)
5788*38fd1498Szrj {
5789*38fd1498Szrj bitmap_iterator bi;
5790*38fd1498Szrj unsigned iid;
5791*38fd1498Szrj
5792*38fd1498Szrj if (!invs)
5793*38fd1498Szrj return;
5794*38fd1498Szrj
5795*38fd1498Szrj gcc_assert (n_inv_uses != NULL);
5796*38fd1498Szrj EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5797*38fd1498Szrj {
5798*38fd1498Szrj n_inv_uses[iid]--;
5799*38fd1498Szrj if (n_inv_uses[iid] == 0)
5800*38fd1498Szrj ivs->n_invs--;
5801*38fd1498Szrj }
5802*38fd1498Szrj }
5803*38fd1498Szrj
/* Set USE not to be expressed by any candidate in IVS.  */

static void
iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
		 struct iv_group *group)
{
  unsigned gid = group->id, cid;
  struct cost_pair *cp;

  cp = ivs->cand_for_group[gid];
  /* Nothing to do if the group is already unexpressed.  */
  if (!cp)
    return;
  cid = cp->cand->id;

  /* The group becomes "bad" (unexpressed) until a new pair is set.  */
  ivs->bad_groups++;
  ivs->cand_for_group[gid] = NULL;
  ivs->n_cand_uses[cid]--;

  if (ivs->n_cand_uses[cid] == 0)
    {
      /* Last use of this candidate: drop it from the set together with
	 its definition cost and the invariants its definition uses.  */
      bitmap_clear_bit (ivs->cands, cid);
      ivs->n_cands--;
      ivs->cand_cost -= cp->cand->cost;
      iv_ca_set_remove_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses);
      iv_ca_set_remove_invs (ivs, cp->cand->inv_exprs, ivs->n_inv_expr_uses);
    }

  /* Remove this pairing's per-use cost and invariants, then refresh the
     cached total cost.  */
  ivs->cand_use_cost -= cp->cost;
  iv_ca_set_remove_invs (ivs, cp->inv_vars, ivs->n_inv_var_uses);
  iv_ca_set_remove_invs (ivs, cp->inv_exprs, ivs->n_inv_expr_uses);
  iv_ca_recount_cost (data, ivs);
}
5836*38fd1498Szrj
5837*38fd1498Szrj /* Add use of invariants in set INVS by increasing counter in N_INV_USES and
5838*38fd1498Szrj IVS. */
5839*38fd1498Szrj
5840*38fd1498Szrj static void
iv_ca_set_add_invs(struct iv_ca * ivs,bitmap invs,unsigned * n_inv_uses)5841*38fd1498Szrj iv_ca_set_add_invs (struct iv_ca *ivs, bitmap invs, unsigned *n_inv_uses)
5842*38fd1498Szrj {
5843*38fd1498Szrj bitmap_iterator bi;
5844*38fd1498Szrj unsigned iid;
5845*38fd1498Szrj
5846*38fd1498Szrj if (!invs)
5847*38fd1498Szrj return;
5848*38fd1498Szrj
5849*38fd1498Szrj gcc_assert (n_inv_uses != NULL);
5850*38fd1498Szrj EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5851*38fd1498Szrj {
5852*38fd1498Szrj n_inv_uses[iid]++;
5853*38fd1498Szrj if (n_inv_uses[iid] == 1)
5854*38fd1498Szrj ivs->n_invs++;
5855*38fd1498Szrj }
5856*38fd1498Szrj }
5857*38fd1498Szrj
/* Set cost pair for GROUP in set IVS to CP.  */

static void
iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
	      struct iv_group *group, struct cost_pair *cp)
{
  unsigned gid = group->id, cid;

  /* Nothing to do if the assignment is unchanged.  */
  if (ivs->cand_for_group[gid] == cp)
    return;

  /* Detach the old pairing first so all counters stay consistent.  */
  if (ivs->cand_for_group[gid])
    iv_ca_set_no_cp (data, ivs, group);

  if (cp)
    {
      cid = cp->cand->id;

      /* The group is expressed again, so it is no longer "bad".  */
      ivs->bad_groups--;
      ivs->cand_for_group[gid] = cp;
      ivs->n_cand_uses[cid]++;
      if (ivs->n_cand_uses[cid] == 1)
	{
	  /* First use of this candidate: add it to the set together with
	     its definition cost and the invariants its definition uses.  */
	  bitmap_set_bit (ivs->cands, cid);
	  ivs->n_cands++;
	  ivs->cand_cost += cp->cand->cost;
	  iv_ca_set_add_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses);
	  iv_ca_set_add_invs (ivs, cp->cand->inv_exprs, ivs->n_inv_expr_uses);
	}

      /* Account this pairing's per-use cost and invariants, then refresh
	 the cached total cost.  */
      ivs->cand_use_cost += cp->cost;
      iv_ca_set_add_invs (ivs, cp->inv_vars, ivs->n_inv_var_uses);
      iv_ca_set_add_invs (ivs, cp->inv_exprs, ivs->n_inv_expr_uses);
      iv_ca_recount_cost (data, ivs);
    }
}
5894*38fd1498Szrj
5895*38fd1498Szrj /* Extend set IVS by expressing USE by some of the candidates in it
5896*38fd1498Szrj if possible. Consider all important candidates if candidates in
5897*38fd1498Szrj set IVS don't give any result. */
5898*38fd1498Szrj
5899*38fd1498Szrj static void
iv_ca_add_group(struct ivopts_data * data,struct iv_ca * ivs,struct iv_group * group)5900*38fd1498Szrj iv_ca_add_group (struct ivopts_data *data, struct iv_ca *ivs,
5901*38fd1498Szrj struct iv_group *group)
5902*38fd1498Szrj {
5903*38fd1498Szrj struct cost_pair *best_cp = NULL, *cp;
5904*38fd1498Szrj bitmap_iterator bi;
5905*38fd1498Szrj unsigned i;
5906*38fd1498Szrj struct iv_cand *cand;
5907*38fd1498Szrj
5908*38fd1498Szrj gcc_assert (ivs->upto >= group->id);
5909*38fd1498Szrj ivs->upto++;
5910*38fd1498Szrj ivs->bad_groups++;
5911*38fd1498Szrj
5912*38fd1498Szrj EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5913*38fd1498Szrj {
5914*38fd1498Szrj cand = data->vcands[i];
5915*38fd1498Szrj cp = get_group_iv_cost (data, group, cand);
5916*38fd1498Szrj if (cheaper_cost_pair (cp, best_cp))
5917*38fd1498Szrj best_cp = cp;
5918*38fd1498Szrj }
5919*38fd1498Szrj
5920*38fd1498Szrj if (best_cp == NULL)
5921*38fd1498Szrj {
5922*38fd1498Szrj EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5923*38fd1498Szrj {
5924*38fd1498Szrj cand = data->vcands[i];
5925*38fd1498Szrj cp = get_group_iv_cost (data, group, cand);
5926*38fd1498Szrj if (cheaper_cost_pair (cp, best_cp))
5927*38fd1498Szrj best_cp = cp;
5928*38fd1498Szrj }
5929*38fd1498Szrj }
5930*38fd1498Szrj
5931*38fd1498Szrj iv_ca_set_cp (data, ivs, group, best_cp);
5932*38fd1498Szrj }
5933*38fd1498Szrj
5934*38fd1498Szrj /* Get cost for assignment IVS. */
5935*38fd1498Szrj
5936*38fd1498Szrj static comp_cost
iv_ca_cost(struct iv_ca * ivs)5937*38fd1498Szrj iv_ca_cost (struct iv_ca *ivs)
5938*38fd1498Szrj {
5939*38fd1498Szrj /* This was a conditional expression but it triggered a bug in
5940*38fd1498Szrj Sun C 5.5. */
5941*38fd1498Szrj if (ivs->bad_groups)
5942*38fd1498Szrj return infinite_cost;
5943*38fd1498Szrj else
5944*38fd1498Szrj return ivs->cost;
5945*38fd1498Szrj }
5946*38fd1498Szrj
5947*38fd1498Szrj /* Compare if applying NEW_CP to GROUP for IVS introduces more invariants
5948*38fd1498Szrj than OLD_CP. Return 1, 0 and -1 for more, equal and fewer invariants
5949*38fd1498Szrj respectively. */
5950*38fd1498Szrj
5951*38fd1498Szrj static int
iv_ca_compare_deps(struct ivopts_data * data,struct iv_ca * ivs,struct iv_group * group,struct cost_pair * old_cp,struct cost_pair * new_cp)5952*38fd1498Szrj iv_ca_compare_deps (struct ivopts_data *data, struct iv_ca *ivs,
5953*38fd1498Szrj struct iv_group *group, struct cost_pair *old_cp,
5954*38fd1498Szrj struct cost_pair *new_cp)
5955*38fd1498Szrj {
5956*38fd1498Szrj gcc_assert (old_cp && new_cp && old_cp != new_cp);
5957*38fd1498Szrj unsigned old_n_invs = ivs->n_invs;
5958*38fd1498Szrj iv_ca_set_cp (data, ivs, group, new_cp);
5959*38fd1498Szrj unsigned new_n_invs = ivs->n_invs;
5960*38fd1498Szrj iv_ca_set_cp (data, ivs, group, old_cp);
5961*38fd1498Szrj
5962*38fd1498Szrj return new_n_invs > old_n_invs ? 1 : (new_n_invs < old_n_invs ? -1 : 0);
5963*38fd1498Szrj }
5964*38fd1498Szrj
5965*38fd1498Szrj /* Creates change of expressing GROUP by NEW_CP instead of OLD_CP and chains
5966*38fd1498Szrj it before NEXT. */
5967*38fd1498Szrj
5968*38fd1498Szrj static struct iv_ca_delta *
iv_ca_delta_add(struct iv_group * group,struct cost_pair * old_cp,struct cost_pair * new_cp,struct iv_ca_delta * next)5969*38fd1498Szrj iv_ca_delta_add (struct iv_group *group, struct cost_pair *old_cp,
5970*38fd1498Szrj struct cost_pair *new_cp, struct iv_ca_delta *next)
5971*38fd1498Szrj {
5972*38fd1498Szrj struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5973*38fd1498Szrj
5974*38fd1498Szrj change->group = group;
5975*38fd1498Szrj change->old_cp = old_cp;
5976*38fd1498Szrj change->new_cp = new_cp;
5977*38fd1498Szrj change->next = next;
5978*38fd1498Szrj
5979*38fd1498Szrj return change;
5980*38fd1498Szrj }
5981*38fd1498Szrj
5982*38fd1498Szrj /* Joins two lists of changes L1 and L2. Destructive -- old lists
5983*38fd1498Szrj are rewritten. */
5984*38fd1498Szrj
5985*38fd1498Szrj static struct iv_ca_delta *
iv_ca_delta_join(struct iv_ca_delta * l1,struct iv_ca_delta * l2)5986*38fd1498Szrj iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5987*38fd1498Szrj {
5988*38fd1498Szrj struct iv_ca_delta *last;
5989*38fd1498Szrj
5990*38fd1498Szrj if (!l2)
5991*38fd1498Szrj return l1;
5992*38fd1498Szrj
5993*38fd1498Szrj if (!l1)
5994*38fd1498Szrj return l2;
5995*38fd1498Szrj
5996*38fd1498Szrj for (last = l1; last->next; last = last->next)
5997*38fd1498Szrj continue;
5998*38fd1498Szrj last->next = l2;
5999*38fd1498Szrj
6000*38fd1498Szrj return l1;
6001*38fd1498Szrj }
6002*38fd1498Szrj
6003*38fd1498Szrj /* Reverse the list of changes DELTA, forming the inverse to it. */
6004*38fd1498Szrj
6005*38fd1498Szrj static struct iv_ca_delta *
iv_ca_delta_reverse(struct iv_ca_delta * delta)6006*38fd1498Szrj iv_ca_delta_reverse (struct iv_ca_delta *delta)
6007*38fd1498Szrj {
6008*38fd1498Szrj struct iv_ca_delta *act, *next, *prev = NULL;
6009*38fd1498Szrj
6010*38fd1498Szrj for (act = delta; act; act = next)
6011*38fd1498Szrj {
6012*38fd1498Szrj next = act->next;
6013*38fd1498Szrj act->next = prev;
6014*38fd1498Szrj prev = act;
6015*38fd1498Szrj
6016*38fd1498Szrj std::swap (act->old_cp, act->new_cp);
6017*38fd1498Szrj }
6018*38fd1498Szrj
6019*38fd1498Szrj return prev;
6020*38fd1498Szrj }
6021*38fd1498Szrj
6022*38fd1498Szrj /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
6023*38fd1498Szrj reverted instead. */
6024*38fd1498Szrj
6025*38fd1498Szrj static void
iv_ca_delta_commit(struct ivopts_data * data,struct iv_ca * ivs,struct iv_ca_delta * delta,bool forward)6026*38fd1498Szrj iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
6027*38fd1498Szrj struct iv_ca_delta *delta, bool forward)
6028*38fd1498Szrj {
6029*38fd1498Szrj struct cost_pair *from, *to;
6030*38fd1498Szrj struct iv_ca_delta *act;
6031*38fd1498Szrj
6032*38fd1498Szrj if (!forward)
6033*38fd1498Szrj delta = iv_ca_delta_reverse (delta);
6034*38fd1498Szrj
6035*38fd1498Szrj for (act = delta; act; act = act->next)
6036*38fd1498Szrj {
6037*38fd1498Szrj from = act->old_cp;
6038*38fd1498Szrj to = act->new_cp;
6039*38fd1498Szrj gcc_assert (iv_ca_cand_for_group (ivs, act->group) == from);
6040*38fd1498Szrj iv_ca_set_cp (data, ivs, act->group, to);
6041*38fd1498Szrj }
6042*38fd1498Szrj
6043*38fd1498Szrj if (!forward)
6044*38fd1498Szrj iv_ca_delta_reverse (delta);
6045*38fd1498Szrj }
6046*38fd1498Szrj
6047*38fd1498Szrj /* Returns true if CAND is used in IVS. */
6048*38fd1498Szrj
6049*38fd1498Szrj static bool
iv_ca_cand_used_p(struct iv_ca * ivs,struct iv_cand * cand)6050*38fd1498Szrj iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
6051*38fd1498Szrj {
6052*38fd1498Szrj return ivs->n_cand_uses[cand->id] > 0;
6053*38fd1498Szrj }
6054*38fd1498Szrj
/* Returns number of induction variable candidates in the set IVS.  */

static unsigned
iv_ca_n_cands (struct iv_ca *ivs)
{
  /* Maintained incrementally by iv_ca_set_cp/iv_ca_set_no_cp.  */
  return ivs->n_cands;
}
6062*38fd1498Szrj
6063*38fd1498Szrj /* Free the list of changes DELTA. */
6064*38fd1498Szrj
6065*38fd1498Szrj static void
iv_ca_delta_free(struct iv_ca_delta ** delta)6066*38fd1498Szrj iv_ca_delta_free (struct iv_ca_delta **delta)
6067*38fd1498Szrj {
6068*38fd1498Szrj struct iv_ca_delta *act, *next;
6069*38fd1498Szrj
6070*38fd1498Szrj for (act = *delta; act; act = next)
6071*38fd1498Szrj {
6072*38fd1498Szrj next = act->next;
6073*38fd1498Szrj free (act);
6074*38fd1498Szrj }
6075*38fd1498Szrj
6076*38fd1498Szrj *delta = NULL;
6077*38fd1498Szrj }
6078*38fd1498Szrj
6079*38fd1498Szrj /* Allocates new iv candidates assignment. */
6080*38fd1498Szrj
6081*38fd1498Szrj static struct iv_ca *
iv_ca_new(struct ivopts_data * data)6082*38fd1498Szrj iv_ca_new (struct ivopts_data *data)
6083*38fd1498Szrj {
6084*38fd1498Szrj struct iv_ca *nw = XNEW (struct iv_ca);
6085*38fd1498Szrj
6086*38fd1498Szrj nw->upto = 0;
6087*38fd1498Szrj nw->bad_groups = 0;
6088*38fd1498Szrj nw->cand_for_group = XCNEWVEC (struct cost_pair *,
6089*38fd1498Szrj data->vgroups.length ());
6090*38fd1498Szrj nw->n_cand_uses = XCNEWVEC (unsigned, data->vcands.length ());
6091*38fd1498Szrj nw->cands = BITMAP_ALLOC (NULL);
6092*38fd1498Szrj nw->n_cands = 0;
6093*38fd1498Szrj nw->n_invs = 0;
6094*38fd1498Szrj nw->cand_use_cost = no_cost;
6095*38fd1498Szrj nw->cand_cost = 0;
6096*38fd1498Szrj nw->n_inv_var_uses = XCNEWVEC (unsigned, data->max_inv_var_id + 1);
6097*38fd1498Szrj nw->n_inv_expr_uses = XCNEWVEC (unsigned, data->max_inv_expr_id + 1);
6098*38fd1498Szrj nw->cost = no_cost;
6099*38fd1498Szrj
6100*38fd1498Szrj return nw;
6101*38fd1498Szrj }
6102*38fd1498Szrj
6103*38fd1498Szrj /* Free memory occupied by the set IVS. */
6104*38fd1498Szrj
6105*38fd1498Szrj static void
iv_ca_free(struct iv_ca ** ivs)6106*38fd1498Szrj iv_ca_free (struct iv_ca **ivs)
6107*38fd1498Szrj {
6108*38fd1498Szrj free ((*ivs)->cand_for_group);
6109*38fd1498Szrj free ((*ivs)->n_cand_uses);
6110*38fd1498Szrj BITMAP_FREE ((*ivs)->cands);
6111*38fd1498Szrj free ((*ivs)->n_inv_var_uses);
6112*38fd1498Szrj free ((*ivs)->n_inv_expr_uses);
6113*38fd1498Szrj free (*ivs);
6114*38fd1498Szrj *ivs = NULL;
6115*38fd1498Szrj }
6116*38fd1498Szrj
/* Dumps IVS to FILE.  */

static void
iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
{
  unsigned i;
  comp_cost cost = iv_ca_cost (ivs);

  fprintf (file, "  cost: %d (complexity %d)\n", cost.cost,
	   cost.complexity);
  fprintf (file, "  cand_cost: %d\n  cand_group_cost: %d (complexity %d)\n",
	   ivs->cand_cost, ivs->cand_use_cost.cost,
	   ivs->cand_use_cost.complexity);
  bitmap_print (file, ivs->cands, "  candidates: ","\n");

  /* Show which candidate expresses each group considered so far.  */
  for (i = 0; i < ivs->upto; i++)
    {
      struct iv_group *group = data->vgroups[i];
      struct cost_pair *cp = iv_ca_cand_for_group (ivs, group);
      if (cp)
	fprintf (file, "   group:%d --> iv_cand:%d, cost=(%d,%d)\n",
		 group->id, cp->cand->id, cp->cost.cost,
		 cp->cost.complexity);
      else
	fprintf (file, "   group:%d --> ??\n", group->id);
    }

  /* List the referenced invariant variables and expressions; ids start
     at 1, so index 0 is skipped.  */
  const char *pref = "";
  fprintf (file, "  invariant variables: ");
  for (i = 1; i <= data->max_inv_var_id; i++)
    if (ivs->n_inv_var_uses[i])
      {
	fprintf (file, "%s%d", pref, i);
	pref = ", ";
      }

  pref = "";
  fprintf (file, "\n  invariant expressions: ");
  for (i = 1; i <= data->max_inv_expr_id; i++)
    if (ivs->n_inv_expr_uses[i])
      {
	fprintf (file, "%s%d", pref, i);
	pref = ", ";
      }

  fprintf (file, "\n\n");
}
6164*38fd1498Szrj
/* Try changing candidate in IVS to CAND for each use.  Return cost of the
   new set, and store differences in DELTA.  Number of induction variables
   in the new set is stored to N_IVS.  MIN_NCAND is a flag.  When it is true
   the function will try to find a solution with minimal iv candidates.  */

static comp_cost
iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
	      struct iv_cand *cand, struct iv_ca_delta **delta,
	      unsigned *n_ivs, bool min_ncand)
{
  unsigned i;
  comp_cost cost;
  struct iv_group *group;
  struct cost_pair *old_cp, *new_cp;

  *delta = NULL;
  for (i = 0; i < ivs->upto; i++)
    {
      group = data->vgroups[i];
      old_cp = iv_ca_cand_for_group (ivs, group);

      /* Already expressed by CAND; nothing to change.  */
      if (old_cp
	  && old_cp->cand == cand)
	continue;

      /* CAND cannot express this group at all.  */
      new_cp = get_group_iv_cost (data, group, cand);
      if (!new_cp)
	continue;

      /* Unless we are minimizing the number of candidates, only accept
	 switches that do not pessimize this group locally.  */
      if (!min_ncand)
	{
	  int cmp_invs = iv_ca_compare_deps (data, ivs, group, old_cp, new_cp);
	  /* Skip if new_cp depends on more invariants.  */
	  if (cmp_invs > 0)
	    continue;

	  int cmp_cost = compare_cost_pair (new_cp, old_cp);
	  /* Skip if new_cp is not cheaper.  */
	  if (cmp_cost > 0 || (cmp_cost == 0 && cmp_invs == 0))
	    continue;
	}

      *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
    }

  /* Apply the changes temporarily to measure the resulting cost, then
     undo them; the caller decides whether to commit DELTA for real.  */
  iv_ca_delta_commit (data, ivs, *delta, true);
  cost = iv_ca_cost (ivs);
  if (n_ivs)
    *n_ivs = iv_ca_n_cands (ivs);
  iv_ca_delta_commit (data, ivs, *delta, false);

  return cost;
}
6218*38fd1498Szrj
6219*38fd1498Szrj /* Try narrowing set IVS by removing CAND. Return the cost of
6220*38fd1498Szrj the new set and store the differences in DELTA. START is
6221*38fd1498Szrj the candidate with which we start narrowing. */
6222*38fd1498Szrj
6223*38fd1498Szrj static comp_cost
iv_ca_narrow(struct ivopts_data * data,struct iv_ca * ivs,struct iv_cand * cand,struct iv_cand * start,struct iv_ca_delta ** delta)6224*38fd1498Szrj iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
6225*38fd1498Szrj struct iv_cand *cand, struct iv_cand *start,
6226*38fd1498Szrj struct iv_ca_delta **delta)
6227*38fd1498Szrj {
6228*38fd1498Szrj unsigned i, ci;
6229*38fd1498Szrj struct iv_group *group;
6230*38fd1498Szrj struct cost_pair *old_cp, *new_cp, *cp;
6231*38fd1498Szrj bitmap_iterator bi;
6232*38fd1498Szrj struct iv_cand *cnd;
6233*38fd1498Szrj comp_cost cost, best_cost, acost;
6234*38fd1498Szrj
6235*38fd1498Szrj *delta = NULL;
6236*38fd1498Szrj for (i = 0; i < data->vgroups.length (); i++)
6237*38fd1498Szrj {
6238*38fd1498Szrj group = data->vgroups[i];
6239*38fd1498Szrj
6240*38fd1498Szrj old_cp = iv_ca_cand_for_group (ivs, group);
6241*38fd1498Szrj if (old_cp->cand != cand)
6242*38fd1498Szrj continue;
6243*38fd1498Szrj
6244*38fd1498Szrj best_cost = iv_ca_cost (ivs);
6245*38fd1498Szrj /* Start narrowing with START. */
6246*38fd1498Szrj new_cp = get_group_iv_cost (data, group, start);
6247*38fd1498Szrj
6248*38fd1498Szrj if (data->consider_all_candidates)
6249*38fd1498Szrj {
6250*38fd1498Szrj EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
6251*38fd1498Szrj {
6252*38fd1498Szrj if (ci == cand->id || (start && ci == start->id))
6253*38fd1498Szrj continue;
6254*38fd1498Szrj
6255*38fd1498Szrj cnd = data->vcands[ci];
6256*38fd1498Szrj
6257*38fd1498Szrj cp = get_group_iv_cost (data, group, cnd);
6258*38fd1498Szrj if (!cp)
6259*38fd1498Szrj continue;
6260*38fd1498Szrj
6261*38fd1498Szrj iv_ca_set_cp (data, ivs, group, cp);
6262*38fd1498Szrj acost = iv_ca_cost (ivs);
6263*38fd1498Szrj
6264*38fd1498Szrj if (acost < best_cost)
6265*38fd1498Szrj {
6266*38fd1498Szrj best_cost = acost;
6267*38fd1498Szrj new_cp = cp;
6268*38fd1498Szrj }
6269*38fd1498Szrj }
6270*38fd1498Szrj }
6271*38fd1498Szrj else
6272*38fd1498Szrj {
6273*38fd1498Szrj EXECUTE_IF_AND_IN_BITMAP (group->related_cands, ivs->cands, 0, ci, bi)
6274*38fd1498Szrj {
6275*38fd1498Szrj if (ci == cand->id || (start && ci == start->id))
6276*38fd1498Szrj continue;
6277*38fd1498Szrj
6278*38fd1498Szrj cnd = data->vcands[ci];
6279*38fd1498Szrj
6280*38fd1498Szrj cp = get_group_iv_cost (data, group, cnd);
6281*38fd1498Szrj if (!cp)
6282*38fd1498Szrj continue;
6283*38fd1498Szrj
6284*38fd1498Szrj iv_ca_set_cp (data, ivs, group, cp);
6285*38fd1498Szrj acost = iv_ca_cost (ivs);
6286*38fd1498Szrj
6287*38fd1498Szrj if (acost < best_cost)
6288*38fd1498Szrj {
6289*38fd1498Szrj best_cost = acost;
6290*38fd1498Szrj new_cp = cp;
6291*38fd1498Szrj }
6292*38fd1498Szrj }
6293*38fd1498Szrj }
6294*38fd1498Szrj /* Restore to old cp for use. */
6295*38fd1498Szrj iv_ca_set_cp (data, ivs, group, old_cp);
6296*38fd1498Szrj
6297*38fd1498Szrj if (!new_cp)
6298*38fd1498Szrj {
6299*38fd1498Szrj iv_ca_delta_free (delta);
6300*38fd1498Szrj return infinite_cost;
6301*38fd1498Szrj }
6302*38fd1498Szrj
6303*38fd1498Szrj *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
6304*38fd1498Szrj }
6305*38fd1498Szrj
6306*38fd1498Szrj iv_ca_delta_commit (data, ivs, *delta, true);
6307*38fd1498Szrj cost = iv_ca_cost (ivs);
6308*38fd1498Szrj iv_ca_delta_commit (data, ivs, *delta, false);
6309*38fd1498Szrj
6310*38fd1498Szrj return cost;
6311*38fd1498Szrj }
6312*38fd1498Szrj
/* Try optimizing the set of candidates IVS by removing candidates other
   than EXCEPT_CAND from it.  Return cost of the new set, and store
   differences in DELTA.  */

static comp_cost
iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
	     struct iv_cand *except_cand, struct iv_ca_delta **delta)
{
  bitmap_iterator bi;
  struct iv_ca_delta *act_delta, *best_delta;
  unsigned i;
  comp_cost best_cost, acost;
  struct iv_cand *cand;

  best_delta = NULL;
  best_cost = iv_ca_cost (ivs);

  /* Try removing each candidate in the set and remember the cheapest
     resulting narrowing.  */
  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
    {
      cand = data->vcands[i];

      if (cand == except_cand)
	continue;

      acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);

      if (acost < best_cost)
	{
	  best_cost = acost;
	  iv_ca_delta_free (&best_delta);
	  best_delta = act_delta;
	}
      else
	iv_ca_delta_free (&act_delta);
    }

  /* No single removal helped; the set is already minimal.  */
  if (!best_delta)
    {
      *delta = NULL;
      return best_cost;
    }

  /* Recurse to possibly remove other unnecessary ivs.  */
  iv_ca_delta_commit (data, ivs, best_delta, true);
  best_cost = iv_ca_prune (data, ivs, except_cand, delta);
  iv_ca_delta_commit (data, ivs, best_delta, false);
  *delta = iv_ca_delta_join (best_delta, *delta);
  return best_cost;
}
6362*38fd1498Szrj
6363*38fd1498Szrj /* Check if CAND_IDX is a candidate other than OLD_CAND and has
6364*38fd1498Szrj cheaper local cost for GROUP than BEST_CP. Return pointer to
6365*38fd1498Szrj the corresponding cost_pair, otherwise just return BEST_CP. */
6366*38fd1498Szrj
6367*38fd1498Szrj static struct cost_pair*
cheaper_cost_with_cand(struct ivopts_data * data,struct iv_group * group,unsigned int cand_idx,struct iv_cand * old_cand,struct cost_pair * best_cp)6368*38fd1498Szrj cheaper_cost_with_cand (struct ivopts_data *data, struct iv_group *group,
6369*38fd1498Szrj unsigned int cand_idx, struct iv_cand *old_cand,
6370*38fd1498Szrj struct cost_pair *best_cp)
6371*38fd1498Szrj {
6372*38fd1498Szrj struct iv_cand *cand;
6373*38fd1498Szrj struct cost_pair *cp;
6374*38fd1498Szrj
6375*38fd1498Szrj gcc_assert (old_cand != NULL && best_cp != NULL);
6376*38fd1498Szrj if (cand_idx == old_cand->id)
6377*38fd1498Szrj return best_cp;
6378*38fd1498Szrj
6379*38fd1498Szrj cand = data->vcands[cand_idx];
6380*38fd1498Szrj cp = get_group_iv_cost (data, group, cand);
6381*38fd1498Szrj if (cp != NULL && cheaper_cost_pair (cp, best_cp))
6382*38fd1498Szrj return cp;
6383*38fd1498Szrj
6384*38fd1498Szrj return best_cp;
6385*38fd1498Szrj }
6386*38fd1498Szrj
/* Try breaking local optimal fixed-point for IVS by replacing candidates
   which are used by more than one iv use.  For each of those candidates,
   this function tries to represent iv uses under that candidate using
   other ones with lower local cost, then tries to prune the new set.
   If the new set has lower cost, it returns the new cost after recording
   candidate replacement in list DELTA.  */

static comp_cost
iv_ca_replace (struct ivopts_data *data, struct iv_ca *ivs,
	       struct iv_ca_delta **delta)
{
  bitmap_iterator bi, bj;
  unsigned int i, j, k;
  struct iv_cand *cand;
  comp_cost orig_cost, acost;
  struct iv_ca_delta *act_delta, *tmp_delta;
  struct cost_pair *old_cp, *best_cp = NULL;

  *delta = NULL;
  orig_cost = iv_ca_cost (ivs);

  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
    {
      /* Only consider candidates with more than one use but at most
	 ALWAYS_PRUNE_CAND_SET_BOUND uses.  */
      if (ivs->n_cand_uses[i] == 1
	  || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
	continue;

      cand = data->vcands[i];

      act_delta = NULL;
      /* Represent uses under current candidate using other ones with
	 lower local cost.  */
      for (j = 0; j < ivs->upto; j++)
	{
	  struct iv_group *group = data->vgroups[j];
	  old_cp = iv_ca_cand_for_group (ivs, group);

	  if (old_cp->cand != cand)
	    continue;

	  /* Find the locally cheapest alternative candidate for GROUP.  */
	  best_cp = old_cp;
	  if (data->consider_all_candidates)
	    for (k = 0; k < data->vcands.length (); k++)
	      best_cp = cheaper_cost_with_cand (data, group, k,
						old_cp->cand, best_cp);
	  else
	    EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, k, bj)
	      best_cp = cheaper_cost_with_cand (data, group, k,
						old_cp->cand, best_cp);

	  if (best_cp == old_cp)
	    continue;

	  act_delta = iv_ca_delta_add (group, old_cp, best_cp, act_delta);
	}
      /* No need for further prune.  */
      if (!act_delta)
	continue;

      /* Prune the new candidate set.  */
      iv_ca_delta_commit (data, ivs, act_delta, true);
      acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
      iv_ca_delta_commit (data, ivs, act_delta, false);
      act_delta = iv_ca_delta_join (act_delta, tmp_delta);

      /* Accept the first replacement that improves on the original cost;
	 otherwise discard it and keep searching.  */
      if (acost < orig_cost)
	{
	  *delta = act_delta;
	  return acost;
	}
      else
	iv_ca_delta_free (&act_delta);
    }

  return orig_cost;
}
6463*38fd1498Szrj
/* Tries to extend the sets IVS in the best possible way in order to
   express the GROUP.  If ORIGINALP is true, prefer candidates from
   the original set of IVs, otherwise favor important candidates not
   based on any memory object.  Returns true iff GROUP could be
   expressed at finite cost; the cheapest delta found is committed
   into IVS.  */

static bool
try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
		  struct iv_group *group, bool originalp)
{
  comp_cost best_cost, act_cost;
  unsigned i;
  bitmap_iterator bi;
  struct iv_cand *cand;
  struct iv_ca_delta *best_delta = NULL, *act_delta;
  struct cost_pair *cp;

  /* Let iv_ca_add_group pick an initial candidate for GROUP; record
     that choice (if any) as the baseline delta, then detach it again
     so every trial below starts from the same unassigned state.  */
  iv_ca_add_group (data, ivs, group);
  best_cost = iv_ca_cost (ivs);
  cp = iv_ca_cand_for_group (ivs, group);
  if (cp)
    {
      best_delta = iv_ca_delta_add (group, NULL, cp, NULL);
      iv_ca_set_no_cp (data, ivs, group);
    }

  /* If ORIGINALP is true, try to find the original IV for the use.  Otherwise
     first try important candidates not based on any memory object.  Only if
     this fails, try the specific ones.  Rationale -- in loops with many
     variables the best choice often is to use just one generic biv.  If we
     added here many ivs specific to the uses, the optimization algorithm later
     would be likely to get stuck in a local minimum, thus causing us to create
     too many ivs.  The approach from few ivs to more seems more likely to be
     successful -- starting from few ivs, replacing an expensive use by a
     specific iv should always be a win.  */
  EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, i, bi)
    {
      cand = data->vcands[i];

      if (originalp && cand->pos !=IP_ORIGINAL)
	continue;

      if (!originalp && cand->iv->base_object != NULL_TREE)
	continue;

      if (iv_ca_cand_used_p (ivs, cand))
	continue;

      /* No recorded finite cost for expressing GROUP by CAND.  */
      cp = get_group_iv_cost (data, group, cand);
      if (!cp)
	continue;

      /* Tentatively bind GROUP to CAND, measure the whole-set cost of
	 extending IVS by CAND, then undo the binding; the net effect is
	 accumulated in ACT_DELTA instead of IVS itself.  */
      iv_ca_set_cp (data, ivs, group, cp);
      act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
			       true);
      iv_ca_set_no_cp (data, ivs, group);
      act_delta = iv_ca_delta_add (group, NULL, cp, act_delta);

      if (act_cost < best_cost)
	{
	  best_cost = act_cost;

	  iv_ca_delta_free (&best_delta);
	  best_delta = act_delta;
	}
      else
	iv_ca_delta_free (&act_delta);
    }

  if (best_cost.infinite_cost_p ())
    {
      /* The preferred candidates failed; fall back to every candidate
	 that has an entry in GROUP's cost map.  */
      for (i = 0; i < group->n_map_members; i++)
	{
	  cp = group->cost_map + i;
	  cand = cp->cand;
	  if (!cand)
	    continue;

	  /* Already tried this.  */
	  if (cand->important)
	    {
	      if (originalp && cand->pos == IP_ORIGINAL)
		continue;
	      if (!originalp && cand->iv->base_object == NULL_TREE)
		continue;
	    }

	  if (iv_ca_cand_used_p (ivs, cand))
	    continue;

	  act_delta = NULL;
	  iv_ca_set_cp (data, ivs, group, cp);
	  act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
	  iv_ca_set_no_cp (data, ivs, group);
	  act_delta = iv_ca_delta_add (group,
				       iv_ca_cand_for_group (ivs, group),
				       cp, act_delta);

	  if (act_cost < best_cost)
	    {
	      best_cost = act_cost;

	      if (best_delta)
		iv_ca_delta_free (&best_delta);
	      best_delta = act_delta;
	    }
	  else
	    iv_ca_delta_free (&act_delta);
	}
    }

  /* Commit the cheapest delta found (possibly none).  */
  iv_ca_delta_commit (data, ivs, best_delta, true);
  iv_ca_delta_free (&best_delta);

  return !best_cost.infinite_cost_p ();
}
6579*38fd1498Szrj
6580*38fd1498Szrj /* Finds an initial assignment of candidates to uses. */
6581*38fd1498Szrj
6582*38fd1498Szrj static struct iv_ca *
get_initial_solution(struct ivopts_data * data,bool originalp)6583*38fd1498Szrj get_initial_solution (struct ivopts_data *data, bool originalp)
6584*38fd1498Szrj {
6585*38fd1498Szrj unsigned i;
6586*38fd1498Szrj struct iv_ca *ivs = iv_ca_new (data);
6587*38fd1498Szrj
6588*38fd1498Szrj for (i = 0; i < data->vgroups.length (); i++)
6589*38fd1498Szrj if (!try_add_cand_for (data, ivs, data->vgroups[i], originalp))
6590*38fd1498Szrj {
6591*38fd1498Szrj iv_ca_free (&ivs);
6592*38fd1498Szrj return NULL;
6593*38fd1498Szrj }
6594*38fd1498Szrj
6595*38fd1498Szrj return ivs;
6596*38fd1498Szrj }
6597*38fd1498Szrj
/* Tries to improve set of induction variables IVS.  TRY_REPLACE_P
   points to a bool variable, this function tries to break local
   optimal fixed-point by replacing candidates in IVS if it's true.
   Returns true iff the cost of IVS was lowered (the improving delta
   has already been committed); false means a fixed point.  */

static bool
try_improve_iv_set (struct ivopts_data *data,
		    struct iv_ca *ivs, bool *try_replace_p)
{
  unsigned i, n_ivs;
  comp_cost acost, best_cost = iv_ca_cost (ivs);
  struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
  struct iv_cand *cand;

  /* Try extending the set of induction variables by one.  */
  for (i = 0; i < data->vcands.length (); i++)
    {
      cand = data->vcands[i];

      if (iv_ca_cand_used_p (ivs, cand))
	continue;

      acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
      if (!act_delta)
	continue;

      /* If we successfully added the candidate and the set is small enough,
	 try optimizing it by removing other candidates.  */
      if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
	{
	  /* Temporarily apply ACT_DELTA, compute a pruning delta on top
	     of it, roll back, and merge the two deltas into one.  */
	  iv_ca_delta_commit (data, ivs, act_delta, true);
	  acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
	  iv_ca_delta_commit (data, ivs, act_delta, false);
	  act_delta = iv_ca_delta_join (act_delta, tmp_delta);
	}

      if (acost < best_cost)
	{
	  best_cost = acost;
	  iv_ca_delta_free (&best_delta);
	  best_delta = act_delta;
	}
      else
	iv_ca_delta_free (&act_delta);
    }

  if (!best_delta)
    {
      /* Try removing the candidates from the set instead.  */
      best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);

      if (!best_delta && *try_replace_p)
	{
	  /* Replacement is attempted at most once per solution.  */
	  *try_replace_p = false;
	  /* So far candidate selecting algorithm tends to choose fewer IVs
	     so that it can handle cases in which loops have many variables
	     but the best choice is often to use only one general biv.  One
	     weakness is it can't handle opposite cases, in which different
	     candidates should be chosen with respect to each use.  To solve
	     the problem, we replace candidates in a manner described by the
	     comments of iv_ca_replace, thus give general algorithm a chance
	     to break local optimal fixed-point in these cases.  */
	  best_cost = iv_ca_replace (data, ivs, &best_delta);
	}

      if (!best_delta)
	return false;
    }

  /* Apply the winning delta; its recorded cost must agree with the
     recomputed cost of the updated set.  */
  iv_ca_delta_commit (data, ivs, best_delta, true);
  gcc_assert (best_cost == iv_ca_cost (ivs));
  iv_ca_delta_free (&best_delta);
  return true;
}
6671*38fd1498Szrj
6672*38fd1498Szrj /* Attempts to find the optimal set of induction variables. We do simple
6673*38fd1498Szrj greedy heuristic -- we try to replace at most one candidate in the selected
6674*38fd1498Szrj solution and remove the unused ivs while this improves the cost. */
6675*38fd1498Szrj
6676*38fd1498Szrj static struct iv_ca *
find_optimal_iv_set_1(struct ivopts_data * data,bool originalp)6677*38fd1498Szrj find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
6678*38fd1498Szrj {
6679*38fd1498Szrj struct iv_ca *set;
6680*38fd1498Szrj bool try_replace_p = true;
6681*38fd1498Szrj
6682*38fd1498Szrj /* Get the initial solution. */
6683*38fd1498Szrj set = get_initial_solution (data, originalp);
6684*38fd1498Szrj if (!set)
6685*38fd1498Szrj {
6686*38fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
6687*38fd1498Szrj fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
6688*38fd1498Szrj return NULL;
6689*38fd1498Szrj }
6690*38fd1498Szrj
6691*38fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
6692*38fd1498Szrj {
6693*38fd1498Szrj fprintf (dump_file, "Initial set of candidates:\n");
6694*38fd1498Szrj iv_ca_dump (data, dump_file, set);
6695*38fd1498Szrj }
6696*38fd1498Szrj
6697*38fd1498Szrj while (try_improve_iv_set (data, set, &try_replace_p))
6698*38fd1498Szrj {
6699*38fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
6700*38fd1498Szrj {
6701*38fd1498Szrj fprintf (dump_file, "Improved to:\n");
6702*38fd1498Szrj iv_ca_dump (data, dump_file, set);
6703*38fd1498Szrj }
6704*38fd1498Szrj }
6705*38fd1498Szrj
6706*38fd1498Szrj return set;
6707*38fd1498Szrj }
6708*38fd1498Szrj
6709*38fd1498Szrj static struct iv_ca *
find_optimal_iv_set(struct ivopts_data * data)6710*38fd1498Szrj find_optimal_iv_set (struct ivopts_data *data)
6711*38fd1498Szrj {
6712*38fd1498Szrj unsigned i;
6713*38fd1498Szrj comp_cost cost, origcost;
6714*38fd1498Szrj struct iv_ca *set, *origset;
6715*38fd1498Szrj
6716*38fd1498Szrj /* Determine the cost based on a strategy that starts with original IVs,
6717*38fd1498Szrj and try again using a strategy that prefers candidates not based
6718*38fd1498Szrj on any IVs. */
6719*38fd1498Szrj origset = find_optimal_iv_set_1 (data, true);
6720*38fd1498Szrj set = find_optimal_iv_set_1 (data, false);
6721*38fd1498Szrj
6722*38fd1498Szrj if (!origset && !set)
6723*38fd1498Szrj return NULL;
6724*38fd1498Szrj
6725*38fd1498Szrj origcost = origset ? iv_ca_cost (origset) : infinite_cost;
6726*38fd1498Szrj cost = set ? iv_ca_cost (set) : infinite_cost;
6727*38fd1498Szrj
6728*38fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
6729*38fd1498Szrj {
6730*38fd1498Szrj fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
6731*38fd1498Szrj origcost.cost, origcost.complexity);
6732*38fd1498Szrj fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
6733*38fd1498Szrj cost.cost, cost.complexity);
6734*38fd1498Szrj }
6735*38fd1498Szrj
6736*38fd1498Szrj /* Choose the one with the best cost. */
6737*38fd1498Szrj if (origcost <= cost)
6738*38fd1498Szrj {
6739*38fd1498Szrj if (set)
6740*38fd1498Szrj iv_ca_free (&set);
6741*38fd1498Szrj set = origset;
6742*38fd1498Szrj }
6743*38fd1498Szrj else if (origset)
6744*38fd1498Szrj iv_ca_free (&origset);
6745*38fd1498Szrj
6746*38fd1498Szrj for (i = 0; i < data->vgroups.length (); i++)
6747*38fd1498Szrj {
6748*38fd1498Szrj struct iv_group *group = data->vgroups[i];
6749*38fd1498Szrj group->selected = iv_ca_cand_for_group (set, group)->cand;
6750*38fd1498Szrj }
6751*38fd1498Szrj
6752*38fd1498Szrj return set;
6753*38fd1498Szrj }
6754*38fd1498Szrj
/* Creates a new induction variable corresponding to CAND, inserting its
   increment at the position encoded in CAND->pos.  For IP_ORIGINAL no
   new variable is created; the existing biv is preserved instead.  */

static void
create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
{
  gimple_stmt_iterator incr_pos;
  tree base;
  struct iv_use *use;
  struct iv_group *group;
  bool after = false;

  gcc_assert (cand->iv != NULL);

  /* Determine where the increment statement goes.  */
  switch (cand->pos)
    {
    case IP_NORMAL:
      incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
      break;

    case IP_END:
      incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
      after = true;
      break;

    case IP_AFTER_USE:
      after = true;
      /* fall through */
    case IP_BEFORE_USE:
      incr_pos = gsi_for_stmt (cand->incremented_at);
      break;

    case IP_ORIGINAL:
      /* Mark that the iv is preserved.  */
      name_info (data, cand->var_before)->preserve_biv = true;
      name_info (data, cand->var_after)->preserve_biv = true;

      /* Rewrite the increment so that it uses var_before directly.  */
      use = find_interesting_uses_op (data, cand->var_after);
      group = data->vgroups[use->group_id];
      group->selected = cand;
      /* The original biv is reused as-is; nothing new to create.  */
      return;
    }

  gimple_add_tmp_var (cand->var_before);

  base = unshare_expr (cand->iv->base);

  /* create_iv writes the SSA names actually used back through
     &cand->var_before / &cand->var_after.  */
  create_iv (base, unshare_expr (cand->iv->step),
	     cand->var_before, data->current_loop,
	     &incr_pos, after, &cand->var_before, &cand->var_after);
}
6806*38fd1498Szrj
6807*38fd1498Szrj /* Creates new induction variables described in SET. */
6808*38fd1498Szrj
6809*38fd1498Szrj static void
create_new_ivs(struct ivopts_data * data,struct iv_ca * set)6810*38fd1498Szrj create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
6811*38fd1498Szrj {
6812*38fd1498Szrj unsigned i;
6813*38fd1498Szrj struct iv_cand *cand;
6814*38fd1498Szrj bitmap_iterator bi;
6815*38fd1498Szrj
6816*38fd1498Szrj EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6817*38fd1498Szrj {
6818*38fd1498Szrj cand = data->vcands[i];
6819*38fd1498Szrj create_new_iv (data, cand);
6820*38fd1498Szrj }
6821*38fd1498Szrj
6822*38fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
6823*38fd1498Szrj {
6824*38fd1498Szrj fprintf (dump_file, "Selected IV set for loop %d",
6825*38fd1498Szrj data->current_loop->num);
6826*38fd1498Szrj if (data->loop_loc != UNKNOWN_LOCATION)
6827*38fd1498Szrj fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
6828*38fd1498Szrj LOCATION_LINE (data->loop_loc));
6829*38fd1498Szrj fprintf (dump_file, ", " HOST_WIDE_INT_PRINT_DEC " avg niters",
6830*38fd1498Szrj avg_loop_niter (data->current_loop));
6831*38fd1498Szrj fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
6832*38fd1498Szrj EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6833*38fd1498Szrj {
6834*38fd1498Szrj cand = data->vcands[i];
6835*38fd1498Szrj dump_cand (dump_file, cand);
6836*38fd1498Szrj }
6837*38fd1498Szrj fprintf (dump_file, "\n");
6838*38fd1498Szrj }
6839*38fd1498Szrj }
6840*38fd1498Szrj
/* Rewrites USE (definition of iv used in a nonlinear expression)
   using candidate CAND.  */

static void
rewrite_use_nonlinear_expr (struct ivopts_data *data,
			    struct iv_use *use, struct iv_cand *cand)
{
  gassign *ass;
  gimple_stmt_iterator bsi;
  tree comp, type = get_use_type (use), tgt;

  /* An important special case -- if we are asked to express value of
     the original iv by itself, just exit; there is no need to
     introduce a new computation (that might also need casting the
     variable to unsigned and back).  */
  if (cand->pos == IP_ORIGINAL
      && cand->incremented_at == use->stmt)
    {
      tree op = NULL_TREE;
      enum tree_code stmt_code;

      gcc_assert (is_gimple_assign (use->stmt));
      gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);

      /* Check whether we may leave the computation unchanged.
	 This is the case only if it does not rely on other
	 computations in the loop -- otherwise, the computation
	 we rely upon may be removed in remove_unused_ivs,
	 thus leading to ICE.  */
      stmt_code = gimple_assign_rhs_code (use->stmt);
      if (stmt_code == PLUS_EXPR
	  || stmt_code == MINUS_EXPR
	  || stmt_code == POINTER_PLUS_EXPR)
	{
	  /* OP becomes the operand other than the biv itself.  */
	  if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
	    op = gimple_assign_rhs2 (use->stmt);
	  else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
	    op = gimple_assign_rhs1 (use->stmt);
	}

      if (op != NULL_TREE)
	{
	  if (expr_invariant_in_loop_p (data->current_loop, op))
	    return;
	  if (TREE_CODE (op) == SSA_NAME)
	    {
	      struct iv *iv = get_iv (data, op);
	      if (iv != NULL && integer_zerop (iv->step))
		return;
	    }
	}
    }

  /* Find the value being replaced and the insertion point for the new
     computation.  */
  switch (gimple_code (use->stmt))
    {
    case GIMPLE_PHI:
      tgt = PHI_RESULT (use->stmt);

      /* If we should keep the biv, do not replace it.  */
      if (name_info (data, tgt)->preserve_biv)
	return;

      bsi = gsi_after_labels (gimple_bb (use->stmt));
      break;

    case GIMPLE_ASSIGN:
      tgt = gimple_assign_lhs (use->stmt);
      bsi = gsi_for_stmt (use->stmt);
      break;

    default:
      gcc_unreachable ();
    }

  /* Express USE in terms of CAND as invariant plus variant parts.  */
  aff_tree aff_inv, aff_var;
  if (!get_computation_aff_1 (data->current_loop, use->stmt,
			      use, cand, &aff_inv, &aff_var))
    gcc_unreachable ();

  unshare_aff_combination (&aff_inv);
  unshare_aff_combination (&aff_var);
  /* Prefer CSE opportunity than loop invariant by adding offset at last
     so that iv_uses have different offsets can be CSEed.  */
  poly_widest_int offset = aff_inv.offset;
  aff_inv.offset = 0;

  gimple_seq stmt_list = NULL, seq = NULL;
  tree comp_op1 = aff_combination_to_tree (&aff_inv);
  tree comp_op2 = aff_combination_to_tree (&aff_var);
  gcc_assert (comp_op1 && comp_op2);

  comp_op1 = force_gimple_operand (comp_op1, &seq, true, NULL);
  gimple_seq_add_seq (&stmt_list, seq);
  comp_op2 = force_gimple_operand (comp_op2, &seq, true, NULL);
  gimple_seq_add_seq (&stmt_list, seq);

  /* Put the pointer-typed operand (if any) first so the combination can
     use POINTER_PLUS_EXPR.  */
  if (POINTER_TYPE_P (TREE_TYPE (comp_op2)))
    std::swap (comp_op1, comp_op2);

  if (POINTER_TYPE_P (TREE_TYPE (comp_op1)))
    {
      comp = fold_build_pointer_plus (comp_op1,
				      fold_convert (sizetype, comp_op2));
      comp = fold_build_pointer_plus (comp,
				      wide_int_to_tree (sizetype, offset));
    }
  else
    {
      comp = fold_build2 (PLUS_EXPR, TREE_TYPE (comp_op1), comp_op1,
			  fold_convert (TREE_TYPE (comp_op1), comp_op2));
      comp = fold_build2 (PLUS_EXPR, TREE_TYPE (comp_op1), comp,
			  wide_int_to_tree (TREE_TYPE (comp_op1), offset));
    }

  comp = fold_convert (type, comp);
  if (!valid_gimple_rhs_p (comp)
      || (gimple_code (use->stmt) != GIMPLE_PHI
	  /* We can't allow re-allocating the stmt as it might be pointed
	     to still.  */
	  && (get_gimple_rhs_num_ops (TREE_CODE (comp))
	      >= gimple_num_ops (gsi_stmt (bsi)))))
    {
      comp = force_gimple_operand (comp, &seq, true, NULL);
      gimple_seq_add_seq (&stmt_list, seq);
      if (POINTER_TYPE_P (TREE_TYPE (tgt)))
	{
	  duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
	  /* As this isn't a plain copy we have to reset alignment
	     information.  */
	  if (SSA_NAME_PTR_INFO (comp))
	    mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
	}
    }

  /* Emit the computation before the use and substitute it in.  */
  gsi_insert_seq_before (&bsi, stmt_list, GSI_SAME_STMT);
  if (gimple_code (use->stmt) == GIMPLE_PHI)
    {
      /* A PHI argument cannot hold an arbitrary expression: assign COMP
	 to the PHI result and remove the PHI node itself.  */
      ass = gimple_build_assign (tgt, comp);
      gsi_insert_before (&bsi, ass, GSI_SAME_STMT);

      bsi = gsi_for_stmt (use->stmt);
      remove_phi_node (&bsi, false);
    }
  else
    {
      gimple_assign_set_rhs_from_tree (&bsi, comp);
      use->stmt = gsi_stmt (bsi);
    }
}
6990*38fd1498Szrj
/* Performs a peephole optimization to reorder the iv update statement with
   a mem ref to enable instruction combining in later phases.  The mem ref uses
   the iv value before the update, so the reordering transformation requires
   adjustment of the offset.  CAND is the selected IV_CAND.

   Example:

	t = MEM_REF (base, iv1, 8, 16);  // base, index, stride, offset
	iv2 = iv1 + 1;

	if (t < val)      (1)
	  goto L;
	goto Head;


   directly propagating t over to (1) will introduce overlapping live range
   thus increase register pressure.  This peephole transform it into:


	iv2 = iv1 + 1;
	t = MEM_REF (base, iv2, 8, 8);
	if (t < val)
	  goto L;
	goto Head;
*/

static void
adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
{
  tree var_after;
  gimple *iv_update, *stmt;
  basic_block bb;
  gimple_stmt_iterator gsi, gsi_iv;

  /* Only increments placed at the IP_NORMAL position are candidates
     for this reordering.  */
  if (cand->pos != IP_NORMAL)
    return;

  var_after = cand->var_after;
  iv_update = SSA_NAME_DEF_STMT (var_after);

  bb = gimple_bb (iv_update);
  gsi = gsi_last_nondebug_bb (bb);
  stmt = gsi_stmt (gsi);

  /* Only handle conditional statement for now.  */
  if (gimple_code (stmt) != GIMPLE_COND)
    return;

  /* The block must end with exactly: USE->stmt; IV_UPDATE; GIMPLE_COND.
     Bail out on any other shape.  */
  gsi_prev_nondebug (&gsi);
  stmt = gsi_stmt (gsi);
  if (stmt != iv_update)
    return;

  gsi_prev_nondebug (&gsi);
  if (gsi_end_p (gsi))
    return;

  stmt = gsi_stmt (gsi);
  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return;

  if (stmt != use->stmt)
    return;

  /* The use must produce an SSA value (e.g. a load into a register).  */
  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Reordering \n");
      print_gimple_stmt (dump_file, iv_update, 0);
      print_gimple_stmt (dump_file, use->stmt, 0);
      fprintf (dump_file, "\n");
    }

  /* Move the increment in front of the use ...  */
  gsi = gsi_for_stmt (use->stmt);
  gsi_iv = gsi_for_stmt (iv_update);
  gsi_move_before (&gsi_iv, &gsi);

  /* ... and record the new position so the use is rewritten against the
     already-updated IV value.  */
  cand->pos = IP_BEFORE_USE;
  cand->incremented_at = use->stmt;
}
7073*38fd1498Szrj
7074*38fd1498Szrj /* Return the alias pointer type that should be used for a MEM_REF
7075*38fd1498Szrj associated with USE, which has type USE_PTR_ADDRESS. */
7076*38fd1498Szrj
7077*38fd1498Szrj static tree
get_alias_ptr_type_for_ptr_address(iv_use * use)7078*38fd1498Szrj get_alias_ptr_type_for_ptr_address (iv_use *use)
7079*38fd1498Szrj {
7080*38fd1498Szrj gcall *call = as_a <gcall *> (use->stmt);
7081*38fd1498Szrj switch (gimple_call_internal_fn (call))
7082*38fd1498Szrj {
7083*38fd1498Szrj case IFN_MASK_LOAD:
7084*38fd1498Szrj case IFN_MASK_STORE:
7085*38fd1498Szrj /* The second argument contains the correct alias type. */
7086*38fd1498Szrj gcc_assert (use->op_p = gimple_call_arg_ptr (call, 0));
7087*38fd1498Szrj return TREE_TYPE (gimple_call_arg (call, 1));
7088*38fd1498Szrj
7089*38fd1498Szrj default:
7090*38fd1498Szrj gcc_unreachable ();
7091*38fd1498Szrj }
7092*38fd1498Szrj }
7093*38fd1498Szrj
7094*38fd1498Szrj
/* Rewrites USE (address that is an iv) using candidate CAND, replacing
   the referenced memory operand with a MEM_REF built from CAND.  */

static void
rewrite_use_address (struct ivopts_data *data,
		     struct iv_use *use, struct iv_cand *cand)
{
  aff_tree aff;
  bool ok;

  /* Possibly move the IV increment before the use first (see
     adjust_iv_update_pos).  */
  adjust_iv_update_pos (cand, use);
  ok = get_computation_aff (data->current_loop, use->stmt, use, cand, &aff);
  gcc_assert (ok);
  unshare_aff_combination (&aff);

  /* To avoid undefined overflow problems, all IV candidates use unsigned
     integer types.  The drawback is that this makes it impossible for
     create_mem_ref to distinguish an IV that is based on a memory object
     from one that represents simply an offset.

     To work around this problem, we pass a hint to create_mem_ref that
     indicates which variable (if any) in aff is an IV based on a memory
     object.  Note that we only consider the candidate.  If this is not
     based on an object, the base of the reference is in some subexpression
     of the use -- but these will use pointer types, so they are recognized
     by the create_mem_ref heuristics anyway.  */
  tree iv = var_at_stmt (data->current_loop, cand, use->stmt);
  tree base_hint = (cand->iv->base_object) ? iv : NULL_TREE;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  tree type = use->mem_type;
  tree alias_ptr_type;
  if (use->type == USE_PTR_ADDRESS)
    alias_ptr_type = get_alias_ptr_type_for_ptr_address (use);
  else
    {
      gcc_assert (type == TREE_TYPE (*use->op_p));
      /* Carry over the actual alignment of the original reference.  */
      unsigned int align = get_object_alignment (*use->op_p);
      if (align != TYPE_ALIGN (type))
	type = build_aligned_type (type, align);
      alias_ptr_type = reference_alias_ptr_type (*use->op_p);
    }
  tree ref = create_mem_ref (&bsi, type, &aff, alias_ptr_type,
			     iv, base_hint, data->speed);

  if (use->type == USE_PTR_ADDRESS)
    {
      /* The use wants the address of the memory, not the memory itself:
	 wrap the MEM_REF in an ADDR_EXPR and gimplify it.  */
      ref = fold_build1 (ADDR_EXPR, build_pointer_type (use->mem_type), ref);
      ref = fold_convert (get_use_type (use), ref);
      ref = force_gimple_operand_gsi (&bsi, ref, true, NULL_TREE,
				      true, GSI_SAME_STMT);
    }
  else
    copy_ref_info (ref, *use->op_p);

  *use->op_p = ref;
}
7150*38fd1498Szrj
/* Rewrites USE (the condition such that one of the arguments is an iv) using
   candidate CAND.  If the cost pair recorded a replacement bound, the
   exit test is rewritten as VAR <comp> BOUND; otherwise the original
   giv is simply re-expressed in terms of CAND.  */

static void
rewrite_use_compare (struct ivopts_data *data,
		     struct iv_use *use, struct iv_cand *cand)
{
  tree comp, op, bound;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  enum tree_code compare;
  struct iv_group *group = data->vgroups[use->group_id];
  struct cost_pair *cp = get_group_iv_cost (data, group, cand);

  bound = cp->value;
  if (bound)
    {
      tree var = var_at_stmt (data->current_loop, cand, use->stmt);
      tree var_type = TREE_TYPE (var);
      gimple_seq stmts;

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Replacing exit test: ");
	  print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
	}
      compare = cp->comp;
      bound = unshare_expr (fold_convert (var_type, bound));
      op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
      /* The bound is loop invariant, so it can be computed once in the
	 preheader.  */
      if (stmts)
	gsi_insert_seq_on_edge_immediate (
		loop_preheader_edge (data->current_loop),
		stmts);

      /* Rewrite the condition in place as VAR COMPARE OP.  */
      gcond *cond_stmt = as_a <gcond *> (use->stmt);
      gimple_cond_set_lhs (cond_stmt, var);
      gimple_cond_set_code (cond_stmt, compare);
      gimple_cond_set_rhs (cond_stmt, op);
      return;
    }

  /* The induction variable elimination failed; just express the original
     giv.  */
  comp = get_computation_at (data->current_loop, use->stmt, use, cand);
  gcc_assert (comp != NULL_TREE);
  gcc_assert (use->op_p != NULL);
  *use->op_p = force_gimple_operand_gsi (&bsi, comp, true,
					 SSA_NAME_VAR (*use->op_p),
					 true, GSI_SAME_STMT);
}
7200*38fd1498Szrj
7201*38fd1498Szrj /* Rewrite the groups using the selected induction variables. */
7202*38fd1498Szrj
7203*38fd1498Szrj static void
rewrite_groups(struct ivopts_data * data)7204*38fd1498Szrj rewrite_groups (struct ivopts_data *data)
7205*38fd1498Szrj {
7206*38fd1498Szrj unsigned i, j;
7207*38fd1498Szrj
7208*38fd1498Szrj for (i = 0; i < data->vgroups.length (); i++)
7209*38fd1498Szrj {
7210*38fd1498Szrj struct iv_group *group = data->vgroups[i];
7211*38fd1498Szrj struct iv_cand *cand = group->selected;
7212*38fd1498Szrj
7213*38fd1498Szrj gcc_assert (cand);
7214*38fd1498Szrj
7215*38fd1498Szrj if (group->type == USE_NONLINEAR_EXPR)
7216*38fd1498Szrj {
7217*38fd1498Szrj for (j = 0; j < group->vuses.length (); j++)
7218*38fd1498Szrj {
7219*38fd1498Szrj rewrite_use_nonlinear_expr (data, group->vuses[j], cand);
7220*38fd1498Szrj update_stmt (group->vuses[j]->stmt);
7221*38fd1498Szrj }
7222*38fd1498Szrj }
7223*38fd1498Szrj else if (address_p (group->type))
7224*38fd1498Szrj {
7225*38fd1498Szrj for (j = 0; j < group->vuses.length (); j++)
7226*38fd1498Szrj {
7227*38fd1498Szrj rewrite_use_address (data, group->vuses[j], cand);
7228*38fd1498Szrj update_stmt (group->vuses[j]->stmt);
7229*38fd1498Szrj }
7230*38fd1498Szrj }
7231*38fd1498Szrj else
7232*38fd1498Szrj {
7233*38fd1498Szrj gcc_assert (group->type == USE_COMPARE);
7234*38fd1498Szrj
7235*38fd1498Szrj for (j = 0; j < group->vuses.length (); j++)
7236*38fd1498Szrj {
7237*38fd1498Szrj rewrite_use_compare (data, group->vuses[j], cand);
7238*38fd1498Szrj update_stmt (group->vuses[j]->stmt);
7239*38fd1498Szrj }
7240*38fd1498Szrj }
7241*38fd1498Szrj }
7242*38fd1498Szrj }
7243*38fd1498Szrj
/* Removes the ivs that are not used after rewriting.  Dead ivs that are
   still referenced from debug bind statements get their value re-expressed
   in terms of a surviving candidate so that debug info is preserved.  */

static void
remove_unused_ivs (struct ivopts_data *data)
{
  unsigned j;
  bitmap_iterator bi;
  bitmap toremove = BITMAP_ALLOC (NULL);

  /* Figure out an order in which to release SSA DEFs so that we don't
     release something that we'd have to propagate into a debug stmt
     afterwards.  */
  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info;

      info = ver_info (data, j);
      /* Only remove genuine (non-constant-step) ivs that ended up with no
	 remaining role: not an invariant, no nonlinear use, and not a biv
	 we decided to preserve.  */
      if (info->iv
	  && !integer_zerop (info->iv->step)
	  && !info->inv_id
	  && !info->iv->nonlin_use
	  && !info->preserve_biv)
	{
	  bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));

	  tree def = info->iv->ssa_name;

	  if (MAY_HAVE_DEBUG_BIND_STMTS && SSA_NAME_DEF_STMT (def))
	    {
	      imm_use_iterator imm_iter;
	      use_operand_p use_p;
	      gimple *stmt;
	      int count = 0;

	      FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
		{
		  if (!gimple_debug_bind_p (stmt))
		    continue;

		  /* We just want to determine whether to do nothing
		     (count == 0), to substitute the computed
		     expression into a single use of the SSA DEF by
		     itself (count == 1), or to use a debug temp
		     because the SSA DEF is used multiple times or as
		     part of a larger expression (count > 1).  */
		  count++;
		  if (gimple_debug_bind_get_value (stmt) != def)
		    count++;

		  if (count > 1)
		    BREAK_FROM_IMM_USE_STMT (imm_iter);
		}

	      if (!count)
		continue;

	      /* Pick the best surviving candidate to express DEF with.
		 The preference order favors a candidate with the same
		 step, then the same mode, then a constant base; only the
		 first 64 groups are scanned to bound compile time.  */
	      struct iv_use dummy_use;
	      struct iv_cand *best_cand = NULL, *cand;
	      unsigned i, best_pref = 0, cand_pref;

	      memset (&dummy_use, 0, sizeof (dummy_use));
	      dummy_use.iv = info->iv;
	      for (i = 0; i < data->vgroups.length () && i < 64; i++)
		{
		  cand = data->vgroups[i]->selected;
		  if (cand == best_cand)
		    continue;
		  cand_pref = operand_equal_p (cand->iv->step,
					       info->iv->step, 0)
			      ? 4 : 0;
		  cand_pref
		    += TYPE_MODE (TREE_TYPE (cand->iv->base))
		       == TYPE_MODE (TREE_TYPE (info->iv->base))
		       ? 2 : 0;
		  cand_pref
		    += TREE_CODE (cand->iv->base) == INTEGER_CST
		       ? 1 : 0;
		  if (best_cand == NULL || best_pref < cand_pref)
		    {
		      best_cand = cand;
		      best_pref = cand_pref;
		    }
		}

	      if (!best_cand)
		continue;

	      tree comp = get_computation_at (data->current_loop,
					      SSA_NAME_DEF_STMT (def),
					      &dummy_use, best_cand);
	      if (!comp)
		continue;

	      if (count > 1)
		{
		  /* DEF is used multiple times or inside a larger debug
		     expression: introduce a debug temporary bound to the
		     replacement expression and substitute that instead.  */
		  tree vexpr = make_node (DEBUG_EXPR_DECL);
		  DECL_ARTIFICIAL (vexpr) = 1;
		  TREE_TYPE (vexpr) = TREE_TYPE (comp);
		  if (SSA_NAME_VAR (def))
		    SET_DECL_MODE (vexpr, DECL_MODE (SSA_NAME_VAR (def)));
		  else
		    SET_DECL_MODE (vexpr, TYPE_MODE (TREE_TYPE (vexpr)));
		  gdebug *def_temp
		    = gimple_build_debug_bind (vexpr, comp, NULL);
		  gimple_stmt_iterator gsi;

		  /* A debug bind cannot live among PHIs; place it after
		     the labels of the defining block instead.  */
		  if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
		    gsi = gsi_after_labels (gimple_bb
					    (SSA_NAME_DEF_STMT (def)));
		  else
		    gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));

		  gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
		  comp = vexpr;
		}

	      /* Replace every debug-bind use of DEF with COMP.  */
	      FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
		{
		  if (!gimple_debug_bind_p (stmt))
		    continue;

		  FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
		    SET_USE (use_p, comp);

		  update_stmt (stmt);
		}
	    }
	}
    }

  release_defs_bitset (toremove);

  BITMAP_FREE (toremove);
}
7378*38fd1498Szrj
7379*38fd1498Szrj /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
7380*38fd1498Szrj for hash_map::traverse. */
7381*38fd1498Szrj
7382*38fd1498Szrj bool
free_tree_niter_desc(edge const &,tree_niter_desc * const & value,void *)7383*38fd1498Szrj free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
7384*38fd1498Szrj {
7385*38fd1498Szrj free (value);
7386*38fd1498Szrj return true;
7387*38fd1498Szrj }
7388*38fd1498Szrj
7389*38fd1498Szrj /* Frees data allocated by the optimization of a single loop. */
7390*38fd1498Szrj
7391*38fd1498Szrj static void
free_loop_data(struct ivopts_data * data)7392*38fd1498Szrj free_loop_data (struct ivopts_data *data)
7393*38fd1498Szrj {
7394*38fd1498Szrj unsigned i, j;
7395*38fd1498Szrj bitmap_iterator bi;
7396*38fd1498Szrj tree obj;
7397*38fd1498Szrj
7398*38fd1498Szrj if (data->niters)
7399*38fd1498Szrj {
7400*38fd1498Szrj data->niters->traverse<void *, free_tree_niter_desc> (NULL);
7401*38fd1498Szrj delete data->niters;
7402*38fd1498Szrj data->niters = NULL;
7403*38fd1498Szrj }
7404*38fd1498Szrj
7405*38fd1498Szrj EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
7406*38fd1498Szrj {
7407*38fd1498Szrj struct version_info *info;
7408*38fd1498Szrj
7409*38fd1498Szrj info = ver_info (data, i);
7410*38fd1498Szrj info->iv = NULL;
7411*38fd1498Szrj info->has_nonlin_use = false;
7412*38fd1498Szrj info->preserve_biv = false;
7413*38fd1498Szrj info->inv_id = 0;
7414*38fd1498Szrj }
7415*38fd1498Szrj bitmap_clear (data->relevant);
7416*38fd1498Szrj bitmap_clear (data->important_candidates);
7417*38fd1498Szrj
7418*38fd1498Szrj for (i = 0; i < data->vgroups.length (); i++)
7419*38fd1498Szrj {
7420*38fd1498Szrj struct iv_group *group = data->vgroups[i];
7421*38fd1498Szrj
7422*38fd1498Szrj for (j = 0; j < group->vuses.length (); j++)
7423*38fd1498Szrj free (group->vuses[j]);
7424*38fd1498Szrj group->vuses.release ();
7425*38fd1498Szrj
7426*38fd1498Szrj BITMAP_FREE (group->related_cands);
7427*38fd1498Szrj for (j = 0; j < group->n_map_members; j++)
7428*38fd1498Szrj {
7429*38fd1498Szrj if (group->cost_map[j].inv_vars)
7430*38fd1498Szrj BITMAP_FREE (group->cost_map[j].inv_vars);
7431*38fd1498Szrj if (group->cost_map[j].inv_exprs)
7432*38fd1498Szrj BITMAP_FREE (group->cost_map[j].inv_exprs);
7433*38fd1498Szrj }
7434*38fd1498Szrj
7435*38fd1498Szrj free (group->cost_map);
7436*38fd1498Szrj free (group);
7437*38fd1498Szrj }
7438*38fd1498Szrj data->vgroups.truncate (0);
7439*38fd1498Szrj
7440*38fd1498Szrj for (i = 0; i < data->vcands.length (); i++)
7441*38fd1498Szrj {
7442*38fd1498Szrj struct iv_cand *cand = data->vcands[i];
7443*38fd1498Szrj
7444*38fd1498Szrj if (cand->inv_vars)
7445*38fd1498Szrj BITMAP_FREE (cand->inv_vars);
7446*38fd1498Szrj if (cand->inv_exprs)
7447*38fd1498Szrj BITMAP_FREE (cand->inv_exprs);
7448*38fd1498Szrj free (cand);
7449*38fd1498Szrj }
7450*38fd1498Szrj data->vcands.truncate (0);
7451*38fd1498Szrj
7452*38fd1498Szrj if (data->version_info_size < num_ssa_names)
7453*38fd1498Szrj {
7454*38fd1498Szrj data->version_info_size = 2 * num_ssa_names;
7455*38fd1498Szrj free (data->version_info);
7456*38fd1498Szrj data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
7457*38fd1498Szrj }
7458*38fd1498Szrj
7459*38fd1498Szrj data->max_inv_var_id = 0;
7460*38fd1498Szrj data->max_inv_expr_id = 0;
7461*38fd1498Szrj
7462*38fd1498Szrj FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
7463*38fd1498Szrj SET_DECL_RTL (obj, NULL_RTX);
7464*38fd1498Szrj
7465*38fd1498Szrj decl_rtl_to_reset.truncate (0);
7466*38fd1498Szrj
7467*38fd1498Szrj data->inv_expr_tab->empty ();
7468*38fd1498Szrj
7469*38fd1498Szrj data->iv_common_cand_tab->empty ();
7470*38fd1498Szrj data->iv_common_cands.truncate (0);
7471*38fd1498Szrj }
7472*38fd1498Szrj
/* Finalizes data structures used by the iv optimization pass.  LOOPS is the
   loop tree.  Must run after the last loop has been processed; releases
   both the per-loop and the pass-global state in DATA.  */

static void
tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
{
  /* Release any remaining per-loop state first, since it references the
     structures freed below.  */
  free_loop_data (data);
  free (data->version_info);
  BITMAP_FREE (data->relevant);
  BITMAP_FREE (data->important_candidates);

  decl_rtl_to_reset.release ();
  data->vgroups.release ();
  data->vcands.release ();
  delete data->inv_expr_tab;
  data->inv_expr_tab = NULL;
  free_affine_expand_cache (&data->name_expansion_cache);
  delete data->iv_common_cand_tab;
  data->iv_common_cand_tab = NULL;
  data->iv_common_cands.release ();
  /* The obstack backs the iv structures themselves; freeing it with a
     NULL base releases everything ever allocated on it.  */
  obstack_free (&data->iv_obstack, NULL);
}
7495*38fd1498Szrj
7496*38fd1498Szrj /* Returns true if the loop body BODY includes any function calls. */
7497*38fd1498Szrj
7498*38fd1498Szrj static bool
loop_body_includes_call(basic_block * body,unsigned num_nodes)7499*38fd1498Szrj loop_body_includes_call (basic_block *body, unsigned num_nodes)
7500*38fd1498Szrj {
7501*38fd1498Szrj gimple_stmt_iterator gsi;
7502*38fd1498Szrj unsigned i;
7503*38fd1498Szrj
7504*38fd1498Szrj for (i = 0; i < num_nodes; i++)
7505*38fd1498Szrj for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
7506*38fd1498Szrj {
7507*38fd1498Szrj gimple *stmt = gsi_stmt (gsi);
7508*38fd1498Szrj if (is_gimple_call (stmt)
7509*38fd1498Szrj && !gimple_call_internal_p (stmt)
7510*38fd1498Szrj && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
7511*38fd1498Szrj return true;
7512*38fd1498Szrj }
7513*38fd1498Szrj return false;
7514*38fd1498Szrj }
7515*38fd1498Szrj
/* Optimizes the LOOP.  Returns true if anything changed, i.e. a new iv
   set was selected and the uses rewritten.  The per-loop state in DATA
   is released before returning, on both paths.  */

static bool
tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
{
  bool changed = false;
  struct iv_ca *iv_ca;
  edge exit = single_dom_exit (loop);
  basic_block *body;

  gcc_assert (!data->niters);
  data->current_loop = loop;
  data->loop_loc = find_loop_location (loop);
  data->speed = optimize_loop_for_speed_p (loop);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Processing loop %d", loop->num);
      if (data->loop_loc != UNKNOWN_LOCATION)
	fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
		 LOCATION_LINE (data->loop_loc));
      fprintf (dump_file, "\n");

      if (exit)
	{
	  fprintf (dump_file, "  single exit %d -> %d, exit condition ",
		   exit->src->index, exit->dest->index);
	  print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
	  fprintf (dump_file, "\n");
	}

      fprintf (dump_file, "\n");
    }

  body = get_loop_body (loop);
  data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
  /* Give the statements fresh uids so relative statement order within a
     block can be compared cheaply.  */
  renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
  free (body);

  data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);

  /* For each ssa name determines whether it behaves as an induction variable
     in some loop.  */
  if (!find_induction_variables (data))
    goto finish;

  /* Finds interesting uses (item 1).  */
  find_interesting_uses (data);
  /* Bail out if there are too many groups; the cost computation below is
     quadratic-ish in the number of groups.  */
  if (data->vgroups.length () > MAX_CONSIDERED_GROUPS)
    goto finish;

  /* Finds candidates for the induction variables (item 2).  */
  find_iv_candidates (data);

  /* Calculates the costs (item 3, part 1).  */
  determine_iv_costs (data);
  determine_group_iv_costs (data);
  determine_set_costs (data);

  /* Find the optimal set of induction variables (item 3, part 2).  */
  iv_ca = find_optimal_iv_set (data);
  if (!iv_ca)
    goto finish;
  changed = true;

  /* Create the new induction variables (item 4, part 1).  */
  create_new_ivs (data, iv_ca);
  iv_ca_free (&iv_ca);

  /* Rewrite the uses (item 4, part 2).  */
  rewrite_groups (data);

  /* Remove the ivs that are unused after rewriting.  */
  remove_unused_ivs (data);

  /* We have changed the structure of induction variables; it might happen
     that definitions in the scev database refer to some of them that were
     eliminated.  */
  scev_reset ();

finish:
  free_loop_data (data);

  return changed;
}
7601*38fd1498Szrj
7602*38fd1498Szrj /* Main entry point. Optimizes induction variables in loops. */
7603*38fd1498Szrj
7604*38fd1498Szrj void
tree_ssa_iv_optimize(void)7605*38fd1498Szrj tree_ssa_iv_optimize (void)
7606*38fd1498Szrj {
7607*38fd1498Szrj struct loop *loop;
7608*38fd1498Szrj struct ivopts_data data;
7609*38fd1498Szrj
7610*38fd1498Szrj tree_ssa_iv_optimize_init (&data);
7611*38fd1498Szrj
7612*38fd1498Szrj /* Optimize the loops starting with the innermost ones. */
7613*38fd1498Szrj FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
7614*38fd1498Szrj {
7615*38fd1498Szrj if (dump_file && (dump_flags & TDF_DETAILS))
7616*38fd1498Szrj flow_loop_dump (loop, dump_file, NULL, 1);
7617*38fd1498Szrj
7618*38fd1498Szrj tree_ssa_iv_optimize_loop (&data, loop);
7619*38fd1498Szrj }
7620*38fd1498Szrj
7621*38fd1498Szrj tree_ssa_iv_optimize_finalize (&data);
7622*38fd1498Szrj }
7623*38fd1498Szrj
7624*38fd1498Szrj #include "gt-tree-ssa-loop-ivopts.h"
7625