/* Induction variable optimizations.
   Copyright (C) 2003-2018 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does so in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

      Note the interesting uses are categorized and handled in groups.
      Generally, address type uses are grouped together if their iv bases
      differ only in a constant offset.

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
	 groups/uses" above

   3) The optimal (with respect to a cost function) set of variables is
      chosen.  The cost function assigns a cost to sets of induction
      variables and consists of three parts:

      -- The group/use costs.  Each of the interesting groups/uses chooses
	 the best induction variable in the set and adds its cost to the sum.
	 The cost reflects the time spent on modifying the induction variables
	 value to be usable for the given purpose (adding base and offset for
	 arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
	 reflects the costs associated with incrementing the value of the
	 variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
	 added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, and the dead code
      is removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible; it might give better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  */

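/* As a rough, hypothetical illustration (not output of this pass), for a
   loop such as

       for (i = 0; i < n; i++)
	 sum += a[i];

   the address use "&a[i]" and the compare use "i < n" might both be
   expressed by a single pointer candidate, turning the loop into roughly

       for (p = &a[0]; p < &a[n]; p += 4)
	 sum += *p;

   which is the classic strength-reduction result (assuming 4-byte array
   elements; the actual choice depends on the target cost model).  */
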
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "tree-pass.h"
#include "memmodel.h"
#include "tm_p.h"
#include "ssa.h"
#include "expmed.h"
#include "insn-config.h"
#include "emit-rtl.h"
#include "recog.h"
#include "cgraph.h"
#include "gimple-pretty-print.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "tree-eh.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "tree-cfg.h"
#include "tree-ssa-loop-ivopts.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "explow.h"
#include "expr.h"
#include "tree-dfa.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-scalar-evolution.h"
#include "params.h"
#include "tree-affine.h"
#include "tree-ssa-propagate.h"
#include "tree-ssa-address.h"
#include "builtins.h"
#include "tree-vectorizer.h"

/* FIXME: Expressions are expanded to RTL in this pass to determine the
   cost of different addressing modes.  This should be moved to a TBD
   interface between the GIMPLE and RTL worlds.  */

/* The infinite cost.  */
#define INFTY 10000000

/* Returns the expected number of loop iterations for LOOP.
   The average trip count is computed from profile data if it
   exists. */

static inline HOST_WIDE_INT
avg_loop_niter (struct loop *loop)
{
  HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
  if (niter == -1)
    {
      niter = likely_max_stmt_executions_int (loop);

      if (niter == -1 || niter > PARAM_VALUE (PARAM_AVG_LOOP_NITER))
	return PARAM_VALUE (PARAM_AVG_LOOP_NITER);
    }

  return niter;
}

struct iv_use;

/* Representation of the induction variable.  */
struct iv
{
  tree base;		/* Initial value of the iv.  */
  tree base_object;	/* The memory object that the induction variable points to.  */
  tree step;		/* Step of the iv (constant only).  */
  tree ssa_name;	/* The ssa name with the value.  */
  struct iv_use *nonlin_use;	/* The nonlinear use it appears in, if any.  */
  bool biv_p;		/* Is it a biv?  */
  bool no_overflow;	/* True if the iv doesn't overflow.  */
  bool have_address_use;/* For a biv, whether it's used in any address
			   type use.  */
};

/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;		/* The ssa name.  */
  struct iv *iv;	/* Induction variable description.  */
  bool has_nonlin_use;	/* For a loop-level invariant, whether it is used in
			   an expression that is not an induction variable.  */
  bool preserve_biv;	/* For the original biv, whether to preserve it.  */
  unsigned inv_id;	/* Id of an invariant.  */
};

/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
  USE_REF_ADDRESS,	/* Use is an address for an explicit memory
			   reference.  */
  USE_PTR_ADDRESS,	/* Use is a pointer argument to a function in
			   cases where the expansion of the function
			   will turn the argument into a normal address.  */
  USE_COMPARE		/* Use is a compare.  */
};

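/* A few hypothetical examples of how source constructs could map to these
   use kinds (illustrative only; the real classification happens when the
   uses are recorded):

     sum += i * i;		i occurs in a USE_NONLINEAR_EXPR use
     a[i] = 0;			&a[i] is a USE_REF_ADDRESS use
     memcpy (&a[i], p, 4);	&a[i] may be a USE_PTR_ADDRESS use if the
				call is expanded to open-coded accesses
     if (i < n) ...		i occurs in a USE_COMPARE use  */
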
/* Cost of a computation.  */
struct comp_cost
{
  comp_cost (): cost (0), complexity (0), scratch (0)
  {}

  comp_cost (int cost, unsigned complexity, int scratch = 0)
    : cost (cost), complexity (complexity), scratch (scratch)
  {}

  /* Returns true if COST is infinite.  */
  bool infinite_cost_p ();

  /* Adds costs COST1 and COST2.  */
  friend comp_cost operator+ (comp_cost cost1, comp_cost cost2);

  /* Adds COST to the comp_cost.  */
  comp_cost operator+= (comp_cost cost);

  /* Adds constant C to this comp_cost.  */
  comp_cost operator+= (HOST_WIDE_INT c);

  /* Subtracts constant C from this comp_cost.  */
  comp_cost operator-= (HOST_WIDE_INT c);

  /* Divides the comp_cost by constant C.  */
  comp_cost operator/= (HOST_WIDE_INT c);

  /* Multiplies the comp_cost by constant C.  */
  comp_cost operator*= (HOST_WIDE_INT c);

  /* Subtracts cost COST2 from COST1.  */
  friend comp_cost operator- (comp_cost cost1, comp_cost cost2);

  /* Subtracts COST from this comp_cost.  */
  comp_cost operator-= (comp_cost cost);

  /* Returns true if COST1 is smaller than COST2.  */
  friend bool operator< (comp_cost cost1, comp_cost cost2);

  /* Returns true if COST1 and COST2 are equal.  */
  friend bool operator== (comp_cost cost1, comp_cost cost2);

  /* Returns true if COST1 is smaller than or equal to COST2.  */
  friend bool operator<= (comp_cost cost1, comp_cost cost2);

  int cost;		/* The runtime cost.  */
  unsigned complexity;  /* The estimate of the complexity of the code for
			   the computation (in no concrete units --
			   complexity field should be larger for more
			   complex expressions and addressing modes).  */
  int scratch;		/* Scratch used during cost computation.  */
};

static const comp_cost no_cost;
static const comp_cost infinite_cost (INFTY, INFTY, INFTY);

bool
comp_cost::infinite_cost_p ()
{
  return cost == INFTY;
}

comp_cost
operator+ (comp_cost cost1, comp_cost cost2)
{
  if (cost1.infinite_cost_p () || cost2.infinite_cost_p ())
    return infinite_cost;

  cost1.cost += cost2.cost;
  cost1.complexity += cost2.complexity;

  return cost1;
}

comp_cost
operator- (comp_cost cost1, comp_cost cost2)
{
  if (cost1.infinite_cost_p ())
    return infinite_cost;

  gcc_assert (!cost2.infinite_cost_p ());

  cost1.cost -= cost2.cost;
  cost1.complexity -= cost2.complexity;

  return cost1;
}

comp_cost
comp_cost::operator+= (comp_cost cost)
{
  *this = *this + cost;
  return *this;
}

comp_cost
comp_cost::operator+= (HOST_WIDE_INT c)
{
  if (infinite_cost_p ())
    return *this;

  this->cost += c;

  return *this;
}

comp_cost
comp_cost::operator-= (HOST_WIDE_INT c)
{
  if (infinite_cost_p ())
    return *this;

  this->cost -= c;

  return *this;
}

comp_cost
comp_cost::operator/= (HOST_WIDE_INT c)
{
  if (infinite_cost_p ())
    return *this;

  this->cost /= c;

  return *this;
}

comp_cost
comp_cost::operator*= (HOST_WIDE_INT c)
{
  if (infinite_cost_p ())
    return *this;

  this->cost *= c;

  return *this;
}

comp_cost
comp_cost::operator-= (comp_cost cost)
{
  *this = *this - cost;
  return *this;
}

bool
operator< (comp_cost cost1, comp_cost cost2)
{
  if (cost1.cost == cost2.cost)
    return cost1.complexity < cost2.complexity;

  return cost1.cost < cost2.cost;
}

bool
operator== (comp_cost cost1, comp_cost cost2)
{
  return cost1.cost == cost2.cost
    && cost1.complexity == cost2.complexity;
}

bool
operator<= (comp_cost cost1, comp_cost cost2)
{
  return cost1 < cost2 || cost1 == cost2;
}

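/* Summarizing the operators above (no additional semantics): infinite_cost
   absorbs addition, so any sum involving an infinite operand stays
   infinite, and comparison is lexicographic on (cost, complexity).
   For example, given hypothetical values

     comp_cost a (4, 1), b (4, 2);

   "a + infinite_cost" is infinite_cost, "a < b" is true (equal cost,
   lower complexity), and "comp_cost (3, 9) < a" is true (cost compares
   first).  */
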
struct iv_inv_expr_ent;

/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand;	/* The candidate.  */
  comp_cost cost;	/* The cost.  */
  enum tree_code comp;	/* For iv elimination, the comparison.  */
  bitmap inv_vars;	/* The list of invariant ssa_vars that have to be
			   preserved when representing iv_use with iv_cand.  */
  bitmap inv_exprs;	/* The list of newly created invariant expressions
			   when representing iv_use with iv_cand.  */
  tree value;		/* For final value elimination, the expression for
			   the final value of the iv.  For iv elimination,
			   the new bound to compare with.  */
};

/* Use.  */
struct iv_use
{
  unsigned id;		/* The id of the use.  */
  unsigned group_id;	/* The group id the use belongs to.  */
  enum use_type type;	/* Type of the use.  */
  tree mem_type;	/* The memory type to use when testing whether an
			   address is legitimate, and what the address's
			   cost is.  */
  struct iv *iv;	/* The induction variable it is based on.  */
  gimple *stmt;		/* Statement in which it occurs.  */
  tree *op_p;		/* The place where it occurs.  */

  tree addr_base;	/* Base address with const offset stripped.  */
  poly_uint64_pod addr_offset;
			/* Const offset stripped from base address.  */
};

/* Group of uses.  */
struct iv_group
{
  /* The id of the group.  */
  unsigned id;
  /* Uses of the group are of the same type.  */
  enum use_type type;
  /* The set of "related" IV candidates, plus the important ones.  */
  bitmap related_cands;
  /* Number of IV candidates in the cost_map.  */
  unsigned n_map_members;
  /* The costs with respect to the iv candidates.  */
  struct cost_pair *cost_map;
  /* The selected candidate for the group.  */
  struct iv_cand *selected;
  /* Uses in the group.  */
  vec<struct iv_use *> vuses;
};

/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,		/* At the end, just before the exit condition.  */
  IP_END,		/* At the end of the latch block.  */
  IP_BEFORE_USE,	/* Immediately before a specific use.  */
  IP_AFTER_USE,		/* Immediately after a specific use.  */
  IP_ORIGINAL		/* The original biv.  */
};

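/* A rough sketch of where the increment lands for the two block-level
   positions (illustrative; the exact statement is chosen by ip_normal_pos
   and friends):

     loop:
       ...loop body...
       i.next = i + step;	// IP_NORMAL: just before the exit test
       if (cond) goto exit;
     latch:
       i.next = i + step;	// IP_END: at the end of the latch block
       goto loop;

   IP_BEFORE_USE/IP_AFTER_USE place the increment next to a particular use,
   and IP_ORIGINAL keeps the original biv's increment statement.  */
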
/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;		/* The number of the candidate.  */
  bool important;	/* Whether this is an "important" candidate, i.e. such
			   that it should be considered by all uses.  */
  ENUM_BITFIELD(iv_position) pos : 8;	/* Where it is computed.  */
  gimple *incremented_at;/* For original biv, the statement where it is
			   incremented.  */
  tree var_before;	/* The variable used for it before increment.  */
  tree var_after;	/* The variable used for it after increment.  */
  struct iv *iv;	/* The value of the candidate.  NULL for
			   "pseudocandidate" used to indicate the possibility
			   to replace the final value of an iv by direct
			   computation of the value.  */
  unsigned cost;	/* Cost of the candidate.  */
  unsigned cost_step;	/* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
			      where it is incremented.  */
  bitmap inv_vars;	/* The list of invariant ssa_vars used in step of the
			   iv_cand.  */
  bitmap inv_exprs;	/* If step is more complicated than a single ssa_var,
			   handle it as a new invariant expression which will
			   be hoisted out of loop.  */
  struct iv *orig_iv;	/* The original iv if this cand is added from biv with
			   smaller type.  */
};

/* Hashtable entry for common candidate derived from iv uses.  */
struct iv_common_cand
{
  tree base;
  tree step;
  /* IV uses from which this common candidate is derived.  */
  auto_vec<struct iv_use *> uses;
  hashval_t hash;
};

/* Hashtable helpers.  */

struct iv_common_cand_hasher : delete_ptr_hash <iv_common_cand>
{
  static inline hashval_t hash (const iv_common_cand *);
  static inline bool equal (const iv_common_cand *, const iv_common_cand *);
};

/* Hash function for possible common candidates.  */

inline hashval_t
iv_common_cand_hasher::hash (const iv_common_cand *ccand)
{
  return ccand->hash;
}

/* Hash table equality function for common candidates.  */

inline bool
iv_common_cand_hasher::equal (const iv_common_cand *ccand1,
			      const iv_common_cand *ccand2)
{
  return (ccand1->hash == ccand2->hash
	  && operand_equal_p (ccand1->base, ccand2->base, 0)
	  && operand_equal_p (ccand1->step, ccand2->step, 0)
	  && (TYPE_PRECISION (TREE_TYPE (ccand1->base))
	      == TYPE_PRECISION (TREE_TYPE (ccand2->base))));
}

/* Loop invariant expression hashtable entry.  */

struct iv_inv_expr_ent
{
  /* Tree expression of the entry.  */
  tree expr;
  /* Unique identifier.  */
  int id;
  /* Hash value.  */
  hashval_t hash;
};

/* Sort iv_inv_expr_ent pair A and B by id field.  */

static int
sort_iv_inv_expr_ent (const void *a, const void *b)
{
  const iv_inv_expr_ent * const *e1 = (const iv_inv_expr_ent * const *) (a);
  const iv_inv_expr_ent * const *e2 = (const iv_inv_expr_ent * const *) (b);

  unsigned id1 = (*e1)->id;
  unsigned id2 = (*e2)->id;

  if (id1 < id2)
    return -1;
  else if (id1 > id2)
    return 1;
  else
    return 0;
}

/* Hashtable helpers.  */

struct iv_inv_expr_hasher : free_ptr_hash <iv_inv_expr_ent>
{
  static inline hashval_t hash (const iv_inv_expr_ent *);
  static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
};

/* Return true if uses of type TYPE represent some form of address.  */

inline bool
address_p (use_type type)
{
  return type == USE_REF_ADDRESS || type == USE_PTR_ADDRESS;
}

/* Hash function for loop invariant expressions.  */

inline hashval_t
iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
{
  return expr->hash;
}

/* Hash table equality function for expressions.  */

inline bool
iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
			   const iv_inv_expr_ent *expr2)
{
  return expr1->hash == expr2->hash
	 && operand_equal_p (expr1->expr, expr2->expr, 0);
}

struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;
  source_location loop_loc;

  /* Numbers of iterations for all exits of the current loop.  */
  hash_map<edge, tree_niter_desc *> *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The hashtable of loop invariant expressions created
     by ivopts.  */
  hash_table<iv_inv_expr_hasher> *inv_expr_tab;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  vec<iv_group *> vgroups;

  /* The candidates.  */
  vec<iv_cand *> vcands;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* Cache used by tree_to_aff_combination_expand.  */
  hash_map<tree, name_expansion *> *name_expansion_cache;

  /* The hashtable of common candidates derived from iv uses.  */
  hash_table<iv_common_cand_hasher> *iv_common_cand_tab;

  /* The common candidates.  */
  vec<iv_common_cand *> iv_common_cands;

  /* The maximum invariant variable id.  */
  unsigned max_inv_var_id;

  /* The maximum invariant expression id.  */
  unsigned max_inv_expr_id;

  /* Number of no_overflow BIVs which are not used in memory addresses.  */
  unsigned bivs_not_used_in_addr;

  /* Obstack for iv structure.  */
  struct obstack iv_obstack;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;

  /* Whether the loop body includes any function calls.  */
  bool body_includes_call;

  /* Whether the loop body can only be exited via single exit.  */
  bool loop_single_exit_p;
};

/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_groups;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_group;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* The number of invariants needed, including both invariant variables and
     invariant expressions.  */
  unsigned n_invs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant variable is used.  */
  unsigned *n_inv_var_uses;

  /* Number of times each invariant expression is used.  */
  unsigned *n_inv_expr_uses;

  /* Total cost of the assignment.  */
  comp_cost cost;
};

/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed group.  */
  struct iv_group *group;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next;
};

/* Bound on number of candidates below which all candidates are considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_GROUPS \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, always try removing
   unnecessary ivs from the set.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))

/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static vec<tree> decl_rtl_to_reset;

static comp_cost force_expr_to_var_cost (tree, bool);

/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}

/* Dumps information about the induction variable IV to FILE.  Don't dump
   the variable's name if DUMP_NAME is FALSE.  The information is dumped with
   preceding spaces indicated by INDENT_LEVEL.  */

void
dump_iv (FILE *file, struct iv *iv, bool dump_name, unsigned indent_level)
{
  const char *p;
  const char spaces[9] = {' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '\0'};

  if (indent_level > 4)
    indent_level = 4;
  p = spaces + 8 - (indent_level << 1);

  fprintf (file, "%sIV struct:\n", p);
  if (iv->ssa_name && dump_name)
    {
      fprintf (file, "%s  SSA_NAME:\t", p);
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "%s  Type:\t", p);
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  fprintf (file, "%s  Base:\t", p);
  print_generic_expr (file, iv->base, TDF_SLIM);
  fprintf (file, "\n");

  fprintf (file, "%s  Step:\t", p);
  print_generic_expr (file, iv->step, TDF_SLIM);
  fprintf (file, "\n");

  if (iv->base_object)
    {
      fprintf (file, "%s  Object:\t", p);
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "%s  Biv:\t%c\n", p, iv->biv_p ? 'Y' : 'N');

  fprintf (file, "%s  Overflowness w.r.t. loop niter:\t%s\n",
	   p, iv->no_overflow ? "No-overflow" : "Overflow");
}

/* Dumps information about the USE to FILE.  */

void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "  Use %d.%d:\n", use->group_id, use->id);
  fprintf (file, "    At stmt:\t");
  print_gimple_stmt (file, use->stmt, 0);
  fprintf (file, "    At pos:\t");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");
  dump_iv (file, use->iv, false, 2);
}

/* Dumps information about the uses to FILE.  */

void
dump_groups (FILE *file, struct ivopts_data *data)
{
  unsigned i, j;
  struct iv_group *group;

  for (i = 0; i < data->vgroups.length (); i++)
    {
      group = data->vgroups[i];
      fprintf (file, "Group %d:\n", group->id);
      if (group->type == USE_NONLINEAR_EXPR)
	fprintf (file, "  Type:\tGENERIC\n");
      else if (group->type == USE_REF_ADDRESS)
	fprintf (file, "  Type:\tREFERENCE ADDRESS\n");
      else if (group->type == USE_PTR_ADDRESS)
	fprintf (file, "  Type:\tPOINTER ARGUMENT ADDRESS\n");
      else
	{
	  gcc_assert (group->type == USE_COMPARE);
	  fprintf (file, "  Type:\tCOMPARE\n");
	}
      for (j = 0; j < group->vuses.length (); j++)
	dump_use (file, group->vuses[j]);
    }
}

/* Dumps information about induction variable candidate CAND to FILE.  */

void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "Candidate %d:\n", cand->id);
  if (cand->inv_vars)
    {
      fprintf (file, "  Depend on inv.vars: ");
      dump_bitmap (file, cand->inv_vars);
    }
  if (cand->inv_exprs)
    {
      fprintf (file, "  Depend on inv.exprs: ");
      dump_bitmap (file, cand->inv_exprs);
    }

  if (cand->var_before)
    {
      fprintf (file, "  Var before: ");
      print_generic_expr (file, cand->var_before, TDF_SLIM);
      fprintf (file, "\n");
    }
  if (cand->var_after)
    {
      fprintf (file, "  Var after: ");
      print_generic_expr (file, cand->var_after, TDF_SLIM);
      fprintf (file, "\n");
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  Incr POS: before exit test\n");
      break;

    case IP_BEFORE_USE:
      fprintf (file, "  Incr POS: before use %d\n", cand->ainc_use->id);
      break;

    case IP_AFTER_USE:
      fprintf (file, "  Incr POS: after use %d\n", cand->ainc_use->id);
      break;

    case IP_END:
      fprintf (file, "  Incr POS: at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  Incr POS: orig biv\n");
      break;
    }

  dump_iv (file, iv, false, 1);
}

/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}

/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, gimple *stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}

/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
   if the positions are identical.  */

static bool
stmt_after_inc_pos (struct iv_cand *cand, gimple *stmt, bool true_if_equal)
{
  basic_block cand_bb = gimple_bb (cand->incremented_at);
  basic_block stmt_bb = gimple_bb (stmt);

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  if (true_if_equal
      && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
    return true;
  return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
}

/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple *stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
    case IP_AFTER_USE:
      return stmt_after_inc_pos (cand, stmt, false);

    case IP_BEFORE_USE:
      return stmt_after_inc_pos (cand, stmt, true);

    default:
      gcc_unreachable ();
    }
}

/* Returns true if EXP is an ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}

/* Returns false if BASE or INDEX contains an ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
				  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
	return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
	return false;
    }

  return !abnormal_ssa_name_p (*index);
}

/* Returns true if EXPR contains an ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
			    idx_contains_abnormal_ssa_name_p,
			    NULL);

  if (code == COND_EXPR)
    return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
      || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
      || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));

  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
	return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
	return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}

/* Returns the structure describing number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static struct tree_niter_desc *
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct tree_niter_desc *desc;
  tree_niter_desc **slot;

  if (!data->niters)
    {
      data->niters = new hash_map<edge, tree_niter_desc *>;
      slot = NULL;
    }
  else
    slot = data->niters->get (exit);

  if (!slot)
    {
      /* Try to determine number of iterations.  We cannot safely work with ssa
	 names that appear in phi nodes on abnormal edges, so that we do not
	 create overlapping life ranges for them (PR 27283).  */
      desc = XNEW (struct tree_niter_desc);
      if (!number_of_iterations_exit (data->current_loop,
				      exit, desc, true)
	  || contains_abnormal_ssa_name_p (desc->niter))
	{
	  XDELETE (desc);
	  desc = NULL;
	}
      data->niters->put (exit, desc);
    }
  else
    desc = *slot;

  return desc;
}

/* Returns the structure describing number of iterations determined from
   single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static struct tree_niter_desc *
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit);
}

/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_var_id = 0;
  data->max_inv_expr_id = 0;
  data->niters = NULL;
  data->vgroups.create (20);
  data->vcands.create (20);
  data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
  data->name_expansion_cache = NULL;
  data->iv_common_cand_tab = new hash_table<iv_common_cand_hasher> (10);
  data->iv_common_cands.create (20);
  decl_rtl_to_reset.create (20);
  gcc_obstack_init (&data->iv_obstack);
}

/* Returns the memory object that EXPR points to.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj;

  /* If this is a pointer casted to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (CONVERT_EXPR_P (expr))
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
	return expr;

      if (TREE_CODE (base) == MEM_REF)
	return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
			   build_fold_addr_expr (base));

    case POINTER_PLUS_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
      gcc_unreachable ();

    default:
      if (POLY_INT_CST_P (expr))
	return NULL_TREE;
      return fold_convert (ptr_type_node, expr);
    }
}

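/* For instance (hypothetical inputs, following the cases above):

     determine_base_object (&a[i])	yields (void *) &a
     determine_base_object (p + 4)	recurses on p
     determine_base_object ((char *) 0)	yields NULL_TREE

   so two ivs walking the same array end up with the same base_object,
   which later decisions about combining ivs can rely on.  */
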
/* Return true if an address expression with a non-DECL_P operand appears
   in EXPR.  */

static bool
contain_complex_addr_expr (tree expr)
{
  bool res = false;

  STRIP_NOPS (expr);
  switch (TREE_CODE (expr))
    {
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
      res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
      break;

    case ADDR_EXPR:
      return (!DECL_P (TREE_OPERAND (expr, 0)));

    default:
      return false;
    }

  return res;
}

/* Allocates an induction variable with given initial value BASE and step STEP
   for loop LOOP.  NO_OVERFLOW implies the iv doesn't overflow.  */

static struct iv *
alloc_iv (struct ivopts_data *data, tree base, tree step,
	  bool no_overflow = false)
{
  tree expr = base;
  struct iv *iv = (struct iv*) obstack_alloc (&data->iv_obstack,
					      sizeof (struct iv));
  gcc_assert (step != NULL_TREE);

  /* Lower address expressions in base, except for those with a DECL_P
     operand.  By doing this:
       1) More accurate cost can be computed for address expressions;
       2) Duplicate candidates won't be created for bases in different
	  forms, like &a[0] and &a.  */
  STRIP_NOPS (expr);
  if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
      || contain_complex_addr_expr (expr))
    {
      aff_tree comb;
      tree_to_aff_combination (expr, TREE_TYPE (expr), &comb);
      base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
    }

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->nonlin_use = NULL;
  iv->ssa_name = NULL_TREE;
  if (!no_overflow
       && !iv_can_overflow_p (data->current_loop, TREE_TYPE (base),
			      base, step))
    no_overflow = true;
  iv->no_overflow = no_overflow;
  iv->have_address_use = false;

  return iv;
}

/* Sets STEP and BASE for induction variable IV.  NO_OVERFLOW implies the IV
   doesn't overflow.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step,
	bool no_overflow)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (data, base, step, no_overflow);
  info->iv->ssa_name = iv;
}

/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = gimple_bb (SSA_NAME_DEF_STMT (var));

      if (!bb
	  || !flow_bb_inside_loop_p (data->current_loop, bb))
	set_iv (data, var, var, build_int_cst (type, 0), true);
    }

  return name_info (data, var)->iv;
}

1263*38fd1498Szrj /* Return the first non-invariant ssa var found in EXPR.  */
1264*38fd1498Szrj 
1265*38fd1498Szrj static tree
1266*38fd1498Szrj extract_single_var_from_expr (tree expr)
1267*38fd1498Szrj {
1268*38fd1498Szrj   int i, n;
1269*38fd1498Szrj   tree tmp;
1270*38fd1498Szrj   enum tree_code code;
1271*38fd1498Szrj 
1272*38fd1498Szrj   if (!expr || is_gimple_min_invariant (expr))
1273*38fd1498Szrj     return NULL;
1274*38fd1498Szrj 
1275*38fd1498Szrj   code = TREE_CODE (expr);
1276*38fd1498Szrj   if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1277*38fd1498Szrj     {
1278*38fd1498Szrj       n = TREE_OPERAND_LENGTH (expr);
1279*38fd1498Szrj       for (i = 0; i < n; i++)
1280*38fd1498Szrj 	{
1281*38fd1498Szrj 	  tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));
1282*38fd1498Szrj 
1283*38fd1498Szrj 	  if (tmp)
1284*38fd1498Szrj 	    return tmp;
1285*38fd1498Szrj 	}
1286*38fd1498Szrj     }
1287*38fd1498Szrj   return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
1288*38fd1498Szrj }
1289*38fd1498Szrj 
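/* For illustration (hypothetical SSA names): for an expression such as

     n_1 * 4 + 8

   the recursive walk above descends into n_1 * 4 first and returns n_1,
   the first non-invariant ssa var; for a constant like 8 the function
   returns NULL.  */
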
1290*38fd1498Szrj /* Finds basic ivs.  */
1291*38fd1498Szrj 
1292*38fd1498Szrj static bool
1293*38fd1498Szrj find_bivs (struct ivopts_data *data)
1294*38fd1498Szrj {
1295*38fd1498Szrj   gphi *phi;
1296*38fd1498Szrj   affine_iv iv;
1297*38fd1498Szrj   tree step, type, base, stop;
1298*38fd1498Szrj   bool found = false;
1299*38fd1498Szrj   struct loop *loop = data->current_loop;
1300*38fd1498Szrj   gphi_iterator psi;
1301*38fd1498Szrj 
1302*38fd1498Szrj   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1303*38fd1498Szrj     {
1304*38fd1498Szrj       phi = psi.phi ();
1305*38fd1498Szrj 
1306*38fd1498Szrj       if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
1307*38fd1498Szrj 	continue;
1308*38fd1498Szrj 
1309*38fd1498Szrj       if (virtual_operand_p (PHI_RESULT (phi)))
1310*38fd1498Szrj 	continue;
1311*38fd1498Szrj 
1312*38fd1498Szrj       if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true))
1313*38fd1498Szrj 	continue;
1314*38fd1498Szrj 
1315*38fd1498Szrj       if (integer_zerop (iv.step))
1316*38fd1498Szrj 	continue;
1317*38fd1498Szrj 
1318*38fd1498Szrj       step = iv.step;
1319*38fd1498Szrj       base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1320*38fd1498Szrj       /* Stop expanding the iv base at the first ssa var referred to by
1321*38fd1498Szrj 	 the iv step.  Ideally we would stop at any ssa var, but because
1322*38fd1498Szrj 	 that is expensive and the case is unusual, we just do it for the first one.
1323*38fd1498Szrj 
1324*38fd1498Szrj 	 See PR64705 for the rationale.  */
1325*38fd1498Szrj       stop = extract_single_var_from_expr (step);
1326*38fd1498Szrj       base = expand_simple_operations (base, stop);
1327*38fd1498Szrj       if (contains_abnormal_ssa_name_p (base)
1328*38fd1498Szrj 	  || contains_abnormal_ssa_name_p (step))
1329*38fd1498Szrj 	continue;
1330*38fd1498Szrj 
1331*38fd1498Szrj       type = TREE_TYPE (PHI_RESULT (phi));
1332*38fd1498Szrj       base = fold_convert (type, base);
1333*38fd1498Szrj       if (step)
1334*38fd1498Szrj 	{
1335*38fd1498Szrj 	  if (POINTER_TYPE_P (type))
1336*38fd1498Szrj 	    step = convert_to_ptrofftype (step);
1337*38fd1498Szrj 	  else
1338*38fd1498Szrj 	    step = fold_convert (type, step);
1339*38fd1498Szrj 	}
1340*38fd1498Szrj 
1341*38fd1498Szrj       set_iv (data, PHI_RESULT (phi), base, step, iv.no_overflow);
1342*38fd1498Szrj       found = true;
1343*38fd1498Szrj     }
1344*38fd1498Szrj 
1345*38fd1498Szrj   return found;
1346*38fd1498Szrj }
1347*38fd1498Szrj 
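/* Illustrative sketch (hypothetical SSA names): in a loop such as

     for (i = 0; i < n; i++)
       ...

   the loop header contains i_1 = PHI <0 (preheader), i_2 (latch)>;
   simple_iv analyzes the PHI result as {0, +, 1}, so find_bivs records
   i_1 via set_iv with base 0 and step 1.  */
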
1348*38fd1498Szrj /* Marks basic ivs.  */
1349*38fd1498Szrj 
1350*38fd1498Szrj static void
1351*38fd1498Szrj mark_bivs (struct ivopts_data *data)
1352*38fd1498Szrj {
1353*38fd1498Szrj   gphi *phi;
1354*38fd1498Szrj   gimple *def;
1355*38fd1498Szrj   tree var;
1356*38fd1498Szrj   struct iv *iv, *incr_iv;
1357*38fd1498Szrj   struct loop *loop = data->current_loop;
1358*38fd1498Szrj   basic_block incr_bb;
1359*38fd1498Szrj   gphi_iterator psi;
1360*38fd1498Szrj 
1361*38fd1498Szrj   data->bivs_not_used_in_addr = 0;
1362*38fd1498Szrj   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1363*38fd1498Szrj     {
1364*38fd1498Szrj       phi = psi.phi ();
1365*38fd1498Szrj 
1366*38fd1498Szrj       iv = get_iv (data, PHI_RESULT (phi));
1367*38fd1498Szrj       if (!iv)
1368*38fd1498Szrj 	continue;
1369*38fd1498Szrj 
1370*38fd1498Szrj       var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1371*38fd1498Szrj       def = SSA_NAME_DEF_STMT (var);
1372*38fd1498Szrj       /* Don't mark an iv peeled from another one as a biv.  */
1373*38fd1498Szrj       if (def
1374*38fd1498Szrj 	  && gimple_code (def) == GIMPLE_PHI
1375*38fd1498Szrj 	  && gimple_bb (def) == loop->header)
1376*38fd1498Szrj 	continue;
1377*38fd1498Szrj 
1378*38fd1498Szrj       incr_iv = get_iv (data, var);
1379*38fd1498Szrj       if (!incr_iv)
1380*38fd1498Szrj 	continue;
1381*38fd1498Szrj 
1382*38fd1498Szrj       /* If the increment is in the subloop, ignore it.  */
1383*38fd1498Szrj       incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1384*38fd1498Szrj       if (incr_bb->loop_father != data->current_loop
1385*38fd1498Szrj 	  || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1386*38fd1498Szrj 	continue;
1387*38fd1498Szrj 
1388*38fd1498Szrj       iv->biv_p = true;
1389*38fd1498Szrj       incr_iv->biv_p = true;
1390*38fd1498Szrj       if (iv->no_overflow)
1391*38fd1498Szrj 	data->bivs_not_used_in_addr++;
1392*38fd1498Szrj       if (incr_iv->no_overflow)
1393*38fd1498Szrj 	data->bivs_not_used_in_addr++;
1394*38fd1498Szrj     }
1395*38fd1498Szrj }
1396*38fd1498Szrj 
1397*38fd1498Szrj /* Checks whether STMT defines a linear induction variable and stores its
1398*38fd1498Szrj    parameters to IV.  */
1399*38fd1498Szrj 
1400*38fd1498Szrj static bool
1401*38fd1498Szrj find_givs_in_stmt_scev (struct ivopts_data *data, gimple *stmt, affine_iv *iv)
1402*38fd1498Szrj {
1403*38fd1498Szrj   tree lhs, stop;
1404*38fd1498Szrj   struct loop *loop = data->current_loop;
1405*38fd1498Szrj 
1406*38fd1498Szrj   iv->base = NULL_TREE;
1407*38fd1498Szrj   iv->step = NULL_TREE;
1408*38fd1498Szrj 
1409*38fd1498Szrj   if (gimple_code (stmt) != GIMPLE_ASSIGN)
1410*38fd1498Szrj     return false;
1411*38fd1498Szrj 
1412*38fd1498Szrj   lhs = gimple_assign_lhs (stmt);
1413*38fd1498Szrj   if (TREE_CODE (lhs) != SSA_NAME)
1414*38fd1498Szrj     return false;
1415*38fd1498Szrj 
1416*38fd1498Szrj   if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1417*38fd1498Szrj     return false;
1418*38fd1498Szrj 
1419*38fd1498Szrj   /* Stop expanding the iv base at the first ssa var referred to by the
1420*38fd1498Szrj      iv step.  Ideally we would stop at any ssa var, but because that is
1421*38fd1498Szrj      expensive and the case is unusual, we just do it for the first one.
1422*38fd1498Szrj 
1423*38fd1498Szrj      See PR64705 for the rationale.  */
1424*38fd1498Szrj   stop = extract_single_var_from_expr (iv->step);
1425*38fd1498Szrj   iv->base = expand_simple_operations (iv->base, stop);
1426*38fd1498Szrj   if (contains_abnormal_ssa_name_p (iv->base)
1427*38fd1498Szrj       || contains_abnormal_ssa_name_p (iv->step))
1428*38fd1498Szrj     return false;
1429*38fd1498Szrj 
1430*38fd1498Szrj   /* If STMT could throw, then do not consider STMT as defining a GIV.
1431*38fd1498Szrj      While this will suppress optimizations, we cannot safely delete the
1432*38fd1498Szrj      GIV and its associated statements, even if they appear to be unused.  */
1433*38fd1498Szrj   if (stmt_could_throw_p (stmt))
1434*38fd1498Szrj     return false;
1435*38fd1498Szrj 
1436*38fd1498Szrj   return true;
1437*38fd1498Szrj }
1438*38fd1498Szrj 
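/* For example (hypothetical SSA names): with a biv i_1 = {0, +, 1}, the
   statement

     j_2 = i_1 * 4 + 5;

   defines a general induction variable {5, +, 4}, which simple_iv
   recognizes and the caller records via set_iv.  */
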
1439*38fd1498Szrj /* Finds general ivs in statement STMT.  */
1440*38fd1498Szrj 
1441*38fd1498Szrj static void
1442*38fd1498Szrj find_givs_in_stmt (struct ivopts_data *data, gimple *stmt)
1443*38fd1498Szrj {
1444*38fd1498Szrj   affine_iv iv;
1445*38fd1498Szrj 
1446*38fd1498Szrj   if (!find_givs_in_stmt_scev (data, stmt, &iv))
1447*38fd1498Szrj     return;
1448*38fd1498Szrj 
1449*38fd1498Szrj   set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step, iv.no_overflow);
1450*38fd1498Szrj }
1451*38fd1498Szrj 
1452*38fd1498Szrj /* Finds general ivs in basic block BB.  */
1453*38fd1498Szrj 
1454*38fd1498Szrj static void
1455*38fd1498Szrj find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1456*38fd1498Szrj {
1457*38fd1498Szrj   gimple_stmt_iterator bsi;
1458*38fd1498Szrj 
1459*38fd1498Szrj   for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1460*38fd1498Szrj     find_givs_in_stmt (data, gsi_stmt (bsi));
1461*38fd1498Szrj }
1462*38fd1498Szrj 
1463*38fd1498Szrj /* Finds general ivs.  */
1464*38fd1498Szrj 
1465*38fd1498Szrj static void
1466*38fd1498Szrj find_givs (struct ivopts_data *data)
1467*38fd1498Szrj {
1468*38fd1498Szrj   struct loop *loop = data->current_loop;
1469*38fd1498Szrj   basic_block *body = get_loop_body_in_dom_order (loop);
1470*38fd1498Szrj   unsigned i;
1471*38fd1498Szrj 
1472*38fd1498Szrj   for (i = 0; i < loop->num_nodes; i++)
1473*38fd1498Szrj     find_givs_in_bb (data, body[i]);
1474*38fd1498Szrj   free (body);
1475*38fd1498Szrj }
1476*38fd1498Szrj 
1477*38fd1498Szrj /* For each ssa name defined in LOOP determines whether it is an induction
1478*38fd1498Szrj    variable and if so, its initial value and step.  */
1479*38fd1498Szrj 
1480*38fd1498Szrj static bool
1481*38fd1498Szrj find_induction_variables (struct ivopts_data *data)
1482*38fd1498Szrj {
1483*38fd1498Szrj   unsigned i;
1484*38fd1498Szrj   bitmap_iterator bi;
1485*38fd1498Szrj 
1486*38fd1498Szrj   if (!find_bivs (data))
1487*38fd1498Szrj     return false;
1488*38fd1498Szrj 
1489*38fd1498Szrj   find_givs (data);
1490*38fd1498Szrj   mark_bivs (data);
1491*38fd1498Szrj 
1492*38fd1498Szrj   if (dump_file && (dump_flags & TDF_DETAILS))
1493*38fd1498Szrj     {
1494*38fd1498Szrj       struct tree_niter_desc *niter = niter_for_single_dom_exit (data);
1495*38fd1498Szrj 
1496*38fd1498Szrj       if (niter)
1497*38fd1498Szrj 	{
1498*38fd1498Szrj 	  fprintf (dump_file, "  number of iterations ");
1499*38fd1498Szrj 	  print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1500*38fd1498Szrj 	  if (!integer_zerop (niter->may_be_zero))
1501*38fd1498Szrj 	    {
1502*38fd1498Szrj 	      fprintf (dump_file, "; zero if ");
1503*38fd1498Szrj 	      print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1504*38fd1498Szrj 	    }
1505*38fd1498Szrj 	  fprintf (dump_file, "\n");
1506*38fd1498Szrj 	};
1507*38fd1498Szrj 	}
1508*38fd1498Szrj       fprintf (dump_file, "\n<Induction Vars>:\n");
1509*38fd1498Szrj       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1510*38fd1498Szrj 	{
1511*38fd1498Szrj 	  struct version_info *info = ver_info (data, i);
1512*38fd1498Szrj 	  if (info->iv && info->iv->step && !integer_zerop (info->iv->step))
1513*38fd1498Szrj 	    dump_iv (dump_file, ver_info (data, i)->iv, true, 0);
1514*38fd1498Szrj 	}
1515*38fd1498Szrj     }
1516*38fd1498Szrj 
1517*38fd1498Szrj   return true;
1518*38fd1498Szrj }
1519*38fd1498Szrj 
1520*38fd1498Szrj /* Records a use of TYPE at *USE_P in STMT whose value is IV in GROUP.
1521*38fd1498Szrj    For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
1522*38fd1498Szrj    is the const offset stripped from IV base and MEM_TYPE is the type
1523*38fd1498Szrj    of the memory being addressed.  For uses of other types, ADDR_BASE
1524*38fd1498Szrj    and ADDR_OFFSET are zero by default and MEM_TYPE is NULL_TREE.  */
1525*38fd1498Szrj 
1526*38fd1498Szrj static struct iv_use *
1527*38fd1498Szrj record_use (struct iv_group *group, tree *use_p, struct iv *iv,
1528*38fd1498Szrj 	    gimple *stmt, enum use_type type, tree mem_type,
1529*38fd1498Szrj 	    tree addr_base, poly_uint64 addr_offset)
1530*38fd1498Szrj {
1531*38fd1498Szrj   struct iv_use *use = XCNEW (struct iv_use);
1532*38fd1498Szrj 
1533*38fd1498Szrj   use->id = group->vuses.length ();
1534*38fd1498Szrj   use->group_id = group->id;
1535*38fd1498Szrj   use->type = type;
1536*38fd1498Szrj   use->mem_type = mem_type;
1537*38fd1498Szrj   use->iv = iv;
1538*38fd1498Szrj   use->stmt = stmt;
1539*38fd1498Szrj   use->op_p = use_p;
1540*38fd1498Szrj   use->addr_base = addr_base;
1541*38fd1498Szrj   use->addr_offset = addr_offset;
1542*38fd1498Szrj 
1543*38fd1498Szrj   group->vuses.safe_push (use);
1544*38fd1498Szrj   return use;
1545*38fd1498Szrj }
1546*38fd1498Szrj 
1547*38fd1498Szrj /* Checks whether OP is a loop-level invariant and if so, records it.
1548*38fd1498Szrj    NONLINEAR_USE is true if the invariant is used in a way we do not
1549*38fd1498Szrj    handle specially.  */
1550*38fd1498Szrj 
1551*38fd1498Szrj static void
1552*38fd1498Szrj record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1553*38fd1498Szrj {
1554*38fd1498Szrj   basic_block bb;
1555*38fd1498Szrj   struct version_info *info;
1556*38fd1498Szrj 
1557*38fd1498Szrj   if (TREE_CODE (op) != SSA_NAME
1558*38fd1498Szrj       || virtual_operand_p (op))
1559*38fd1498Szrj     return;
1560*38fd1498Szrj 
1561*38fd1498Szrj   bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1562*38fd1498Szrj   if (bb
1563*38fd1498Szrj       && flow_bb_inside_loop_p (data->current_loop, bb))
1564*38fd1498Szrj     return;
1565*38fd1498Szrj 
1566*38fd1498Szrj   info = name_info (data, op);
1567*38fd1498Szrj   info->name = op;
1568*38fd1498Szrj   info->has_nonlin_use |= nonlinear_use;
1569*38fd1498Szrj   if (!info->inv_id)
1570*38fd1498Szrj     info->inv_id = ++data->max_inv_var_id;
1571*38fd1498Szrj   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1572*38fd1498Szrj }
1573*38fd1498Szrj 
1574*38fd1498Szrj /* Record a group of TYPE.  */
1575*38fd1498Szrj 
1576*38fd1498Szrj static struct iv_group *
1577*38fd1498Szrj record_group (struct ivopts_data *data, enum use_type type)
1578*38fd1498Szrj {
1579*38fd1498Szrj   struct iv_group *group = XCNEW (struct iv_group);
1580*38fd1498Szrj 
1581*38fd1498Szrj   group->id = data->vgroups.length ();
1582*38fd1498Szrj   group->type = type;
1583*38fd1498Szrj   group->related_cands = BITMAP_ALLOC (NULL);
1584*38fd1498Szrj   group->vuses.create (1);
1585*38fd1498Szrj 
1586*38fd1498Szrj   data->vgroups.safe_push (group);
1587*38fd1498Szrj   return group;
1588*38fd1498Szrj }
1589*38fd1498Szrj 
1590*38fd1498Szrj /* Record a use of TYPE at *USE_P in STMT whose value is IV in a group.
1591*38fd1498Szrj    A new group is created if there is no existing group for the use.
1592*38fd1498Szrj    MEM_TYPE is the type of memory being addressed, or NULL if this
1593*38fd1498Szrj    isn't an address reference.  */
1594*38fd1498Szrj 
1595*38fd1498Szrj static struct iv_use *
1596*38fd1498Szrj record_group_use (struct ivopts_data *data, tree *use_p,
1597*38fd1498Szrj 		  struct iv *iv, gimple *stmt, enum use_type type,
1598*38fd1498Szrj 		  tree mem_type)
1599*38fd1498Szrj {
1600*38fd1498Szrj   tree addr_base = NULL;
1601*38fd1498Szrj   struct iv_group *group = NULL;
1602*38fd1498Szrj   poly_uint64 addr_offset = 0;
1603*38fd1498Szrj 
1604*38fd1498Szrj   /* A non-address type use is recorded in a new group; for an address type use, try to find an existing compatible group first.  */
1605*38fd1498Szrj   if (address_p (type))
1606*38fd1498Szrj     {
1607*38fd1498Szrj       unsigned int i;
1608*38fd1498Szrj 
1609*38fd1498Szrj       addr_base = strip_offset (iv->base, &addr_offset);
1610*38fd1498Szrj       for (i = 0; i < data->vgroups.length (); i++)
1611*38fd1498Szrj 	{
1612*38fd1498Szrj 	  struct iv_use *use;
1613*38fd1498Szrj 
1614*38fd1498Szrj 	  group = data->vgroups[i];
1615*38fd1498Szrj 	  use = group->vuses[0];
1616*38fd1498Szrj 	  if (!address_p (use->type))
1617*38fd1498Szrj 	    continue;
1618*38fd1498Szrj 
1619*38fd1498Szrj 	  /* Check if it has the same stripped base and step.  */
1620*38fd1498Szrj 	  if (operand_equal_p (iv->base_object, use->iv->base_object, 0)
1621*38fd1498Szrj 	      && operand_equal_p (iv->step, use->iv->step, 0)
1622*38fd1498Szrj 	      && operand_equal_p (addr_base, use->addr_base, 0))
1623*38fd1498Szrj 	    break;
1624*38fd1498Szrj 	}
1625*38fd1498Szrj       if (i == data->vgroups.length ())
1626*38fd1498Szrj 	group = NULL;
1627*38fd1498Szrj     }
1628*38fd1498Szrj 
1629*38fd1498Szrj   if (!group)
1630*38fd1498Szrj     group = record_group (data, type);
1631*38fd1498Szrj 
1632*38fd1498Szrj   return record_use (group, use_p, iv, stmt, type, mem_type,
1633*38fd1498Szrj 		     addr_base, addr_offset);
1634*38fd1498Szrj }
1635*38fd1498Szrj 
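/* Grouping example (illustrative): the address uses a[i_1] and
   a[i_1 + 1] in the same loop share the stripped base and the step of
   i_1 and differ only in a constant offset (0 vs. 4 for int elements),
   so the search above places the second use in the existing group
   rather than creating a new one.  */
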
1636*38fd1498Szrj /* Checks whether the use OP is interesting and if so, records it.  */
1637*38fd1498Szrj 
1638*38fd1498Szrj static struct iv_use *
1639*38fd1498Szrj find_interesting_uses_op (struct ivopts_data *data, tree op)
1640*38fd1498Szrj {
1641*38fd1498Szrj   struct iv *iv;
1642*38fd1498Szrj   gimple *stmt;
1643*38fd1498Szrj   struct iv_use *use;
1644*38fd1498Szrj 
1645*38fd1498Szrj   if (TREE_CODE (op) != SSA_NAME)
1646*38fd1498Szrj     return NULL;
1647*38fd1498Szrj 
1648*38fd1498Szrj   iv = get_iv (data, op);
1649*38fd1498Szrj   if (!iv)
1650*38fd1498Szrj     return NULL;
1651*38fd1498Szrj 
1652*38fd1498Szrj   if (iv->nonlin_use)
1653*38fd1498Szrj     {
1654*38fd1498Szrj       gcc_assert (iv->nonlin_use->type == USE_NONLINEAR_EXPR);
1655*38fd1498Szrj       return iv->nonlin_use;
1656*38fd1498Szrj     }
1657*38fd1498Szrj 
1658*38fd1498Szrj   if (integer_zerop (iv->step))
1659*38fd1498Szrj     {
1660*38fd1498Szrj       record_invariant (data, op, true);
1661*38fd1498Szrj       return NULL;
1662*38fd1498Szrj     }
1663*38fd1498Szrj 
1664*38fd1498Szrj   stmt = SSA_NAME_DEF_STMT (op);
1665*38fd1498Szrj   gcc_assert (gimple_code (stmt) == GIMPLE_PHI || is_gimple_assign (stmt));
1666*38fd1498Szrj 
1667*38fd1498Szrj   use = record_group_use (data, NULL, iv, stmt, USE_NONLINEAR_EXPR, NULL_TREE);
1668*38fd1498Szrj   iv->nonlin_use = use;
1669*38fd1498Szrj   return use;
1670*38fd1498Szrj }
1671*38fd1498Szrj 
1672*38fd1498Szrj /* Indicate how compare type iv_use can be handled.  */
1673*38fd1498Szrj enum comp_iv_rewrite
1674*38fd1498Szrj {
1675*38fd1498Szrj   COMP_IV_NA,
1676*38fd1498Szrj   /* We may rewrite compare type iv_use by expressing value of the iv_use.  */
1677*38fd1498Szrj   COMP_IV_EXPR,
1678*38fd1498Szrj   /* We may rewrite compare type iv_uses on both sides of comparison by
1679*38fd1498Szrj      expressing value of each iv_use.  */
1680*38fd1498Szrj   COMP_IV_EXPR_2,
1681*38fd1498Szrj   /* We may rewrite compare type iv_use by expressing value of the iv_use
1682*38fd1498Szrj      or by eliminating it with other iv_cand.  */
1683*38fd1498Szrj   COMP_IV_ELIM
1684*38fd1498Szrj };
1685*38fd1498Szrj 
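/* Illustrative examples (hypothetical names): for an iv i_1 and a loop
   invariant n_2, the condition i_1 < n_2 is COMP_IV_ELIM; i_1 < j_3
   with ivs on both sides is COMP_IV_EXPR_2; i_1 < x_4, where x_4 is
   defined inside the loop but is not an iv, is COMP_IV_EXPR; and a
   condition involving no iv at all is COMP_IV_NA.  */
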
1686*38fd1498Szrj /* Given a condition in statement STMT, checks whether it is a compare
1687*38fd1498Szrj    of an induction variable and an invariant.  If this is the case,
1688*38fd1498Szrj    CONTROL_VAR is set to location of the iv, BOUND to the location of
1689*38fd1498Szrj    the invariant, IV_VAR and IV_BOUND are set to the corresponding
1690*38fd1498Szrj    induction variable descriptions, and the rewrite type is returned.
1691*38fd1498Szrj    Otherwise, CONTROL_VAR and BOUND are set to the arguments of the
1692*38fd1498Szrj    condition and COMP_IV_NA is returned.  */
1693*38fd1498Szrj 
1694*38fd1498Szrj static enum comp_iv_rewrite
1695*38fd1498Szrj extract_cond_operands (struct ivopts_data *data, gimple *stmt,
1696*38fd1498Szrj 		       tree **control_var, tree **bound,
1697*38fd1498Szrj 		       struct iv **iv_var, struct iv **iv_bound)
1698*38fd1498Szrj {
1699*38fd1498Szrj   /* The objects returned when COND has constant operands.  */
1700*38fd1498Szrj   static struct iv const_iv;
1701*38fd1498Szrj   static tree zero;
1702*38fd1498Szrj   tree *op0 = &zero, *op1 = &zero;
1703*38fd1498Szrj   struct iv *iv0 = &const_iv, *iv1 = &const_iv;
1704*38fd1498Szrj   enum comp_iv_rewrite rewrite_type = COMP_IV_NA;
1705*38fd1498Szrj 
1706*38fd1498Szrj   if (gimple_code (stmt) == GIMPLE_COND)
1707*38fd1498Szrj     {
1708*38fd1498Szrj       gcond *cond_stmt = as_a <gcond *> (stmt);
1709*38fd1498Szrj       op0 = gimple_cond_lhs_ptr (cond_stmt);
1710*38fd1498Szrj       op1 = gimple_cond_rhs_ptr (cond_stmt);
1711*38fd1498Szrj     }
1712*38fd1498Szrj   else
1713*38fd1498Szrj     {
1714*38fd1498Szrj       op0 = gimple_assign_rhs1_ptr (stmt);
1715*38fd1498Szrj       op1 = gimple_assign_rhs2_ptr (stmt);
1716*38fd1498Szrj     }
1717*38fd1498Szrj 
1718*38fd1498Szrj   zero = integer_zero_node;
1719*38fd1498Szrj   const_iv.step = integer_zero_node;
1720*38fd1498Szrj 
1721*38fd1498Szrj   if (TREE_CODE (*op0) == SSA_NAME)
1722*38fd1498Szrj     iv0 = get_iv (data, *op0);
1723*38fd1498Szrj   if (TREE_CODE (*op1) == SSA_NAME)
1724*38fd1498Szrj     iv1 = get_iv (data, *op1);
1725*38fd1498Szrj 
1726*38fd1498Szrj   /* If both sides of the comparison are IVs, we can express the ivs on both ends.  */
1727*38fd1498Szrj   if (iv0 && iv1 && !integer_zerop (iv0->step) && !integer_zerop (iv1->step))
1728*38fd1498Szrj     {
1729*38fd1498Szrj       rewrite_type = COMP_IV_EXPR_2;
1730*38fd1498Szrj       goto end;
1731*38fd1498Szrj     }
1732*38fd1498Szrj 
1733*38fd1498Szrj   /* If neither side of the comparison is an IV.  */
1734*38fd1498Szrj   if ((!iv0 || integer_zerop (iv0->step))
1735*38fd1498Szrj       && (!iv1 || integer_zerop (iv1->step)))
1736*38fd1498Szrj     goto end;
1737*38fd1498Szrj 
1738*38fd1498Szrj   /* Control variable may be on the other side.  */
1739*38fd1498Szrj   if (!iv0 || integer_zerop (iv0->step))
1740*38fd1498Szrj     {
1741*38fd1498Szrj       std::swap (op0, op1);
1742*38fd1498Szrj       std::swap (iv0, iv1);
1743*38fd1498Szrj     }
1744*38fd1498Szrj   /* If one side is an IV and the other side isn't loop invariant.  */
1745*38fd1498Szrj   if (!iv1)
1746*38fd1498Szrj     rewrite_type = COMP_IV_EXPR;
1747*38fd1498Szrj   /* If one side is an IV and the other side is loop invariant.  */
1748*38fd1498Szrj   else if (!integer_zerop (iv0->step) && integer_zerop (iv1->step))
1749*38fd1498Szrj     rewrite_type = COMP_IV_ELIM;
1750*38fd1498Szrj 
1751*38fd1498Szrj end:
1752*38fd1498Szrj   if (control_var)
1753*38fd1498Szrj     *control_var = op0;
1754*38fd1498Szrj   if (iv_var)
1755*38fd1498Szrj     *iv_var = iv0;
1756*38fd1498Szrj   if (bound)
1757*38fd1498Szrj     *bound = op1;
1758*38fd1498Szrj   if (iv_bound)
1759*38fd1498Szrj     *iv_bound = iv1;
1760*38fd1498Szrj 
1761*38fd1498Szrj   return rewrite_type;
1762*38fd1498Szrj }
1763*38fd1498Szrj 
1764*38fd1498Szrj /* Checks whether the condition in STMT is interesting and if so,
1765*38fd1498Szrj    records it.  */
1766*38fd1498Szrj 
1767*38fd1498Szrj static void
1768*38fd1498Szrj find_interesting_uses_cond (struct ivopts_data *data, gimple *stmt)
1769*38fd1498Szrj {
1770*38fd1498Szrj   tree *var_p, *bound_p;
1771*38fd1498Szrj   struct iv *var_iv, *bound_iv;
1772*38fd1498Szrj   enum comp_iv_rewrite ret;
1773*38fd1498Szrj 
1774*38fd1498Szrj   ret = extract_cond_operands (data, stmt,
1775*38fd1498Szrj 			       &var_p, &bound_p, &var_iv, &bound_iv);
1776*38fd1498Szrj   if (ret == COMP_IV_NA)
1777*38fd1498Szrj     {
1778*38fd1498Szrj       find_interesting_uses_op (data, *var_p);
1779*38fd1498Szrj       find_interesting_uses_op (data, *bound_p);
1780*38fd1498Szrj       return;
1781*38fd1498Szrj     }
1782*38fd1498Szrj 
1783*38fd1498Szrj   record_group_use (data, var_p, var_iv, stmt, USE_COMPARE, NULL_TREE);
1784*38fd1498Szrj   /* Record compare type iv_use for iv on the other side of comparison.  */
1785*38fd1498Szrj   if (ret == COMP_IV_EXPR_2)
1786*38fd1498Szrj     record_group_use (data, bound_p, bound_iv, stmt, USE_COMPARE, NULL_TREE);
1787*38fd1498Szrj }
1788*38fd1498Szrj 
1789*38fd1498Szrj /* Returns the outermost loop, relative to the loop LOOP, in which EXPR
1790*38fd1498Szrj    is obviously invariant, i.e. in which all its operands are defined
1791*38fd1498Szrj    outside of the returned loop.  Returns NULL if EXPR is not even
1792*38fd1498Szrj    obviously invariant in LOOP.  */
1793*38fd1498Szrj 
1794*38fd1498Szrj struct loop *
1795*38fd1498Szrj outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
1796*38fd1498Szrj {
1797*38fd1498Szrj   basic_block def_bb;
1798*38fd1498Szrj   unsigned i, len;
1799*38fd1498Szrj 
1800*38fd1498Szrj   if (is_gimple_min_invariant (expr))
1801*38fd1498Szrj     return current_loops->tree_root;
1802*38fd1498Szrj 
1803*38fd1498Szrj   if (TREE_CODE (expr) == SSA_NAME)
1804*38fd1498Szrj     {
1805*38fd1498Szrj       def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1806*38fd1498Szrj       if (def_bb)
1807*38fd1498Szrj 	{
1808*38fd1498Szrj 	  if (flow_bb_inside_loop_p (loop, def_bb))
1809*38fd1498Szrj 	    return NULL;
1810*38fd1498Szrj 	  return superloop_at_depth (loop,
1811*38fd1498Szrj 				     loop_depth (def_bb->loop_father) + 1);
1812*38fd1498Szrj 	}
1813*38fd1498Szrj 
1814*38fd1498Szrj       return current_loops->tree_root;
1815*38fd1498Szrj     }
1816*38fd1498Szrj 
1817*38fd1498Szrj   if (!EXPR_P (expr))
1818*38fd1498Szrj     return NULL;
1819*38fd1498Szrj 
1820*38fd1498Szrj   unsigned maxdepth = 0;
1821*38fd1498Szrj   len = TREE_OPERAND_LENGTH (expr);
1822*38fd1498Szrj   for (i = 0; i < len; i++)
1823*38fd1498Szrj     {
1824*38fd1498Szrj       struct loop *ivloop;
1825*38fd1498Szrj       if (!TREE_OPERAND (expr, i))
1826*38fd1498Szrj 	continue;
1827*38fd1498Szrj 
1828*38fd1498Szrj       ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
1829*38fd1498Szrj       if (!ivloop)
1830*38fd1498Szrj 	return NULL;
1831*38fd1498Szrj       maxdepth = MAX (maxdepth, loop_depth (ivloop));
1832*38fd1498Szrj     }
1833*38fd1498Szrj 
1834*38fd1498Szrj   return superloop_at_depth (loop, maxdepth);
1835*38fd1498Szrj }
1836*38fd1498Szrj 
1837*38fd1498Szrj /* Returns true if expression EXPR is obviously invariant in LOOP,
1838*38fd1498Szrj    i.e. if all its operands are defined outside of the LOOP.  LOOP
1839*38fd1498Szrj    should not be the function body.  */
1840*38fd1498Szrj 
1841*38fd1498Szrj bool
1842*38fd1498Szrj expr_invariant_in_loop_p (struct loop *loop, tree expr)
1843*38fd1498Szrj {
1844*38fd1498Szrj   basic_block def_bb;
1845*38fd1498Szrj   unsigned i, len;
1846*38fd1498Szrj 
1847*38fd1498Szrj   gcc_assert (loop_depth (loop) > 0);
1848*38fd1498Szrj 
1849*38fd1498Szrj   if (is_gimple_min_invariant (expr))
1850*38fd1498Szrj     return true;
1851*38fd1498Szrj 
1852*38fd1498Szrj   if (TREE_CODE (expr) == SSA_NAME)
1853*38fd1498Szrj     {
1854*38fd1498Szrj       def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1855*38fd1498Szrj       if (def_bb
1856*38fd1498Szrj 	  && flow_bb_inside_loop_p (loop, def_bb))
1857*38fd1498Szrj 	return false;
1858*38fd1498Szrj 
1859*38fd1498Szrj       return true;
1860*38fd1498Szrj     }
1861*38fd1498Szrj 
1862*38fd1498Szrj   if (!EXPR_P (expr))
1863*38fd1498Szrj     return false;
1864*38fd1498Szrj 
1865*38fd1498Szrj   len = TREE_OPERAND_LENGTH (expr);
1866*38fd1498Szrj   for (i = 0; i < len; i++)
1867*38fd1498Szrj     if (TREE_OPERAND (expr, i)
1868*38fd1498Szrj 	&& !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1869*38fd1498Szrj       return false;
1870*38fd1498Szrj 
1871*38fd1498Szrj   return true;
1872*38fd1498Szrj }
1873*38fd1498Szrj 
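/* For instance (hypothetical SSA names): with n_1 defined before the
   loop, the expression n_1 * 4 + 8 is invariant in the loop, since all
   its operands are constants or defined outside; i_2 + n_1 with i_2
   defined inside the loop is not.  */
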
1874*38fd1498Szrj /* Given expression EXPR which computes inductive values with respect
1875*38fd1498Szrj    to loop recorded in DATA, this function returns biv from which EXPR
1876*38fd1498Szrj    is derived by tracing definition chains of ssa variables in EXPR.  */
1877*38fd1498Szrj 
1878*38fd1498Szrj static struct iv*
1879*38fd1498Szrj find_deriving_biv_for_expr (struct ivopts_data *data, tree expr)
1880*38fd1498Szrj {
1881*38fd1498Szrj   struct iv *iv;
1882*38fd1498Szrj   unsigned i, n;
1883*38fd1498Szrj   tree e2, e1;
1884*38fd1498Szrj   enum tree_code code;
1885*38fd1498Szrj   gimple *stmt;
1886*38fd1498Szrj 
1887*38fd1498Szrj   if (expr == NULL_TREE)
1888*38fd1498Szrj     return NULL;
1889*38fd1498Szrj 
1890*38fd1498Szrj   if (is_gimple_min_invariant (expr))
1891*38fd1498Szrj     return NULL;
1892*38fd1498Szrj 
1893*38fd1498Szrj   code = TREE_CODE (expr);
1894*38fd1498Szrj   if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1895*38fd1498Szrj     {
1896*38fd1498Szrj       n = TREE_OPERAND_LENGTH (expr);
1897*38fd1498Szrj       for (i = 0; i < n; i++)
1898*38fd1498Szrj 	{
1899*38fd1498Szrj 	  iv = find_deriving_biv_for_expr (data, TREE_OPERAND (expr, i));
1900*38fd1498Szrj 	  if (iv)
1901*38fd1498Szrj 	    return iv;
1902*38fd1498Szrj 	}
1903*38fd1498Szrj     }
1904*38fd1498Szrj 
1905*38fd1498Szrj   /* Stop if it's not an ssa name.  */
1906*38fd1498Szrj   if (code != SSA_NAME)
1907*38fd1498Szrj     return NULL;
1908*38fd1498Szrj 
1909*38fd1498Szrj   iv = get_iv (data, expr);
1910*38fd1498Szrj   if (!iv || integer_zerop (iv->step))
1911*38fd1498Szrj     return NULL;
1912*38fd1498Szrj   else if (iv->biv_p)
1913*38fd1498Szrj     return iv;
1914*38fd1498Szrj 
1915*38fd1498Szrj   stmt = SSA_NAME_DEF_STMT (expr);
1916*38fd1498Szrj   if (gphi *phi = dyn_cast <gphi *> (stmt))
1917*38fd1498Szrj     {
1918*38fd1498Szrj       ssa_op_iter iter;
1919*38fd1498Szrj       use_operand_p use_p;
1920*38fd1498Szrj       basic_block phi_bb = gimple_bb (phi);
1921*38fd1498Szrj 
1922*38fd1498Szrj       /* Skip loop header PHI that doesn't define biv.  */
1923*38fd1498Szrj       if (phi_bb->loop_father == data->current_loop)
1924*38fd1498Szrj 	return NULL;
1925*38fd1498Szrj 
1926*38fd1498Szrj       if (virtual_operand_p (gimple_phi_result (phi)))
1927*38fd1498Szrj 	return NULL;
1928*38fd1498Szrj 
1929*38fd1498Szrj       FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
1930*38fd1498Szrj 	{
1931*38fd1498Szrj 	  tree use = USE_FROM_PTR (use_p);
1932*38fd1498Szrj 	  iv = find_deriving_biv_for_expr (data, use);
1933*38fd1498Szrj 	  if (iv)
1934*38fd1498Szrj 	    return iv;
1935*38fd1498Szrj 	}
1936*38fd1498Szrj       return NULL;
1937*38fd1498Szrj     }
1938*38fd1498Szrj   if (gimple_code (stmt) != GIMPLE_ASSIGN)
1939*38fd1498Szrj     return NULL;
1940*38fd1498Szrj 
1941*38fd1498Szrj   e1 = gimple_assign_rhs1 (stmt);
1942*38fd1498Szrj   code = gimple_assign_rhs_code (stmt);
1943*38fd1498Szrj   if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
1944*38fd1498Szrj     return find_deriving_biv_for_expr (data, e1);
1945*38fd1498Szrj 
1946*38fd1498Szrj   switch (code)
1947*38fd1498Szrj     {
1948*38fd1498Szrj     case MULT_EXPR:
1949*38fd1498Szrj     case PLUS_EXPR:
1950*38fd1498Szrj     case MINUS_EXPR:
1951*38fd1498Szrj     case POINTER_PLUS_EXPR:
1952*38fd1498Szrj       /* Increments, decrements and multiplications by a constant
1953*38fd1498Szrj 	 are simple.  */
1954*38fd1498Szrj       e2 = gimple_assign_rhs2 (stmt);
1955*38fd1498Szrj       iv = find_deriving_biv_for_expr (data, e2);
1956*38fd1498Szrj       if (iv)
1957*38fd1498Szrj 	return iv;
1958*38fd1498Szrj       gcc_fallthrough ();
1959*38fd1498Szrj 
1960*38fd1498Szrj     CASE_CONVERT:
1961*38fd1498Szrj       /* Casts are simple.  */
1962*38fd1498Szrj       return find_deriving_biv_for_expr (data, e1);
1963*38fd1498Szrj 
1964*38fd1498Szrj     default:
1965*38fd1498Szrj       break;
1966*38fd1498Szrj     }
1967*38fd1498Szrj 
1968*38fd1498Szrj   return NULL;
1969*38fd1498Szrj }
1970*38fd1498Szrj 
1971*38fd1498Szrj /* Record that BIV, together with its predecessor and successor bivs,
1972*38fd1498Szrj    is used in address type uses.  */
1973*38fd1498Szrj 
1974*38fd1498Szrj static void
1975*38fd1498Szrj record_biv_for_address_use (struct ivopts_data *data, struct iv *biv)
1976*38fd1498Szrj {
1977*38fd1498Szrj   unsigned i;
1978*38fd1498Szrj   tree type, base_1, base_2;
1979*38fd1498Szrj   bitmap_iterator bi;
1980*38fd1498Szrj 
1981*38fd1498Szrj   if (!biv || !biv->biv_p || integer_zerop (biv->step)
1982*38fd1498Szrj       || biv->have_address_use || !biv->no_overflow)
1983*38fd1498Szrj     return;
1984*38fd1498Szrj 
1985*38fd1498Szrj   type = TREE_TYPE (biv->base);
1986*38fd1498Szrj   if (!INTEGRAL_TYPE_P (type))
1987*38fd1498Szrj     return;
1988*38fd1498Szrj 
1989*38fd1498Szrj   biv->have_address_use = true;
1990*38fd1498Szrj   data->bivs_not_used_in_addr--;
1991*38fd1498Szrj   base_1 = fold_build2 (PLUS_EXPR, type, biv->base, biv->step);
1992*38fd1498Szrj   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1993*38fd1498Szrj     {
1994*38fd1498Szrj       struct iv *iv = ver_info (data, i)->iv;
1995*38fd1498Szrj 
1996*38fd1498Szrj       if (!iv || !iv->biv_p || integer_zerop (iv->step)
1997*38fd1498Szrj 	  || iv->have_address_use || !iv->no_overflow)
1998*38fd1498Szrj 	continue;
1999*38fd1498Szrj 
2000*38fd1498Szrj       if (type != TREE_TYPE (iv->base)
2001*38fd1498Szrj 	  || !INTEGRAL_TYPE_P (TREE_TYPE (iv->base)))
2002*38fd1498Szrj 	continue;
2003*38fd1498Szrj 
2004*38fd1498Szrj       if (!operand_equal_p (biv->step, iv->step, 0))
2005*38fd1498Szrj 	continue;
2006*38fd1498Szrj 
2007*38fd1498Szrj       base_2 = fold_build2 (PLUS_EXPR, type, iv->base, iv->step);
2008*38fd1498Szrj       if (operand_equal_p (base_1, iv->base, 0)
2009*38fd1498Szrj 	  || operand_equal_p (base_2, biv->base, 0))
2010*38fd1498Szrj 	{
2011*38fd1498Szrj 	  iv->have_address_use = true;
2012*38fd1498Szrj 	  data->bivs_not_used_in_addr--;
2013*38fd1498Szrj 	}
2014*38fd1498Szrj     }
2015*38fd1498Szrj }
2016*38fd1498Szrj 
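/* Sketch of the pairing above (hypothetical values): two bivs with the
   same step 4 and bases b and b + 4 are one step apart; when one of
   them is found in an address type use, the other is marked as having
   an address use as well.  */
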
2017*38fd1498Szrj /* Accumulates the steps of indices into DATA and replaces their values with the
2018*38fd1498Szrj    initial ones.  Returns false when the value of the index cannot be determined.
2019*38fd1498Szrj    Callback for for_each_index.  */
2020*38fd1498Szrj 
2021*38fd1498Szrj struct ifs_ivopts_data
2022*38fd1498Szrj {
2023*38fd1498Szrj   struct ivopts_data *ivopts_data;
2024*38fd1498Szrj   gimple *stmt;
2025*38fd1498Szrj   tree step;
2026*38fd1498Szrj };
2027*38fd1498Szrj 
2028*38fd1498Szrj static bool
2029*38fd1498Szrj idx_find_step (tree base, tree *idx, void *data)
2030*38fd1498Szrj {
2031*38fd1498Szrj   struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
2032*38fd1498Szrj   struct iv *iv;
2033*38fd1498Szrj   bool use_overflow_semantics = false;
2034*38fd1498Szrj   tree step, iv_base, iv_step, lbound, off;
2035*38fd1498Szrj   struct loop *loop = dta->ivopts_data->current_loop;
2036*38fd1498Szrj 
2037*38fd1498Szrj   /* If base is a component ref, require that the offset of the reference
2038*38fd1498Szrj      be invariant.  */
2039*38fd1498Szrj   if (TREE_CODE (base) == COMPONENT_REF)
2040*38fd1498Szrj     {
2041*38fd1498Szrj       off = component_ref_field_offset (base);
2042*38fd1498Szrj       return expr_invariant_in_loop_p (loop, off);
2043*38fd1498Szrj     }
2044*38fd1498Szrj 
2045*38fd1498Szrj   /* If base is an array reference, first check whether we will be able to move the
2046*38fd1498Szrj      reference out of the loop (in order to take its address in strength
2047*38fd1498Szrj      reduction).  In order for this to work we need both lower bound
2048*38fd1498Szrj      and step to be loop invariants.  */
2049*38fd1498Szrj   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2050*38fd1498Szrj     {
2051*38fd1498Szrj       /* Moreover, for a range, the size needs to be invariant as well.  */
2052*38fd1498Szrj       if (TREE_CODE (base) == ARRAY_RANGE_REF
2053*38fd1498Szrj 	  && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
2054*38fd1498Szrj 	return false;
2055*38fd1498Szrj 
2056*38fd1498Szrj       step = array_ref_element_size (base);
2057*38fd1498Szrj       lbound = array_ref_low_bound (base);
2058*38fd1498Szrj 
2059*38fd1498Szrj       if (!expr_invariant_in_loop_p (loop, step)
2060*38fd1498Szrj 	  || !expr_invariant_in_loop_p (loop, lbound))
2061*38fd1498Szrj 	return false;
2062*38fd1498Szrj     }
2063*38fd1498Szrj 
2064*38fd1498Szrj   if (TREE_CODE (*idx) != SSA_NAME)
2065*38fd1498Szrj     return true;
2066*38fd1498Szrj 
2067*38fd1498Szrj   iv = get_iv (dta->ivopts_data, *idx);
2068*38fd1498Szrj   if (!iv)
2069*38fd1498Szrj     return false;
2070*38fd1498Szrj 
2071*38fd1498Szrj   /* XXX  We produce for a base of *D42 with iv->base being &x[0]
2072*38fd1498Szrj 	  *&x[0], which is not folded and does not trigger the
2073*38fd1498Szrj 	  ARRAY_REF path below.  */
2074*38fd1498Szrj   *idx = iv->base;
2075*38fd1498Szrj 
2076*38fd1498Szrj   if (integer_zerop (iv->step))
2077*38fd1498Szrj     return true;
2078*38fd1498Szrj 
2079*38fd1498Szrj   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2080*38fd1498Szrj     {
2081*38fd1498Szrj       step = array_ref_element_size (base);
2082*38fd1498Szrj 
2083*38fd1498Szrj       /* We only handle addresses whose step is an integer constant.  */
2084*38fd1498Szrj       if (TREE_CODE (step) != INTEGER_CST)
2085*38fd1498Szrj 	return false;
2086*38fd1498Szrj     }
2087*38fd1498Szrj   else
2088*38fd1498Szrj     /* The step for pointer arithmetic is already 1 byte.  */
2089*38fd1498Szrj     step = size_one_node;
2090*38fd1498Szrj 
2091*38fd1498Szrj   iv_base = iv->base;
2092*38fd1498Szrj   iv_step = iv->step;
2093*38fd1498Szrj   if (iv->no_overflow && nowrap_type_p (TREE_TYPE (iv_step)))
2094*38fd1498Szrj     use_overflow_semantics = true;
2095*38fd1498Szrj 
2096*38fd1498Szrj   if (!convert_affine_scev (dta->ivopts_data->current_loop,
2097*38fd1498Szrj 			    sizetype, &iv_base, &iv_step, dta->stmt,
2098*38fd1498Szrj 			    use_overflow_semantics))
2099*38fd1498Szrj     {
2100*38fd1498Szrj       /* The index might wrap.  */
2101*38fd1498Szrj       return false;
2102*38fd1498Szrj     }
2103*38fd1498Szrj 
2104*38fd1498Szrj   step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
2105*38fd1498Szrj   dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
2106*38fd1498Szrj 
2107*38fd1498Szrj   if (dta->ivopts_data->bivs_not_used_in_addr)
2108*38fd1498Szrj     {
2109*38fd1498Szrj       if (!iv->biv_p)
2110*38fd1498Szrj 	iv = find_deriving_biv_for_expr (dta->ivopts_data, iv->ssa_name);
2111*38fd1498Szrj 
2112*38fd1498Szrj       record_biv_for_address_use (dta->ivopts_data, iv);
2113*38fd1498Szrj     }
2114*38fd1498Szrj   return true;
2115*38fd1498Szrj }
2116*38fd1498Szrj 
2117*38fd1498Szrj /* Records use in index IDX.  Callback for for_each_index.  Ivopts data
2118*38fd1498Szrj    object is passed to it in DATA.  */
2119*38fd1498Szrj 
2120*38fd1498Szrj static bool
2121*38fd1498Szrj idx_record_use (tree base, tree *idx,
2122*38fd1498Szrj 		void *vdata)
2123*38fd1498Szrj {
2124*38fd1498Szrj   struct ivopts_data *data = (struct ivopts_data *) vdata;
2125*38fd1498Szrj   find_interesting_uses_op (data, *idx);
2126*38fd1498Szrj   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2127*38fd1498Szrj     {
2128*38fd1498Szrj       find_interesting_uses_op (data, array_ref_element_size (base));
2129*38fd1498Szrj       find_interesting_uses_op (data, array_ref_low_bound (base));
2130*38fd1498Szrj     }
2131*38fd1498Szrj   return true;
2132*38fd1498Szrj }
2133*38fd1498Szrj 
2134*38fd1498Szrj /* If we can prove that TOP = cst * BOT for some constant cst,
2135*38fd1498Szrj    store cst to MUL and return true.  Otherwise return false.
2136*38fd1498Szrj    The returned value is always sign-extended, regardless of the
2137*38fd1498Szrj    signedness of TOP and BOT.  */
2138*38fd1498Szrj 
2139*38fd1498Szrj static bool
2140*38fd1498Szrj constant_multiple_of (tree top, tree bot, widest_int *mul)
2141*38fd1498Szrj {
2142*38fd1498Szrj   tree mby;
2143*38fd1498Szrj   enum tree_code code;
2144*38fd1498Szrj   unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
2145*38fd1498Szrj   widest_int res, p0, p1;
2146*38fd1498Szrj 
2147*38fd1498Szrj   STRIP_NOPS (top);
2148*38fd1498Szrj   STRIP_NOPS (bot);
2149*38fd1498Szrj 
2150*38fd1498Szrj   if (operand_equal_p (top, bot, 0))
2151*38fd1498Szrj     {
2152*38fd1498Szrj       *mul = 1;
2153*38fd1498Szrj       return true;
2154*38fd1498Szrj     }
2155*38fd1498Szrj 
2156*38fd1498Szrj   code = TREE_CODE (top);
2157*38fd1498Szrj   switch (code)
2158*38fd1498Szrj     {
2159*38fd1498Szrj     case MULT_EXPR:
2160*38fd1498Szrj       mby = TREE_OPERAND (top, 1);
2161*38fd1498Szrj       if (TREE_CODE (mby) != INTEGER_CST)
2162*38fd1498Szrj 	return false;
2163*38fd1498Szrj 
2164*38fd1498Szrj       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
2165*38fd1498Szrj 	return false;
2166*38fd1498Szrj 
2167*38fd1498Szrj       *mul = wi::sext (res * wi::to_widest (mby), precision);
2168*38fd1498Szrj       return true;
2169*38fd1498Szrj 
2170*38fd1498Szrj     case PLUS_EXPR:
2171*38fd1498Szrj     case MINUS_EXPR:
2172*38fd1498Szrj       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
2173*38fd1498Szrj 	  || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
2174*38fd1498Szrj 	return false;
2175*38fd1498Szrj 
2176*38fd1498Szrj       if (code == MINUS_EXPR)
2177*38fd1498Szrj 	p1 = -p1;
2178*38fd1498Szrj       *mul = wi::sext (p0 + p1, precision);
2179*38fd1498Szrj       return true;
2180*38fd1498Szrj 
2181*38fd1498Szrj     case INTEGER_CST:
2182*38fd1498Szrj       if (TREE_CODE (bot) != INTEGER_CST)
2183*38fd1498Szrj 	return false;
2184*38fd1498Szrj 
2185*38fd1498Szrj       p0 = widest_int::from (wi::to_wide (top), SIGNED);
2186*38fd1498Szrj       p1 = widest_int::from (wi::to_wide (bot), SIGNED);
2187*38fd1498Szrj       if (p1 == 0)
2188*38fd1498Szrj 	return false;
2189*38fd1498Szrj       *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
2190*38fd1498Szrj       return res == 0;
2191*38fd1498Szrj 
2192*38fd1498Szrj     default:
2193*38fd1498Szrj       if (POLY_INT_CST_P (top)
2194*38fd1498Szrj 	  && POLY_INT_CST_P (bot)
2195*38fd1498Szrj 	  && constant_multiple_p (wi::to_poly_widest (top),
2196*38fd1498Szrj 				  wi::to_poly_widest (bot), mul))
2197*38fd1498Szrj 	return true;
2198*38fd1498Szrj 
2199*38fd1498Szrj       return false;
2200*38fd1498Szrj     }
2201*38fd1498Szrj }
2202*38fd1498Szrj 
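/* Worked examples (illustrative): constant_multiple_of (i_1 * 8, i_1, &m)
   sets m = 8 through the MULT_EXPR case; with INTEGER_CSTs, top = 24
   and bot = 8 give m = 3, while top = 10 and bot = 4 fail because the
   truncating division leaves a nonzero remainder.  */
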
2203*38fd1498Szrj /* Return true if memory reference REF with step STEP may be unaligned.  */
2204*38fd1498Szrj 
2205*38fd1498Szrj static bool
2206*38fd1498Szrj may_be_unaligned_p (tree ref, tree step)
2207*38fd1498Szrj {
2208*38fd1498Szrj   /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
2209*38fd1498Szrj      thus they are not misaligned.  */
2210*38fd1498Szrj   if (TREE_CODE (ref) == TARGET_MEM_REF)
2211*38fd1498Szrj     return false;
2212*38fd1498Szrj 
2213*38fd1498Szrj   unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
2214*38fd1498Szrj   if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
2215*38fd1498Szrj     align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));
2216*38fd1498Szrj 
2217*38fd1498Szrj   unsigned HOST_WIDE_INT bitpos;
2218*38fd1498Szrj   unsigned int ref_align;
2219*38fd1498Szrj   get_object_alignment_1 (ref, &ref_align, &bitpos);
2220*38fd1498Szrj   if (ref_align < align
2221*38fd1498Szrj       || (bitpos % align) != 0
2222*38fd1498Szrj       || (bitpos % BITS_PER_UNIT) != 0)
2223*38fd1498Szrj     return true;
2224*38fd1498Szrj 
2225*38fd1498Szrj   unsigned int trailing_zeros = tree_ctz (step);
2226*38fd1498Szrj   if (trailing_zeros < HOST_BITS_PER_INT
2227*38fd1498Szrj       && (1U << trailing_zeros) * BITS_PER_UNIT < align)
2228*38fd1498Szrj     return true;
2229*38fd1498Szrj 
2230*38fd1498Szrj   return false;
2231*38fd1498Szrj }
2232*38fd1498Szrj 
2233*38fd1498Szrj /* Return true if EXPR may be non-addressable.   */
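/* Example (illustrative): on a STRICT_ALIGNMENT target, a reference to
   a 4-byte int whose address advances by a step of 2 bytes can become
   misaligned after one iteration; tree_ctz (step) == 1, so
   (1U << 1) * BITS_PER_UNIT is smaller than the 32-bit alignment and
   the function returns true.  */
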
2234*38fd1498Szrj 
2235*38fd1498Szrj bool
2236*38fd1498Szrj may_be_nonaddressable_p (tree expr)
2237*38fd1498Szrj {
2238*38fd1498Szrj   switch (TREE_CODE (expr))
2239*38fd1498Szrj     {
2240*38fd1498Szrj     case TARGET_MEM_REF:
2241*38fd1498Szrj       /* TARGET_MEM_REFs are translated directly to valid MEMs on the
2242*38fd1498Szrj 	 target, thus they are always addressable.  */
2243*38fd1498Szrj       return false;
2244*38fd1498Szrj 
2245*38fd1498Szrj     case MEM_REF:
2246*38fd1498Szrj       /* Likewise for MEM_REFs, modulo the storage order.  */
2247*38fd1498Szrj       return REF_REVERSE_STORAGE_ORDER (expr);
2248*38fd1498Szrj 
2249*38fd1498Szrj     case BIT_FIELD_REF:
2250*38fd1498Szrj       if (REF_REVERSE_STORAGE_ORDER (expr))
2251*38fd1498Szrj 	return true;
2252*38fd1498Szrj       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2253*38fd1498Szrj 
2254*38fd1498Szrj     case COMPONENT_REF:
2255*38fd1498Szrj       if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2256*38fd1498Szrj 	return true;
2257*38fd1498Szrj       return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
2258*38fd1498Szrj 	     || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2259*38fd1498Szrj 
2260*38fd1498Szrj     case ARRAY_REF:
2261*38fd1498Szrj     case ARRAY_RANGE_REF:
2262*38fd1498Szrj       if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2263*38fd1498Szrj 	return true;
2264*38fd1498Szrj       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2265*38fd1498Szrj 
2266*38fd1498Szrj     case VIEW_CONVERT_EXPR:
2267*38fd1498Szrj       /* These kinds of view-conversions may wrap non-addressable objects
2268*38fd1498Szrj 	 and make them look addressable.  After some processing the
2269*38fd1498Szrj 	 non-addressability may be uncovered again, causing ADDR_EXPRs
2270*38fd1498Szrj 	 of inappropriate objects to be built.  */
2271*38fd1498Szrj       if (is_gimple_reg (TREE_OPERAND (expr, 0))
2272*38fd1498Szrj 	  || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
2273*38fd1498Szrj 	return true;
2274*38fd1498Szrj       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2275*38fd1498Szrj 
2276*38fd1498Szrj     CASE_CONVERT:
2277*38fd1498Szrj       return true;
2278*38fd1498Szrj 
2279*38fd1498Szrj     default:
2280*38fd1498Szrj       break;
2281*38fd1498Szrj     }
2282*38fd1498Szrj 
2283*38fd1498Szrj   return false;
2284*38fd1498Szrj }
2285*38fd1498Szrj 
2286*38fd1498Szrj /* Finds addresses in *OP_P inside STMT.  */
2287*38fd1498Szrj 
2288*38fd1498Szrj static void
2289*38fd1498Szrj find_interesting_uses_address (struct ivopts_data *data, gimple *stmt,
2290*38fd1498Szrj 			       tree *op_p)
2291*38fd1498Szrj {
2292*38fd1498Szrj   tree base = *op_p, step = size_zero_node;
2293*38fd1498Szrj   struct iv *civ;
2294*38fd1498Szrj   struct ifs_ivopts_data ifs_ivopts_data;
2295*38fd1498Szrj 
2296*38fd1498Szrj   /* Do not play with volatile memory references.  A bit too conservative,
2297*38fd1498Szrj      perhaps, but safe.  */
2298*38fd1498Szrj   if (gimple_has_volatile_ops (stmt))
2299*38fd1498Szrj     goto fail;
2300*38fd1498Szrj 
2301*38fd1498Szrj   /* Ignore bitfields for now.  Not really something terribly complicated
2302*38fd1498Szrj      to handle.  TODO.  */
2303*38fd1498Szrj   if (TREE_CODE (base) == BIT_FIELD_REF)
2304*38fd1498Szrj     goto fail;
2305*38fd1498Szrj 
2306*38fd1498Szrj   base = unshare_expr (base);
2307*38fd1498Szrj 
2308*38fd1498Szrj   if (TREE_CODE (base) == TARGET_MEM_REF)
2309*38fd1498Szrj     {
2310*38fd1498Szrj       tree type = build_pointer_type (TREE_TYPE (base));
2311*38fd1498Szrj       tree astep;
2312*38fd1498Szrj 
2313*38fd1498Szrj       if (TMR_BASE (base)
2314*38fd1498Szrj 	  && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
2315*38fd1498Szrj 	{
2316*38fd1498Szrj 	  civ = get_iv (data, TMR_BASE (base));
2317*38fd1498Szrj 	  if (!civ)
2318*38fd1498Szrj 	    goto fail;
2319*38fd1498Szrj 
2320*38fd1498Szrj 	  TMR_BASE (base) = civ->base;
2321*38fd1498Szrj 	  step = civ->step;
2322*38fd1498Szrj 	}
2323*38fd1498Szrj       if (TMR_INDEX2 (base)
2324*38fd1498Szrj 	  && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
2325*38fd1498Szrj 	{
2326*38fd1498Szrj 	  civ = get_iv (data, TMR_INDEX2 (base));
2327*38fd1498Szrj 	  if (!civ)
2328*38fd1498Szrj 	    goto fail;
2329*38fd1498Szrj 
2330*38fd1498Szrj 	  TMR_INDEX2 (base) = civ->base;
2331*38fd1498Szrj 	  step = civ->step;
2332*38fd1498Szrj 	}
2333*38fd1498Szrj       if (TMR_INDEX (base)
2334*38fd1498Szrj 	  && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
2335*38fd1498Szrj 	{
2336*38fd1498Szrj 	  civ = get_iv (data, TMR_INDEX (base));
2337*38fd1498Szrj 	  if (!civ)
2338*38fd1498Szrj 	    goto fail;
2339*38fd1498Szrj 
2340*38fd1498Szrj 	  TMR_INDEX (base) = civ->base;
2341*38fd1498Szrj 	  astep = civ->step;
2342*38fd1498Szrj 
2343*38fd1498Szrj 	  if (astep)
2344*38fd1498Szrj 	    {
2345*38fd1498Szrj 	      if (TMR_STEP (base))
2346*38fd1498Szrj 		astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
2347*38fd1498Szrj 
2348*38fd1498Szrj 	      step = fold_build2 (PLUS_EXPR, type, step, astep);
2349*38fd1498Szrj 	    }
2350*38fd1498Szrj 	}
2351*38fd1498Szrj 
2352*38fd1498Szrj       if (integer_zerop (step))
2353*38fd1498Szrj 	goto fail;
2354*38fd1498Szrj       base = tree_mem_ref_addr (type, base);
2355*38fd1498Szrj     }
2356*38fd1498Szrj   else
2357*38fd1498Szrj     {
2358*38fd1498Szrj       ifs_ivopts_data.ivopts_data = data;
2359*38fd1498Szrj       ifs_ivopts_data.stmt = stmt;
2360*38fd1498Szrj       ifs_ivopts_data.step = size_zero_node;
2361*38fd1498Szrj       if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
2362*38fd1498Szrj 	  || integer_zerop (ifs_ivopts_data.step))
2363*38fd1498Szrj 	goto fail;
2364*38fd1498Szrj       step = ifs_ivopts_data.step;
2365*38fd1498Szrj 
2366*38fd1498Szrj       /* Check that the base expression is addressable.  This needs
2367*38fd1498Szrj 	 to be done after substituting bases of IVs into it.  */
2368*38fd1498Szrj       if (may_be_nonaddressable_p (base))
2369*38fd1498Szrj 	goto fail;
2370*38fd1498Szrj 
2371*38fd1498Szrj       /* Moreover, on strict alignment platforms, check that it is
2372*38fd1498Szrj 	 sufficiently aligned.  */
2373*38fd1498Szrj       if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
2374*38fd1498Szrj 	goto fail;
2375*38fd1498Szrj 
2376*38fd1498Szrj       base = build_fold_addr_expr (base);
2377*38fd1498Szrj 
2378*38fd1498Szrj       /* Substituting bases of IVs into the base expression might
2379*38fd1498Szrj 	 have caused folding opportunities.  */
2380*38fd1498Szrj       if (TREE_CODE (base) == ADDR_EXPR)
2381*38fd1498Szrj 	{
2382*38fd1498Szrj 	  tree *ref = &TREE_OPERAND (base, 0);
2383*38fd1498Szrj 	  while (handled_component_p (*ref))
2384*38fd1498Szrj 	    ref = &TREE_OPERAND (*ref, 0);
2385*38fd1498Szrj 	  if (TREE_CODE (*ref) == MEM_REF)
2386*38fd1498Szrj 	    {
2387*38fd1498Szrj 	      tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
2388*38fd1498Szrj 				      TREE_OPERAND (*ref, 0),
2389*38fd1498Szrj 				      TREE_OPERAND (*ref, 1));
2390*38fd1498Szrj 	      if (tem)
2391*38fd1498Szrj 		*ref = tem;
2392*38fd1498Szrj 	    }
2393*38fd1498Szrj 	}
2394*38fd1498Szrj     }
2395*38fd1498Szrj 
2396*38fd1498Szrj   civ = alloc_iv (data, base, step);
2397*38fd1498Szrj   /* Fail if base object of this memory reference is unknown.  */
2398*38fd1498Szrj   if (civ->base_object == NULL_TREE)
2399*38fd1498Szrj     goto fail;
2400*38fd1498Szrj 
2401*38fd1498Szrj   record_group_use (data, op_p, civ, stmt, USE_REF_ADDRESS, TREE_TYPE (*op_p));
2402*38fd1498Szrj   return;
2403*38fd1498Szrj 
2404*38fd1498Szrj fail:
2405*38fd1498Szrj   for_each_index (op_p, idx_record_use, data);
2406*38fd1498Szrj }
2407*38fd1498Szrj 
2408*38fd1498Szrj /* Finds and records invariants used in STMT.  */
2409*38fd1498Szrj 
2410*38fd1498Szrj static void
2411*38fd1498Szrj find_invariants_stmt (struct ivopts_data *data, gimple *stmt)
2412*38fd1498Szrj {
2413*38fd1498Szrj   ssa_op_iter iter;
2414*38fd1498Szrj   use_operand_p use_p;
2415*38fd1498Szrj   tree op;
2416*38fd1498Szrj 
2417*38fd1498Szrj   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2418*38fd1498Szrj     {
2419*38fd1498Szrj       op = USE_FROM_PTR (use_p);
2420*38fd1498Szrj       record_invariant (data, op, false);
2421*38fd1498Szrj     }
2422*38fd1498Szrj }
2423*38fd1498Szrj 
2424*38fd1498Szrj /* CALL calls an internal function.  If operand *OP_P will become an
2425*38fd1498Szrj    address when the call is expanded, return the type of the memory
2426*38fd1498Szrj    being addressed, otherwise return null.  */
2427*38fd1498Szrj 
2428*38fd1498Szrj static tree
2429*38fd1498Szrj get_mem_type_for_internal_fn (gcall *call, tree *op_p)
2430*38fd1498Szrj {
2431*38fd1498Szrj   switch (gimple_call_internal_fn (call))
2432*38fd1498Szrj     {
2433*38fd1498Szrj     case IFN_MASK_LOAD:
2434*38fd1498Szrj       if (op_p == gimple_call_arg_ptr (call, 0))
2435*38fd1498Szrj 	return TREE_TYPE (gimple_call_lhs (call));
2436*38fd1498Szrj       return NULL_TREE;
2437*38fd1498Szrj 
2438*38fd1498Szrj     case IFN_MASK_STORE:
2439*38fd1498Szrj       if (op_p == gimple_call_arg_ptr (call, 0))
2440*38fd1498Szrj 	return TREE_TYPE (gimple_call_arg (call, 3));
2441*38fd1498Szrj       return NULL_TREE;
2442*38fd1498Szrj 
2443*38fd1498Szrj     default:
2444*38fd1498Szrj       return NULL_TREE;
2445*38fd1498Szrj     }
2446*38fd1498Szrj }
2447*38fd1498Szrj 
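/* For example (sketch, hypothetical SSA names): in a masked load

     lhs_1 = IFN_MASK_LOAD (ptr_2, align_3, mask_4);

   the first argument becomes an address when the call is expanded, so
   TREE_TYPE of lhs_1 is returned for it; for the other arguments, and
   for unhandled internal functions, the result is NULL_TREE.  */
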
2448*38fd1498Szrj /* IV is a (non-address) iv that describes operand *OP_P of STMT.
2449*38fd1498Szrj    Return true if the operand will become an address when STMT
2450*38fd1498Szrj    is expanded and record the associated address use if so.  */
2451*38fd1498Szrj 
2452*38fd1498Szrj static bool
2453*38fd1498Szrj find_address_like_use (struct ivopts_data *data, gimple *stmt, tree *op_p,
2454*38fd1498Szrj 		       struct iv *iv)
2455*38fd1498Szrj {
2456*38fd1498Szrj   /* Fail if base object of this memory reference is unknown.  */
2457*38fd1498Szrj   if (iv->base_object == NULL_TREE)
2458*38fd1498Szrj     return false;
2459*38fd1498Szrj 
2460*38fd1498Szrj   tree mem_type = NULL_TREE;
2461*38fd1498Szrj   if (gcall *call = dyn_cast <gcall *> (stmt))
2462*38fd1498Szrj     if (gimple_call_internal_p (call))
2463*38fd1498Szrj       mem_type = get_mem_type_for_internal_fn (call, op_p);
2464*38fd1498Szrj   if (mem_type)
2465*38fd1498Szrj     {
2466*38fd1498Szrj       iv = alloc_iv (data, iv->base, iv->step);
2467*38fd1498Szrj       record_group_use (data, op_p, iv, stmt, USE_PTR_ADDRESS, mem_type);
2468*38fd1498Szrj       return true;
2469*38fd1498Szrj     }
2470*38fd1498Szrj   return false;
2471*38fd1498Szrj }
2472*38fd1498Szrj 
2473*38fd1498Szrj /* Finds interesting uses of induction variables in the statement STMT.  */
2474*38fd1498Szrj 
2475*38fd1498Szrj static void
2476*38fd1498Szrj find_interesting_uses_stmt (struct ivopts_data *data, gimple *stmt)
2477*38fd1498Szrj {
2478*38fd1498Szrj   struct iv *iv;
2479*38fd1498Szrj   tree op, *lhs, *rhs;
2480*38fd1498Szrj   ssa_op_iter iter;
2481*38fd1498Szrj   use_operand_p use_p;
2482*38fd1498Szrj   enum tree_code code;
2483*38fd1498Szrj 
2484*38fd1498Szrj   find_invariants_stmt (data, stmt);
2485*38fd1498Szrj 
2486*38fd1498Szrj   if (gimple_code (stmt) == GIMPLE_COND)
2487*38fd1498Szrj     {
2488*38fd1498Szrj       find_interesting_uses_cond (data, stmt);
2489*38fd1498Szrj       return;
2490*38fd1498Szrj     }
2491*38fd1498Szrj 
2492*38fd1498Szrj   if (is_gimple_assign (stmt))
2493*38fd1498Szrj     {
2494*38fd1498Szrj       lhs = gimple_assign_lhs_ptr (stmt);
2495*38fd1498Szrj       rhs = gimple_assign_rhs1_ptr (stmt);
2496*38fd1498Szrj 
2497*38fd1498Szrj       if (TREE_CODE (*lhs) == SSA_NAME)
2498*38fd1498Szrj 	{
2499*38fd1498Szrj 	  /* If the statement defines an induction variable, the uses are not
2500*38fd1498Szrj 	     interesting by themselves.  */
2501*38fd1498Szrj 
2502*38fd1498Szrj 	  iv = get_iv (data, *lhs);
2503*38fd1498Szrj 
2504*38fd1498Szrj 	  if (iv && !integer_zerop (iv->step))
2505*38fd1498Szrj 	    return;
2506*38fd1498Szrj 	}
2507*38fd1498Szrj 
2508*38fd1498Szrj       code = gimple_assign_rhs_code (stmt);
2509*38fd1498Szrj       if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
2510*38fd1498Szrj 	  && (REFERENCE_CLASS_P (*rhs)
2511*38fd1498Szrj 	      || is_gimple_val (*rhs)))
2512*38fd1498Szrj 	{
2513*38fd1498Szrj 	  if (REFERENCE_CLASS_P (*rhs))
2514*38fd1498Szrj 	    find_interesting_uses_address (data, stmt, rhs);
2515*38fd1498Szrj 	  else
2516*38fd1498Szrj 	    find_interesting_uses_op (data, *rhs);
2517*38fd1498Szrj 
2518*38fd1498Szrj 	  if (REFERENCE_CLASS_P (*lhs))
2519*38fd1498Szrj 	    find_interesting_uses_address (data, stmt, lhs);
2520*38fd1498Szrj 	  return;
2521*38fd1498Szrj 	}
2522*38fd1498Szrj       else if (TREE_CODE_CLASS (code) == tcc_comparison)
2523*38fd1498Szrj 	{
2524*38fd1498Szrj 	  find_interesting_uses_cond (data, stmt);
2525*38fd1498Szrj 	  return;
2526*38fd1498Szrj 	}
2527*38fd1498Szrj 
2528*38fd1498Szrj       /* TODO -- we should also handle address uses of type
2529*38fd1498Szrj 
2530*38fd1498Szrj 	 memory = call (whatever);
2531*38fd1498Szrj 
2532*38fd1498Szrj 	 and
2533*38fd1498Szrj 
2534*38fd1498Szrj 	 call (memory).  */
2535*38fd1498Szrj     }
2536*38fd1498Szrj 
2537*38fd1498Szrj   if (gimple_code (stmt) == GIMPLE_PHI
2538*38fd1498Szrj       && gimple_bb (stmt) == data->current_loop->header)
2539*38fd1498Szrj     {
2540*38fd1498Szrj       iv = get_iv (data, PHI_RESULT (stmt));
2541*38fd1498Szrj 
2542*38fd1498Szrj       if (iv && !integer_zerop (iv->step))
2543*38fd1498Szrj 	return;
2544*38fd1498Szrj     }
2545*38fd1498Szrj 
2546*38fd1498Szrj   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2547*38fd1498Szrj     {
2548*38fd1498Szrj       op = USE_FROM_PTR (use_p);
2549*38fd1498Szrj 
2550*38fd1498Szrj       if (TREE_CODE (op) != SSA_NAME)
2551*38fd1498Szrj 	continue;
2552*38fd1498Szrj 
2553*38fd1498Szrj       iv = get_iv (data, op);
2554*38fd1498Szrj       if (!iv)
2555*38fd1498Szrj 	continue;
2556*38fd1498Szrj 
2557*38fd1498Szrj       if (!find_address_like_use (data, stmt, use_p->use, iv))
2558*38fd1498Szrj 	find_interesting_uses_op (data, op);
2559*38fd1498Szrj     }
2560*38fd1498Szrj }
2561*38fd1498Szrj 
2562*38fd1498Szrj /* Finds interesting uses of induction variables outside of the loop,
2563*38fd1498Szrj    on the loop exit edge EXIT.  */
2564*38fd1498Szrj 
2565*38fd1498Szrj static void
2566*38fd1498Szrj find_interesting_uses_outside (struct ivopts_data *data, edge exit)
2567*38fd1498Szrj {
2568*38fd1498Szrj   gphi *phi;
2569*38fd1498Szrj   gphi_iterator psi;
2570*38fd1498Szrj   tree def;
2571*38fd1498Szrj 
2572*38fd1498Szrj   for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
2573*38fd1498Szrj     {
2574*38fd1498Szrj       phi = psi.phi ();
2575*38fd1498Szrj       def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2576*38fd1498Szrj       if (!virtual_operand_p (def))
2577*38fd1498Szrj 	find_interesting_uses_op (data, def);
2578*38fd1498Szrj     }
2579*38fd1498Szrj }
2580*38fd1498Szrj 
2581*38fd1498Szrj /* Return TRUE if OFFSET is within the range of the [base + offset]
2582*38fd1498Szrj    addressing mode for the memory reference represented by USE.  */
2583*38fd1498Szrj 
2584*38fd1498Szrj static GTY (()) vec<rtx, va_gc> *addr_list;
2585*38fd1498Szrj 
2586*38fd1498Szrj static bool
2587*38fd1498Szrj addr_offset_valid_p (struct iv_use *use, poly_int64 offset)
2588*38fd1498Szrj {
2589*38fd1498Szrj   rtx reg, addr;
2590*38fd1498Szrj   unsigned list_index;
2591*38fd1498Szrj   addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
2592*38fd1498Szrj   machine_mode addr_mode, mem_mode = TYPE_MODE (use->mem_type);
2593*38fd1498Szrj 
2594*38fd1498Szrj   list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
2595*38fd1498Szrj   if (list_index >= vec_safe_length (addr_list))
2596*38fd1498Szrj     vec_safe_grow_cleared (addr_list, list_index + MAX_MACHINE_MODE);
2597*38fd1498Szrj 
2598*38fd1498Szrj   addr = (*addr_list)[list_index];
2599*38fd1498Szrj   if (!addr)
2600*38fd1498Szrj     {
2601*38fd1498Szrj       addr_mode = targetm.addr_space.address_mode (as);
2602*38fd1498Szrj       reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
2603*38fd1498Szrj       addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
2604*38fd1498Szrj       (*addr_list)[list_index] = addr;
2605*38fd1498Szrj     }
2606*38fd1498Szrj   else
2607*38fd1498Szrj     addr_mode = GET_MODE (addr);
2608*38fd1498Szrj 
2609*38fd1498Szrj   XEXP (addr, 1) = gen_int_mode (offset, addr_mode);
2610*38fd1498Szrj   return (memory_address_addr_space_p (mem_mode, addr, as));
2611*38fd1498Szrj }
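
/* A sketch of the check, assuming a hypothetical target whose
   [base + offset] form accepts signed 13-bit displacements: for an
   SImode use, offset 4092 keeps (plus (reg) (const_int 4092)) a
   legitimate address and the function returns true, while offset 8192
   falls outside the range and it returns false.  */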
2612*38fd1498Szrj 
2613*38fd1498Szrj /* Comparison function to sort a group's uses in ascending order of
2614*38fd1498Szrj    addr_offset.  */
2614*38fd1498Szrj 
2615*38fd1498Szrj static int
2616*38fd1498Szrj group_compare_offset (const void *a, const void *b)
2617*38fd1498Szrj {
2618*38fd1498Szrj   const struct iv_use *const *u1 = (const struct iv_use *const *) a;
2619*38fd1498Szrj   const struct iv_use *const *u2 = (const struct iv_use *const *) b;
2620*38fd1498Szrj 
2621*38fd1498Szrj   return compare_sizes_for_sort ((*u1)->addr_offset, (*u2)->addr_offset);
2622*38fd1498Szrj }
2623*38fd1498Szrj 
2624*38fd1498Szrj /* Check if small groups should be split.  Return true if no group
2625*38fd1498Szrj    contains more than two uses with distinct addr_offsets.  Return
2626*38fd1498Szrj    false otherwise.  We want to split such groups because:
2627*38fd1498Szrj 
2628*38fd1498Szrj      1) Small groups don't have much benefit and may interfere with
2629*38fd1498Szrj 	general candidate selection.
2630*38fd1498Szrj      2) The problem size with only small groups is usually small and
2631*38fd1498Szrj 	the general algorithm can handle it well.
2632*38fd1498Szrj 
2633*38fd1498Szrj    TODO -- The above claim may not hold when we want to merge memory
2634*38fd1498Szrj    accesses with consecutive addresses.  */
2635*38fd1498Szrj 
2636*38fd1498Szrj static bool
2637*38fd1498Szrj split_small_address_groups_p (struct ivopts_data *data)
2638*38fd1498Szrj {
2639*38fd1498Szrj   unsigned int i, j, distinct = 1;
2640*38fd1498Szrj   struct iv_use *pre;
2641*38fd1498Szrj   struct iv_group *group;
2642*38fd1498Szrj 
2643*38fd1498Szrj   for (i = 0; i < data->vgroups.length (); i++)
2644*38fd1498Szrj     {
2645*38fd1498Szrj       group = data->vgroups[i];
2646*38fd1498Szrj       if (group->vuses.length () == 1)
2647*38fd1498Szrj 	continue;
2648*38fd1498Szrj 
2649*38fd1498Szrj       gcc_assert (address_p (group->type));
2650*38fd1498Szrj       if (group->vuses.length () == 2)
2651*38fd1498Szrj 	{
2652*38fd1498Szrj 	  if (compare_sizes_for_sort (group->vuses[0]->addr_offset,
2653*38fd1498Szrj 				      group->vuses[1]->addr_offset) > 0)
2654*38fd1498Szrj 	    std::swap (group->vuses[0], group->vuses[1]);
2655*38fd1498Szrj 	}
2656*38fd1498Szrj       else
2657*38fd1498Szrj 	group->vuses.qsort (group_compare_offset);
2658*38fd1498Szrj 
2659*38fd1498Szrj       if (distinct > 2)
2660*38fd1498Szrj 	continue;
2661*38fd1498Szrj 
2662*38fd1498Szrj       distinct = 1;
2663*38fd1498Szrj       for (pre = group->vuses[0], j = 1; j < group->vuses.length (); j++)
2664*38fd1498Szrj 	{
2665*38fd1498Szrj 	  if (maybe_ne (group->vuses[j]->addr_offset, pre->addr_offset))
2666*38fd1498Szrj 	    {
2667*38fd1498Szrj 	      pre = group->vuses[j];
2668*38fd1498Szrj 	      distinct++;
2669*38fd1498Szrj 	    }
2670*38fd1498Szrj 
2671*38fd1498Szrj 	  if (distinct > 2)
2672*38fd1498Szrj 	    break;
2673*38fd1498Szrj 	}
2674*38fd1498Szrj     }
2675*38fd1498Szrj 
2676*38fd1498Szrj   return (distinct <= 2);
2677*38fd1498Szrj }
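
/* For example (offsets are hypothetical): a group with uses at
   addr_offsets {0, 4} has two distinct offsets and counts as small,
   while a group with {0, 4, 8} has three, making this predicate
   return false so that the larger group is kept intact.  */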
2678*38fd1498Szrj 
2679*38fd1498Szrj /* For each group of address type uses, this function further groups
2680*38fd1498Szrj    these uses according to the maximum offset supported by the target's
2681*38fd1498Szrj    [base + offset] addressing mode.  */
2682*38fd1498Szrj 
2683*38fd1498Szrj static void
2684*38fd1498Szrj split_address_groups (struct ivopts_data *data)
2685*38fd1498Szrj {
2686*38fd1498Szrj   unsigned int i, j;
2687*38fd1498Szrj   /* True if the (small) groups should be split unconditionally.  */
2688*38fd1498Szrj   bool split_p = split_small_address_groups_p (data);
2689*38fd1498Szrj 
2690*38fd1498Szrj   for (i = 0; i < data->vgroups.length (); i++)
2691*38fd1498Szrj     {
2692*38fd1498Szrj       struct iv_group *new_group = NULL;
2693*38fd1498Szrj       struct iv_group *group = data->vgroups[i];
2694*38fd1498Szrj       struct iv_use *use = group->vuses[0];
2695*38fd1498Szrj 
2696*38fd1498Szrj       use->id = 0;
2697*38fd1498Szrj       use->group_id = group->id;
2698*38fd1498Szrj       if (group->vuses.length () == 1)
2699*38fd1498Szrj 	continue;
2700*38fd1498Szrj 
2701*38fd1498Szrj       gcc_assert (address_p (use->type));
2702*38fd1498Szrj 
2703*38fd1498Szrj       for (j = 1; j < group->vuses.length ();)
2704*38fd1498Szrj 	{
2705*38fd1498Szrj 	  struct iv_use *next = group->vuses[j];
2706*38fd1498Szrj 	  poly_int64 offset = next->addr_offset - use->addr_offset;
2707*38fd1498Szrj 
2708*38fd1498Szrj 	  /* Split the group if asked to, or if the offset against the
2709*38fd1498Szrj 	     first use can't fit in the offset part of the addressing
2710*38fd1498Szrj 	     mode.  IV uses having the same offset are still kept in
2711*38fd1498Szrj 	     one group.  */
2711*38fd1498Szrj 	  if (maybe_ne (offset, 0)
2712*38fd1498Szrj 	      && (split_p || !addr_offset_valid_p (use, offset)))
2713*38fd1498Szrj 	    {
2714*38fd1498Szrj 	      if (!new_group)
2715*38fd1498Szrj 		new_group = record_group (data, group->type);
2716*38fd1498Szrj 	      group->vuses.ordered_remove (j);
2717*38fd1498Szrj 	      new_group->vuses.safe_push (next);
2718*38fd1498Szrj 	      continue;
2719*38fd1498Szrj 	    }
2720*38fd1498Szrj 
2721*38fd1498Szrj 	  next->id = j;
2722*38fd1498Szrj 	  next->group_id = group->id;
2723*38fd1498Szrj 	  j++;
2724*38fd1498Szrj 	}
2725*38fd1498Szrj     }
2726*38fd1498Szrj }
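
/* A sketch with made-up numbers: on a target whose addressing mode
   accepts offsets up to 4095, uses at base, base + 8 and base + 8192
   start out in one group.  base + 8 stays with base because offset 8
   is valid, while base + 8192 is moved to a new group; and when
   split_small_address_groups_p asked for a split, any distinct offset
   is separated regardless of validity.  */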
2727*38fd1498Szrj 
2728*38fd1498Szrj /* Finds uses of the induction variables that are interesting.  */
2729*38fd1498Szrj 
2730*38fd1498Szrj static void
2731*38fd1498Szrj find_interesting_uses (struct ivopts_data *data)
2732*38fd1498Szrj {
2733*38fd1498Szrj   basic_block bb;
2734*38fd1498Szrj   gimple_stmt_iterator bsi;
2735*38fd1498Szrj   basic_block *body = get_loop_body (data->current_loop);
2736*38fd1498Szrj   unsigned i;
2737*38fd1498Szrj   edge e;
2738*38fd1498Szrj 
2739*38fd1498Szrj   for (i = 0; i < data->current_loop->num_nodes; i++)
2740*38fd1498Szrj     {
2741*38fd1498Szrj       edge_iterator ei;
2742*38fd1498Szrj       bb = body[i];
2743*38fd1498Szrj 
2744*38fd1498Szrj       FOR_EACH_EDGE (e, ei, bb->succs)
2745*38fd1498Szrj 	if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2746*38fd1498Szrj 	    && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2747*38fd1498Szrj 	  find_interesting_uses_outside (data, e);
2748*38fd1498Szrj 
2749*38fd1498Szrj       for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2750*38fd1498Szrj 	find_interesting_uses_stmt (data, gsi_stmt (bsi));
2751*38fd1498Szrj       for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2752*38fd1498Szrj 	if (!is_gimple_debug (gsi_stmt (bsi)))
2753*38fd1498Szrj 	  find_interesting_uses_stmt (data, gsi_stmt (bsi));
2754*38fd1498Szrj     }
2755*38fd1498Szrj   free (body);
2756*38fd1498Szrj 
2757*38fd1498Szrj   split_address_groups (data);
2758*38fd1498Szrj 
2759*38fd1498Szrj   if (dump_file && (dump_flags & TDF_DETAILS))
2760*38fd1498Szrj     {
2761*38fd1498Szrj       fprintf (dump_file, "\n<IV Groups>:\n");
2762*38fd1498Szrj       dump_groups (dump_file, data);
2763*38fd1498Szrj       fprintf (dump_file, "\n");
2764*38fd1498Szrj     }
2765*38fd1498Szrj }
2766*38fd1498Szrj 
2767*38fd1498Szrj /* Strips constant offsets from EXPR and stores them in OFFSET.  If INSIDE_ADDR
2768*38fd1498Szrj    is true, assume we are inside an address.  If TOP_COMPREF is true, assume
2769*38fd1498Szrj    we are at the top level of the processed address.  */
2770*38fd1498Szrj 
2771*38fd1498Szrj static tree
2772*38fd1498Szrj strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2773*38fd1498Szrj 		poly_int64 *offset)
2774*38fd1498Szrj {
2775*38fd1498Szrj   tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2776*38fd1498Szrj   enum tree_code code;
2777*38fd1498Szrj   tree type, orig_type = TREE_TYPE (expr);
2778*38fd1498Szrj   poly_int64 off0, off1;
2779*38fd1498Szrj   HOST_WIDE_INT st;
2780*38fd1498Szrj   tree orig_expr = expr;
2781*38fd1498Szrj 
2782*38fd1498Szrj   STRIP_NOPS (expr);
2783*38fd1498Szrj 
2784*38fd1498Szrj   type = TREE_TYPE (expr);
2785*38fd1498Szrj   code = TREE_CODE (expr);
2786*38fd1498Szrj   *offset = 0;
2787*38fd1498Szrj 
2788*38fd1498Szrj   switch (code)
2789*38fd1498Szrj     {
2790*38fd1498Szrj     case POINTER_PLUS_EXPR:
2791*38fd1498Szrj     case PLUS_EXPR:
2792*38fd1498Szrj     case MINUS_EXPR:
2793*38fd1498Szrj       op0 = TREE_OPERAND (expr, 0);
2794*38fd1498Szrj       op1 = TREE_OPERAND (expr, 1);
2795*38fd1498Szrj 
2796*38fd1498Szrj       op0 = strip_offset_1 (op0, false, false, &off0);
2797*38fd1498Szrj       op1 = strip_offset_1 (op1, false, false, &off1);
2798*38fd1498Szrj 
2799*38fd1498Szrj       *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2800*38fd1498Szrj       if (op0 == TREE_OPERAND (expr, 0)
2801*38fd1498Szrj 	  && op1 == TREE_OPERAND (expr, 1))
2802*38fd1498Szrj 	return orig_expr;
2803*38fd1498Szrj 
2804*38fd1498Szrj       if (integer_zerop (op1))
2805*38fd1498Szrj 	expr = op0;
2806*38fd1498Szrj       else if (integer_zerop (op0))
2807*38fd1498Szrj 	{
2808*38fd1498Szrj 	  if (code == MINUS_EXPR)
2809*38fd1498Szrj 	    expr = fold_build1 (NEGATE_EXPR, type, op1);
2810*38fd1498Szrj 	  else
2811*38fd1498Szrj 	    expr = op1;
2812*38fd1498Szrj 	}
2813*38fd1498Szrj       else
2814*38fd1498Szrj 	expr = fold_build2 (code, type, op0, op1);
2815*38fd1498Szrj 
2816*38fd1498Szrj       return fold_convert (orig_type, expr);
2817*38fd1498Szrj 
2818*38fd1498Szrj     case MULT_EXPR:
2819*38fd1498Szrj       op1 = TREE_OPERAND (expr, 1);
2820*38fd1498Szrj       if (!cst_and_fits_in_hwi (op1))
2821*38fd1498Szrj 	return orig_expr;
2822*38fd1498Szrj 
2823*38fd1498Szrj       op0 = TREE_OPERAND (expr, 0);
2824*38fd1498Szrj       op0 = strip_offset_1 (op0, false, false, &off0);
2825*38fd1498Szrj       if (op0 == TREE_OPERAND (expr, 0))
2826*38fd1498Szrj 	return orig_expr;
2827*38fd1498Szrj 
2828*38fd1498Szrj       *offset = off0 * int_cst_value (op1);
2829*38fd1498Szrj       if (integer_zerop (op0))
2830*38fd1498Szrj 	expr = op0;
2831*38fd1498Szrj       else
2832*38fd1498Szrj 	expr = fold_build2 (MULT_EXPR, type, op0, op1);
2833*38fd1498Szrj 
2834*38fd1498Szrj       return fold_convert (orig_type, expr);
2835*38fd1498Szrj 
2836*38fd1498Szrj     case ARRAY_REF:
2837*38fd1498Szrj     case ARRAY_RANGE_REF:
2838*38fd1498Szrj       if (!inside_addr)
2839*38fd1498Szrj 	return orig_expr;
2840*38fd1498Szrj 
2841*38fd1498Szrj       step = array_ref_element_size (expr);
2842*38fd1498Szrj       if (!cst_and_fits_in_hwi (step))
2843*38fd1498Szrj 	break;
2844*38fd1498Szrj 
2845*38fd1498Szrj       st = int_cst_value (step);
2846*38fd1498Szrj       op1 = TREE_OPERAND (expr, 1);
2847*38fd1498Szrj       op1 = strip_offset_1 (op1, false, false, &off1);
2848*38fd1498Szrj       *offset = off1 * st;
2849*38fd1498Szrj 
2850*38fd1498Szrj       if (top_compref
2851*38fd1498Szrj 	  && integer_zerop (op1))
2852*38fd1498Szrj 	{
2853*38fd1498Szrj 	  /* Strip the component reference completely.  */
2854*38fd1498Szrj 	  op0 = TREE_OPERAND (expr, 0);
2855*38fd1498Szrj 	  op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2856*38fd1498Szrj 	  *offset += off0;
2857*38fd1498Szrj 	  return op0;
2858*38fd1498Szrj 	}
2859*38fd1498Szrj       break;
2860*38fd1498Szrj 
2861*38fd1498Szrj     case COMPONENT_REF:
2862*38fd1498Szrj       {
2863*38fd1498Szrj 	tree field;
2864*38fd1498Szrj 
2865*38fd1498Szrj 	if (!inside_addr)
2866*38fd1498Szrj 	  return orig_expr;
2867*38fd1498Szrj 
2868*38fd1498Szrj 	tmp = component_ref_field_offset (expr);
2869*38fd1498Szrj 	field = TREE_OPERAND (expr, 1);
2870*38fd1498Szrj 	if (top_compref
2871*38fd1498Szrj 	    && cst_and_fits_in_hwi (tmp)
2872*38fd1498Szrj 	    && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2873*38fd1498Szrj 	  {
2874*38fd1498Szrj 	    HOST_WIDE_INT boffset, abs_off;
2875*38fd1498Szrj 
2876*38fd1498Szrj 	    /* Strip the component reference completely.  */
2877*38fd1498Szrj 	    op0 = TREE_OPERAND (expr, 0);
2878*38fd1498Szrj 	    op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2879*38fd1498Szrj 	    boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2880*38fd1498Szrj 	    abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2881*38fd1498Szrj 	    if (boffset < 0)
2882*38fd1498Szrj 	      abs_off = -abs_off;
2883*38fd1498Szrj 
2884*38fd1498Szrj 	    *offset = off0 + int_cst_value (tmp) + abs_off;
2885*38fd1498Szrj 	    return op0;
2886*38fd1498Szrj 	  }
2887*38fd1498Szrj       }
2888*38fd1498Szrj       break;
2889*38fd1498Szrj 
2890*38fd1498Szrj     case ADDR_EXPR:
2891*38fd1498Szrj       op0 = TREE_OPERAND (expr, 0);
2892*38fd1498Szrj       op0 = strip_offset_1 (op0, true, true, &off0);
2893*38fd1498Szrj       *offset += off0;
2894*38fd1498Szrj 
2895*38fd1498Szrj       if (op0 == TREE_OPERAND (expr, 0))
2896*38fd1498Szrj 	return orig_expr;
2897*38fd1498Szrj 
2898*38fd1498Szrj       expr = build_fold_addr_expr (op0);
2899*38fd1498Szrj       return fold_convert (orig_type, expr);
2900*38fd1498Szrj 
2901*38fd1498Szrj     case MEM_REF:
2902*38fd1498Szrj       /* ???  Offset operand?  */
2903*38fd1498Szrj       inside_addr = false;
2904*38fd1498Szrj       break;
2905*38fd1498Szrj 
2906*38fd1498Szrj     default:
2907*38fd1498Szrj       if (ptrdiff_tree_p (expr, offset) && maybe_ne (*offset, 0))
2908*38fd1498Szrj 	return build_int_cst (orig_type, 0);
2909*38fd1498Szrj       return orig_expr;
2910*38fd1498Szrj     }
2911*38fd1498Szrj 
2912*38fd1498Szrj   /* Default handling of expressions for which we want to recurse into
2913*38fd1498Szrj      the first operand.  */
2914*38fd1498Szrj   op0 = TREE_OPERAND (expr, 0);
2915*38fd1498Szrj   op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2916*38fd1498Szrj   *offset += off0;
2917*38fd1498Szrj 
2918*38fd1498Szrj   if (op0 == TREE_OPERAND (expr, 0)
2919*38fd1498Szrj       && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2920*38fd1498Szrj     return orig_expr;
2921*38fd1498Szrj 
2922*38fd1498Szrj   expr = copy_node (expr);
2923*38fd1498Szrj   TREE_OPERAND (expr, 0) = op0;
2924*38fd1498Szrj   if (op1)
2925*38fd1498Szrj     TREE_OPERAND (expr, 1) = op1;
2926*38fd1498Szrj 
2927*38fd1498Szrj   /* Inside address, we might strip the top level component references,
2928*38fd1498Szrj      thus changing type of the expression.  Handling of ADDR_EXPR
2929*38fd1498Szrj      will fix that.  */
2930*38fd1498Szrj   expr = fold_convert (orig_type, expr);
2931*38fd1498Szrj 
2932*38fd1498Szrj   return expr;
2933*38fd1498Szrj }
2934*38fd1498Szrj 
2935*38fd1498Szrj /* Strips constant offsets from EXPR and stores them in OFFSET.  */
2936*38fd1498Szrj 
2937*38fd1498Szrj tree
2938*38fd1498Szrj strip_offset (tree expr, poly_uint64_pod *offset)
2939*38fd1498Szrj {
2940*38fd1498Szrj   poly_int64 off;
2941*38fd1498Szrj   tree core = strip_offset_1 (expr, false, false, &off);
2942*38fd1498Szrj   *offset = off;
2943*38fd1498Szrj   return core;
2944*38fd1498Szrj }
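
/* For instance (an illustrative example), given &a[i + 3] with 4-byte
   array elements, strip_offset returns the core &a[i] and stores 12 in
   *OFFSET; given p + 16 it returns p with *OFFSET = 16.  */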
2945*38fd1498Szrj 
2946*38fd1498Szrj /* Returns a variant of TYPE that can be used as a base for different uses.
2947*38fd1498Szrj    We return an unsigned type with the same precision, which avoids problems
2948*38fd1498Szrj    with overflows.  */
2949*38fd1498Szrj 
2950*38fd1498Szrj static tree
2951*38fd1498Szrj generic_type_for (tree type)
2952*38fd1498Szrj {
2953*38fd1498Szrj   if (POINTER_TYPE_P (type))
2954*38fd1498Szrj     return unsigned_type_for (type);
2955*38fd1498Szrj 
2956*38fd1498Szrj   if (TYPE_UNSIGNED (type))
2957*38fd1498Szrj     return type;
2958*38fd1498Szrj 
2959*38fd1498Szrj   return unsigned_type_for (type);
2960*38fd1498Szrj }
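
/* E.g. int maps to unsigned int, and a pointer type such as int *
   maps to an unsigned integer type of the same precision, so that
   candidate arithmetic wraps instead of invoking undefined
   behavior.  */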
2961*38fd1498Szrj 
2962*38fd1498Szrj /* Private data for walk_tree.  */
2963*38fd1498Szrj 
2964*38fd1498Szrj struct walk_tree_data
2965*38fd1498Szrj {
2966*38fd1498Szrj   bitmap *inv_vars;
2967*38fd1498Szrj   struct ivopts_data *idata;
2968*38fd1498Szrj };
2969*38fd1498Szrj 
2970*38fd1498Szrj /* Callback function for walk_tree; it records invariant variables and
2971*38fd1498Szrj    symbol references in *EXPR_P.  DATA is the structure storing the
2972*38fd1498Szrj    result info.  */
2972*38fd1498Szrj 
2973*38fd1498Szrj static tree
2974*38fd1498Szrj find_inv_vars_cb (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2975*38fd1498Szrj {
2976*38fd1498Szrj   tree op = *expr_p;
2977*38fd1498Szrj   struct version_info *info;
2978*38fd1498Szrj   struct walk_tree_data *wdata = (struct walk_tree_data*) data;
2979*38fd1498Szrj 
2980*38fd1498Szrj   if (TREE_CODE (op) != SSA_NAME)
2981*38fd1498Szrj     return NULL_TREE;
2982*38fd1498Szrj 
2983*38fd1498Szrj   info = name_info (wdata->idata, op);
2984*38fd1498Szrj   /* Because we expand simple operations when finding IVs, a loop-invariant
2985*38fd1498Szrj      variable that isn't referred to by the original loop could be used now.
2986*38fd1498Szrj      Record such invariant variables here.  */
2987*38fd1498Szrj   if (!info->iv)
2988*38fd1498Szrj     {
2989*38fd1498Szrj       struct ivopts_data *idata = wdata->idata;
2990*38fd1498Szrj       basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (op));
2991*38fd1498Szrj 
2992*38fd1498Szrj       if (!bb || !flow_bb_inside_loop_p (idata->current_loop, bb))
2993*38fd1498Szrj 	{
2994*38fd1498Szrj 	  set_iv (idata, op, op, build_int_cst (TREE_TYPE (op), 0), true);
2995*38fd1498Szrj 	  record_invariant (idata, op, false);
2996*38fd1498Szrj 	}
2997*38fd1498Szrj     }
2998*38fd1498Szrj   if (!info->inv_id || info->has_nonlin_use)
2999*38fd1498Szrj     return NULL_TREE;
3000*38fd1498Szrj 
3001*38fd1498Szrj   if (!*wdata->inv_vars)
3002*38fd1498Szrj     *wdata->inv_vars = BITMAP_ALLOC (NULL);
3003*38fd1498Szrj   bitmap_set_bit (*wdata->inv_vars, info->inv_id);
3004*38fd1498Szrj 
3005*38fd1498Szrj   return NULL_TREE;
3006*38fd1498Szrj }
3007*38fd1498Szrj 
3008*38fd1498Szrj /* Records invariants in *EXPR_P.  INV_VARS is the bitmap in which we
3009*38fd1498Szrj    should store them.  */
3010*38fd1498Szrj 
3011*38fd1498Szrj static inline void
3012*38fd1498Szrj find_inv_vars (struct ivopts_data *data, tree *expr_p, bitmap *inv_vars)
3013*38fd1498Szrj {
3014*38fd1498Szrj   struct walk_tree_data wdata;
3015*38fd1498Szrj 
3016*38fd1498Szrj   if (!inv_vars)
3017*38fd1498Szrj     return;
3018*38fd1498Szrj 
3019*38fd1498Szrj   wdata.idata = data;
3020*38fd1498Szrj   wdata.inv_vars = inv_vars;
3021*38fd1498Szrj   walk_tree (expr_p, find_inv_vars_cb, &wdata, NULL);
3022*38fd1498Szrj }
3023*38fd1498Szrj 
3024*38fd1498Szrj /* Get the entry from the invariant expr hash table for INV_EXPR.  A new
3025*38fd1498Szrj    entry will be recorded if it doesn't exist yet.  Given the two exprs:
3026*38fd1498Szrj      inv_expr + cst1, inv_expr + cst2
3027*38fd1498Szrj    it's hard to decide whether the constant part should be stripped or
3028*38fd1498Szrj    not.  We choose not to strip it, based on the facts below:
3029*38fd1498Szrj      1) We would need to count the ADD cost for the constant part if it
3030*38fd1498Szrj 	were stripped, which isn't always trivial where this function is
3031*38fd1498Szrj 	called.
3032*38fd1498Szrj      2) Stripping the constant away may conflict with the following loop
3033*38fd1498Szrj 	invariant hoisting pass.
3034*38fd1498Szrj      3) Not stripping the constant away results in more invariant exprs,
3035*38fd1498Szrj 	which usually leads to decisions preferring lower register
3036*38fd1498Szrj 	pressure.  */
3035*38fd1498Szrj 
3036*38fd1498Szrj static iv_inv_expr_ent *
3037*38fd1498Szrj get_loop_invariant_expr (struct ivopts_data *data, tree inv_expr)
3038*38fd1498Szrj {
3039*38fd1498Szrj   STRIP_NOPS (inv_expr);
3040*38fd1498Szrj 
3041*38fd1498Szrj   if (poly_int_tree_p (inv_expr)
3042*38fd1498Szrj       || TREE_CODE (inv_expr) == SSA_NAME)
3043*38fd1498Szrj     return NULL;
3044*38fd1498Szrj 
3045*38fd1498Szrj   /* Don't strip constant part away as we used to.  */
3046*38fd1498Szrj 
3047*38fd1498Szrj   /* Stores EXPR in DATA->inv_expr_tab, return pointer to iv_inv_expr_ent.  */
3048*38fd1498Szrj   struct iv_inv_expr_ent ent;
3049*38fd1498Szrj   ent.expr = inv_expr;
3050*38fd1498Szrj   ent.hash = iterative_hash_expr (inv_expr, 0);
3051*38fd1498Szrj   struct iv_inv_expr_ent **slot = data->inv_expr_tab->find_slot (&ent, INSERT);
3052*38fd1498Szrj 
3053*38fd1498Szrj   if (!*slot)
3054*38fd1498Szrj     {
3055*38fd1498Szrj       *slot = XNEW (struct iv_inv_expr_ent);
3056*38fd1498Szrj       (*slot)->expr = inv_expr;
3057*38fd1498Szrj       (*slot)->hash = ent.hash;
3058*38fd1498Szrj       (*slot)->id = ++data->max_inv_expr_id;
3059*38fd1498Szrj     }
3060*38fd1498Szrj 
3061*38fd1498Szrj   return *slot;
3062*38fd1498Szrj }
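
/* Consequently, for an invariant n_1, the exprs n_1 + 1 and n_1 + 2
   get two distinct entries (and ids) in inv_expr_tab rather than
   sharing a single stripped entry for n_1.  */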
3063*38fd1498Szrj 
3064*38fd1498Szrj /* Adds a candidate BASE + STEP * i.  The important field is set to IMPORTANT
3065*38fd1498Szrj    and the position to POS.  If USE is not NULL, the candidate is set as
3066*38fd1498Szrj    related to it.  If both BASE and STEP are NULL, we add a pseudocandidate
3067*38fd1498Szrj    for the replacement of the final value of the iv by a direct
3068*38fd1498Szrj    computation.  */
3068*38fd1498Szrj 
3069*38fd1498Szrj static struct iv_cand *
3070*38fd1498Szrj add_candidate_1 (struct ivopts_data *data,
3071*38fd1498Szrj 		 tree base, tree step, bool important, enum iv_position pos,
3072*38fd1498Szrj 		 struct iv_use *use, gimple *incremented_at,
3073*38fd1498Szrj 		 struct iv *orig_iv = NULL)
3074*38fd1498Szrj {
3075*38fd1498Szrj   unsigned i;
3076*38fd1498Szrj   struct iv_cand *cand = NULL;
3077*38fd1498Szrj   tree type, orig_type;
3078*38fd1498Szrj 
3079*38fd1498Szrj   gcc_assert (base && step);
3080*38fd1498Szrj 
3081*38fd1498Szrj   /* -fkeep-gc-roots-live means that we have to keep a real pointer
3082*38fd1498Szrj      live, but the ivopts code may replace a real pointer with one
3083*38fd1498Szrj      pointing before or after the memory block that is then adjusted
3084*38fd1498Szrj      into the memory block during the loop.  FIXME: It would likely be
3085*38fd1498Szrj      better to actually force the pointer live and still use ivopts;
3086*38fd1498Szrj      for example, it would be enough to write the pointer into memory
3087*38fd1498Szrj      and keep it there until after the loop.  */
3088*38fd1498Szrj   if (flag_keep_gc_roots_live && POINTER_TYPE_P (TREE_TYPE (base)))
3089*38fd1498Szrj     return NULL;
3090*38fd1498Szrj 
3091*38fd1498Szrj   /* For non-original variables, make sure their values are computed in a type
3092*38fd1498Szrj      that does not invoke undefined behavior on overflows (since in general,
3093*38fd1498Szrj      we cannot prove that these induction variables are non-wrapping).  */
3094*38fd1498Szrj   if (pos != IP_ORIGINAL)
3095*38fd1498Szrj     {
3096*38fd1498Szrj       orig_type = TREE_TYPE (base);
3097*38fd1498Szrj       type = generic_type_for (orig_type);
3098*38fd1498Szrj       if (type != orig_type)
3099*38fd1498Szrj 	{
3100*38fd1498Szrj 	  base = fold_convert (type, base);
3101*38fd1498Szrj 	  step = fold_convert (type, step);
3102*38fd1498Szrj 	}
3103*38fd1498Szrj     }
3104*38fd1498Szrj 
3105*38fd1498Szrj   for (i = 0; i < data->vcands.length (); i++)
3106*38fd1498Szrj     {
3107*38fd1498Szrj       cand = data->vcands[i];
3108*38fd1498Szrj 
3109*38fd1498Szrj       if (cand->pos != pos)
3110*38fd1498Szrj 	continue;
3111*38fd1498Szrj 
3112*38fd1498Szrj       if (cand->incremented_at != incremented_at
3113*38fd1498Szrj 	  || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
3114*38fd1498Szrj 	      && cand->ainc_use != use))
3115*38fd1498Szrj 	continue;
3116*38fd1498Szrj 
3117*38fd1498Szrj       if (operand_equal_p (base, cand->iv->base, 0)
3118*38fd1498Szrj 	  && operand_equal_p (step, cand->iv->step, 0)
3119*38fd1498Szrj 	  && (TYPE_PRECISION (TREE_TYPE (base))
3120*38fd1498Szrj 	      == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
3121*38fd1498Szrj 	break;
3122*38fd1498Szrj     }
3123*38fd1498Szrj 
3124*38fd1498Szrj   if (i == data->vcands.length ())
3125*38fd1498Szrj     {
3126*38fd1498Szrj       cand = XCNEW (struct iv_cand);
3127*38fd1498Szrj       cand->id = i;
3128*38fd1498Szrj       cand->iv = alloc_iv (data, base, step);
3129*38fd1498Szrj       cand->pos = pos;
3130*38fd1498Szrj       if (pos != IP_ORIGINAL)
3131*38fd1498Szrj 	{
3132*38fd1498Szrj 	  cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
3133*38fd1498Szrj 	  cand->var_after = cand->var_before;
3134*38fd1498Szrj 	}
3135*38fd1498Szrj       cand->important = important;
3136*38fd1498Szrj       cand->incremented_at = incremented_at;
3137*38fd1498Szrj       data->vcands.safe_push (cand);
3138*38fd1498Szrj 
3139*38fd1498Szrj       if (!poly_int_tree_p (step))
3140*38fd1498Szrj 	{
3141*38fd1498Szrj 	  find_inv_vars (data, &step, &cand->inv_vars);
3142*38fd1498Szrj 
3143*38fd1498Szrj 	  iv_inv_expr_ent *inv_expr = get_loop_invariant_expr (data, step);
3144*38fd1498Szrj 	  /* Share bitmap between inv_vars and inv_exprs for cand.  */
3145*38fd1498Szrj 	  if (inv_expr != NULL)
3146*38fd1498Szrj 	    {
3147*38fd1498Szrj 	      cand->inv_exprs = cand->inv_vars;
3148*38fd1498Szrj 	      cand->inv_vars = NULL;
3149*38fd1498Szrj 	      if (cand->inv_exprs)
3150*38fd1498Szrj 		bitmap_clear (cand->inv_exprs);
3151*38fd1498Szrj 	      else
3152*38fd1498Szrj 		cand->inv_exprs = BITMAP_ALLOC (NULL);
3153*38fd1498Szrj 
3154*38fd1498Szrj 	      bitmap_set_bit (cand->inv_exprs, inv_expr->id);
3155*38fd1498Szrj 	    }
3156*38fd1498Szrj 	}
3157*38fd1498Szrj 
3158*38fd1498Szrj       if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
3159*38fd1498Szrj 	cand->ainc_use = use;
3160*38fd1498Szrj       else
3161*38fd1498Szrj 	cand->ainc_use = NULL;
3162*38fd1498Szrj 
3163*38fd1498Szrj       cand->orig_iv = orig_iv;
3164*38fd1498Szrj       if (dump_file && (dump_flags & TDF_DETAILS))
3165*38fd1498Szrj 	dump_cand (dump_file, cand);
3166*38fd1498Szrj     }
3167*38fd1498Szrj 
3168*38fd1498Szrj   cand->important |= important;
3169*38fd1498Szrj 
3170*38fd1498Szrj   /* Relate candidate to the group for which it is added.  */
3171*38fd1498Szrj   if (use)
3172*38fd1498Szrj     bitmap_set_bit (data->vgroups[use->group_id]->related_cands, i);
3173*38fd1498Szrj 
3174*38fd1498Szrj   return cand;
3175*38fd1498Szrj }
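
/* For example, adding the candidate {base = (unsigned) i_1, step = 4}
   twice at IP_NORMAL does not duplicate it: the second call finds the
   operand-equal candidate, only ORs in the important flag and relates
   it to the new use's group.  */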
3176*38fd1498Szrj 
3177*38fd1498Szrj /* Returns true if incrementing the induction variable at the end of the LOOP
3178*38fd1498Szrj    is allowed.
3179*38fd1498Szrj 
3180*38fd1498Szrj    The purpose is to avoid splitting the latch edge with a biv increment,
3181*38fd1498Szrj    thus creating a jump, possibly confusing other optimization passes and
3182*38fd1498Szrj    leaving less freedom to the scheduler.  So we allow IP_END only if
3183*38fd1498Szrj    IP_NORMAL is not available (so we do not have a better alternative), or
3184*38fd1498Szrj    if the latch edge is already nonempty.  */
3185*38fd1498Szrj 
3186*38fd1498Szrj static bool
3187*38fd1498Szrj allow_ip_end_pos_p (struct loop *loop)
3188*38fd1498Szrj {
3189*38fd1498Szrj   if (!ip_normal_pos (loop))
3190*38fd1498Szrj     return true;
3191*38fd1498Szrj 
3192*38fd1498Szrj   if (!empty_block_p (ip_end_pos (loop)))
3193*38fd1498Szrj     return true;
3194*38fd1498Szrj 
3195*38fd1498Szrj   return false;
3196*38fd1498Szrj }
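
/* E.g. when the latch block already contains statements, placing the
   increment there (IP_END) creates no new jump, so it is allowed even
   if IP_NORMAL is also available.  */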
3197*38fd1498Szrj 
3198*38fd1498Szrj /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
3199*38fd1498Szrj    Important field is set to IMPORTANT.  */
3200*38fd1498Szrj 
3201*38fd1498Szrj static void
3202*38fd1498Szrj add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
3203*38fd1498Szrj 			bool important, struct iv_use *use)
3204*38fd1498Szrj {
3205*38fd1498Szrj   basic_block use_bb = gimple_bb (use->stmt);
3206*38fd1498Szrj   machine_mode mem_mode;
3207*38fd1498Szrj   unsigned HOST_WIDE_INT cstepi;
3208*38fd1498Szrj 
3209*38fd1498Szrj   /* If we insert the increment in any position other than the standard
3210*38fd1498Szrj      ones, we must ensure that it is incremented once per iteration.
3211*38fd1498Szrj      It must not be in an inner nested loop, or one side of an if
3212*38fd1498Szrj      statement.  */
3213*38fd1498Szrj   if (use_bb->loop_father != data->current_loop
3214*38fd1498Szrj       || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
3215*38fd1498Szrj       || stmt_can_throw_internal (use->stmt)
3216*38fd1498Szrj       || !cst_and_fits_in_hwi (step))
3217*38fd1498Szrj     return;
3218*38fd1498Szrj 
3219*38fd1498Szrj   cstepi = int_cst_value (step);
3220*38fd1498Szrj 
3221*38fd1498Szrj   mem_mode = TYPE_MODE (use->mem_type);
3222*38fd1498Szrj   if (((USE_LOAD_PRE_INCREMENT (mem_mode)
3223*38fd1498Szrj 	|| USE_STORE_PRE_INCREMENT (mem_mode))
3224*38fd1498Szrj        && known_eq (GET_MODE_SIZE (mem_mode), cstepi))
3225*38fd1498Szrj       || ((USE_LOAD_PRE_DECREMENT (mem_mode)
3226*38fd1498Szrj 	   || USE_STORE_PRE_DECREMENT (mem_mode))
3227*38fd1498Szrj 	  && known_eq (GET_MODE_SIZE (mem_mode), -cstepi)))
3228*38fd1498Szrj     {
3229*38fd1498Szrj       enum tree_code code = MINUS_EXPR;
3230*38fd1498Szrj       tree new_base;
3231*38fd1498Szrj       tree new_step = step;
3232*38fd1498Szrj 
3233*38fd1498Szrj       if (POINTER_TYPE_P (TREE_TYPE (base)))
3234*38fd1498Szrj 	{
3235*38fd1498Szrj 	  new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
3236*38fd1498Szrj 	  code = POINTER_PLUS_EXPR;
3237*38fd1498Szrj 	}
3238*38fd1498Szrj       else
3239*38fd1498Szrj 	new_step = fold_convert (TREE_TYPE (base), new_step);
3240*38fd1498Szrj       new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
3241*38fd1498Szrj       add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
3242*38fd1498Szrj 		       use->stmt);
3243*38fd1498Szrj     }
3244*38fd1498Szrj   if (((USE_LOAD_POST_INCREMENT (mem_mode)
3245*38fd1498Szrj 	|| USE_STORE_POST_INCREMENT (mem_mode))
3246*38fd1498Szrj        && known_eq (GET_MODE_SIZE (mem_mode), cstepi))
3247*38fd1498Szrj       || ((USE_LOAD_POST_DECREMENT (mem_mode)
3248*38fd1498Szrj 	   || USE_STORE_POST_DECREMENT (mem_mode))
3249*38fd1498Szrj 	  && known_eq (GET_MODE_SIZE (mem_mode), -cstepi)))
3250*38fd1498Szrj     {
3251*38fd1498Szrj       add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
3252*38fd1498Szrj 		       use->stmt);
3253*38fd1498Szrj     }
3254*38fd1498Szrj }
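
/* A sketch, assuming a target with post-increment addressing (such as
   an ARM load "ldr r0, [r1], #4"): for an SImode address use with
   step 4, GET_MODE_SIZE (SImode) matches the step, so an IP_AFTER_USE
   candidate based at BASE is added; on pre-increment targets the
   IP_BEFORE_USE candidate instead uses a base moved back by one
   step.  */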
3255*38fd1498Szrj 
3256*38fd1498Szrj /* Adds a candidate BASE + STEP * i.  The important field is set to IMPORTANT
3257*38fd1498Szrj    and the position to POS.  If USE is not NULL, the candidate is set as
3258*38fd1498Szrj    related to it.  The candidate computation is scheduled before the exit
3259*38fd1498Szrj    condition and at the end of the loop.  */
3260*38fd1498Szrj 
3261*38fd1498Szrj static void
3262*38fd1498Szrj add_candidate (struct ivopts_data *data,
3263*38fd1498Szrj 	       tree base, tree step, bool important, struct iv_use *use,
3264*38fd1498Szrj 	       struct iv *orig_iv = NULL)
3265*38fd1498Szrj {
3266*38fd1498Szrj   if (ip_normal_pos (data->current_loop))
3267*38fd1498Szrj     add_candidate_1 (data, base, step, important,
3268*38fd1498Szrj 		     IP_NORMAL, use, NULL, orig_iv);
3269*38fd1498Szrj   if (ip_end_pos (data->current_loop)
3270*38fd1498Szrj       && allow_ip_end_pos_p (data->current_loop))
3271*38fd1498Szrj     add_candidate_1 (data, base, step, important, IP_END, use, NULL, orig_iv);
3272*38fd1498Szrj }
3273*38fd1498Szrj 
3274*38fd1498Szrj /* Adds standard iv candidates.  */
3275*38fd1498Szrj 
3276*38fd1498Szrj static void
3277*38fd1498Szrj add_standard_iv_candidates (struct ivopts_data *data)
3278*38fd1498Szrj {
3279*38fd1498Szrj   add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
3280*38fd1498Szrj 
3281*38fd1498Szrj   /* The same for a double-integer type if it is still fast enough.  */
3282*38fd1498Szrj   if (TYPE_PRECISION
3283*38fd1498Szrj 	(long_integer_type_node) > TYPE_PRECISION (integer_type_node)
3284*38fd1498Szrj       && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
3285*38fd1498Szrj     add_candidate (data, build_int_cst (long_integer_type_node, 0),
3286*38fd1498Szrj 		   build_int_cst (long_integer_type_node, 1), true, NULL);
3287*38fd1498Szrj 
3288*38fd1498Szrj   /* The same for a double-integer type if it is still fast enough.  */
3289*38fd1498Szrj   if (TYPE_PRECISION
3290*38fd1498Szrj 	(long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
3291*38fd1498Szrj       && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
3292*38fd1498Szrj     add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
3293*38fd1498Szrj 		   build_int_cst (long_long_integer_type_node, 1), true, NULL);
3294*38fd1498Szrj }
3295*38fd1498Szrj 
3296*38fd1498Szrj 
3297*38fd1498Szrj /* Adds candidates based on the old induction variable IV.  */
3298*38fd1498Szrj 
3299*38fd1498Szrj static void
3300*38fd1498Szrj add_iv_candidate_for_biv (struct ivopts_data *data, struct iv *iv)
3301*38fd1498Szrj {
3302*38fd1498Szrj   gimple *phi;
3303*38fd1498Szrj   tree def;
3304*38fd1498Szrj   struct iv_cand *cand;
3305*38fd1498Szrj 
3306*38fd1498Szrj   /* Check if this biv is used in an address type use.  */
3307*38fd1498Szrj   if (iv->no_overflow  && iv->have_address_use
3308*38fd1498Szrj       && INTEGRAL_TYPE_P (TREE_TYPE (iv->base))
3309*38fd1498Szrj       && TYPE_PRECISION (TREE_TYPE (iv->base)) < TYPE_PRECISION (sizetype))
3310*38fd1498Szrj     {
3311*38fd1498Szrj       tree base = fold_convert (sizetype, iv->base);
3312*38fd1498Szrj       tree step = fold_convert (sizetype, iv->step);
3313*38fd1498Szrj 
3314*38fd1498Szrj       /* Add iv cand of same precision as index part in TARGET_MEM_REF.  */
3315*38fd1498Szrj       add_candidate (data, base, step, true, NULL, iv);
3316*38fd1498Szrj       /* Add iv cand of the original type only if it has nonlinear use.  */
3317*38fd1498Szrj       if (iv->nonlin_use)
3318*38fd1498Szrj 	add_candidate (data, iv->base, iv->step, true, NULL);
3319*38fd1498Szrj     }
3320*38fd1498Szrj   else
3321*38fd1498Szrj     add_candidate (data, iv->base, iv->step, true, NULL);
3322*38fd1498Szrj 
3323*38fd1498Szrj   /* The same, but with initial value zero.  */
3324*38fd1498Szrj   if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
3325*38fd1498Szrj     add_candidate (data, size_int (0), iv->step, true, NULL);
3326*38fd1498Szrj   else
3327*38fd1498Szrj     add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
3328*38fd1498Szrj 		   iv->step, true, NULL);
3329*38fd1498Szrj 
3330*38fd1498Szrj   phi = SSA_NAME_DEF_STMT (iv->ssa_name);
3331*38fd1498Szrj   if (gimple_code (phi) == GIMPLE_PHI)
3332*38fd1498Szrj     {
3333*38fd1498Szrj       /* Additionally record the possibility of leaving the original iv
3334*38fd1498Szrj 	 untouched.  */
3335*38fd1498Szrj       def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
3336*38fd1498Szrj       /* Don't add candidate if it's from another PHI node because
3337*38fd1498Szrj 	 it's an affine iv appearing in the form of PEELED_CHREC.  */
3338*38fd1498Szrj       phi = SSA_NAME_DEF_STMT (def);
3339*38fd1498Szrj       if (gimple_code (phi) != GIMPLE_PHI)
3340*38fd1498Szrj 	{
3341*38fd1498Szrj 	  cand = add_candidate_1 (data,
3342*38fd1498Szrj 				  iv->base, iv->step, true, IP_ORIGINAL, NULL,
3343*38fd1498Szrj 				  SSA_NAME_DEF_STMT (def));
3344*38fd1498Szrj 	  if (cand)
3345*38fd1498Szrj 	    {
3346*38fd1498Szrj 	      cand->var_before = iv->ssa_name;
3347*38fd1498Szrj 	      cand->var_after = def;
3348*38fd1498Szrj 	    }
3349*38fd1498Szrj 	}
3350*38fd1498Szrj       else
3351*38fd1498Szrj 	gcc_assert (gimple_bb (phi) == data->current_loop->header);
3352*38fd1498Szrj     }
3353*38fd1498Szrj }
3354*38fd1498Szrj 
3355*38fd1498Szrj /* Adds candidates based on the old induction variables.  */
3356*38fd1498Szrj 
3357*38fd1498Szrj static void
3358*38fd1498Szrj add_iv_candidate_for_bivs (struct ivopts_data *data)
3359*38fd1498Szrj {
3360*38fd1498Szrj   unsigned i;
3361*38fd1498Szrj   struct iv *iv;
3362*38fd1498Szrj   bitmap_iterator bi;
3363*38fd1498Szrj 
3364*38fd1498Szrj   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
3365*38fd1498Szrj     {
3366*38fd1498Szrj       iv = ver_info (data, i)->iv;
3367*38fd1498Szrj       if (iv && iv->biv_p && !integer_zerop (iv->step))
3368*38fd1498Szrj 	add_iv_candidate_for_biv (data, iv);
3369*38fd1498Szrj     }
3370*38fd1498Szrj }
3371*38fd1498Szrj 
3372*38fd1498Szrj /* Record the common candidate {BASE, STEP} derived from USE in the
3373*38fd1498Szrj    hash table.  */
3373*38fd1498Szrj 
3374*38fd1498Szrj static void
3375*38fd1498Szrj record_common_cand (struct ivopts_data *data, tree base,
3376*38fd1498Szrj 		    tree step, struct iv_use *use)
3377*38fd1498Szrj {
3378*38fd1498Szrj   struct iv_common_cand ent;
3379*38fd1498Szrj   struct iv_common_cand **slot;
3380*38fd1498Szrj 
3381*38fd1498Szrj   ent.base = base;
3382*38fd1498Szrj   ent.step = step;
3383*38fd1498Szrj   ent.hash = iterative_hash_expr (base, 0);
3384*38fd1498Szrj   ent.hash = iterative_hash_expr (step, ent.hash);
3385*38fd1498Szrj 
3386*38fd1498Szrj   slot = data->iv_common_cand_tab->find_slot (&ent, INSERT);
3387*38fd1498Szrj   if (*slot == NULL)
3388*38fd1498Szrj     {
3389*38fd1498Szrj       *slot = new iv_common_cand ();
3390*38fd1498Szrj       (*slot)->base = base;
3391*38fd1498Szrj       (*slot)->step = step;
3392*38fd1498Szrj       (*slot)->uses.create (8);
3393*38fd1498Szrj       (*slot)->hash = ent.hash;
3394*38fd1498Szrj       data->iv_common_cands.safe_push ((*slot));
3395*38fd1498Szrj     }
3396*38fd1498Szrj 
3397*38fd1498Szrj   gcc_assert (use != NULL);
3398*38fd1498Szrj   (*slot)->uses.safe_push (use);
3399*38fd1498Szrj   return;
3400*38fd1498Szrj }
3401*38fd1498Szrj 
3402*38fd1498Szrj /* Comparison function used to sort common candidates.  */
3403*38fd1498Szrj 
3404*38fd1498Szrj static int
3405*38fd1498Szrj common_cand_cmp (const void *p1, const void *p2)
3406*38fd1498Szrj {
3407*38fd1498Szrj   unsigned n1, n2;
3408*38fd1498Szrj   const struct iv_common_cand *const *const ccand1
3409*38fd1498Szrj     = (const struct iv_common_cand *const *)p1;
3410*38fd1498Szrj   const struct iv_common_cand *const *const ccand2
3411*38fd1498Szrj     = (const struct iv_common_cand *const *)p2;
3412*38fd1498Szrj 
3413*38fd1498Szrj   n1 = (*ccand1)->uses.length ();
3414*38fd1498Szrj   n2 = (*ccand2)->uses.length ();
3415*38fd1498Szrj   return n2 - n1;
3416*38fd1498Szrj }
3417*38fd1498Szrj 
3418*38fd1498Szrj /* Adds IV candidates based on the common candidates recorded.  */
3419*38fd1498Szrj 
3420*38fd1498Szrj static void
3421*38fd1498Szrj add_iv_candidate_derived_from_uses (struct ivopts_data *data)
3422*38fd1498Szrj {
3423*38fd1498Szrj   unsigned i, j;
3424*38fd1498Szrj   struct iv_cand *cand_1, *cand_2;
3425*38fd1498Szrj 
3426*38fd1498Szrj   data->iv_common_cands.qsort (common_cand_cmp);
3427*38fd1498Szrj   for (i = 0; i < data->iv_common_cands.length (); i++)
3428*38fd1498Szrj     {
3429*38fd1498Szrj       struct iv_common_cand *ptr = data->iv_common_cands[i];
3430*38fd1498Szrj 
3431*38fd1498Szrj       /* Only add IV candidate if it's derived from multiple uses.  */
3432*38fd1498Szrj       if (ptr->uses.length () <= 1)
3433*38fd1498Szrj 	break;
3434*38fd1498Szrj 
3435*38fd1498Szrj       cand_1 = NULL;
3436*38fd1498Szrj       cand_2 = NULL;
3437*38fd1498Szrj       if (ip_normal_pos (data->current_loop))
3438*38fd1498Szrj 	cand_1 = add_candidate_1 (data, ptr->base, ptr->step,
3439*38fd1498Szrj 				  false, IP_NORMAL, NULL, NULL);
3440*38fd1498Szrj 
3441*38fd1498Szrj       if (ip_end_pos (data->current_loop)
3442*38fd1498Szrj 	  && allow_ip_end_pos_p (data->current_loop))
3443*38fd1498Szrj 	cand_2 = add_candidate_1 (data, ptr->base, ptr->step,
3444*38fd1498Szrj 				  false, IP_END, NULL, NULL);
3445*38fd1498Szrj 
3446*38fd1498Szrj       /* Bind deriving uses and the new candidates.  */
3447*38fd1498Szrj       for (j = 0; j < ptr->uses.length (); j++)
3448*38fd1498Szrj 	{
3449*38fd1498Szrj 	  struct iv_group *group = data->vgroups[ptr->uses[j]->group_id];
3450*38fd1498Szrj 	  if (cand_1)
3451*38fd1498Szrj 	    bitmap_set_bit (group->related_cands, cand_1->id);
3452*38fd1498Szrj 	  if (cand_2)
3453*38fd1498Szrj 	    bitmap_set_bit (group->related_cands, cand_2->id);
3454*38fd1498Szrj 	}
3455*38fd1498Szrj     }
3456*38fd1498Szrj 
3457*38fd1498Szrj   /* Release data since it is useless from this point.  */
3458*38fd1498Szrj   data->iv_common_cand_tab->empty ();
3459*38fd1498Szrj   data->iv_common_cands.truncate (0);
3460*38fd1498Szrj }
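
/* E.g. if uses from the groups for a[i] and b[i] both record the
   common candidate {base 0, step 4}, its entry collects both uses;
   after sorting by use count the candidate is added once and related
   to both groups, while entries recorded by a single use are
   skipped.  */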
3461*38fd1498Szrj 
3462*38fd1498Szrj /* Adds candidates based on the value of USE's iv.  */
3463*38fd1498Szrj 
3464*38fd1498Szrj static void
3465*38fd1498Szrj add_iv_candidate_for_use (struct ivopts_data *data, struct iv_use *use)
3466*38fd1498Szrj {
3467*38fd1498Szrj   poly_uint64 offset;
3468*38fd1498Szrj   tree base;
3469*38fd1498Szrj   tree basetype;
3470*38fd1498Szrj   struct iv *iv = use->iv;
3471*38fd1498Szrj 
3472*38fd1498Szrj   add_candidate (data, iv->base, iv->step, false, use);
3473*38fd1498Szrj 
3474*38fd1498Szrj   /* Record common candidate for use in case it can be shared by others.  */
3475*38fd1498Szrj   record_common_cand (data, iv->base, iv->step, use);
3476*38fd1498Szrj 
3477*38fd1498Szrj   /* Record common candidate with initial value zero.  */
3478*38fd1498Szrj   basetype = TREE_TYPE (iv->base);
3479*38fd1498Szrj   if (POINTER_TYPE_P (basetype))
3480*38fd1498Szrj     basetype = sizetype;
3481*38fd1498Szrj   record_common_cand (data, build_int_cst (basetype, 0), iv->step, use);
3482*38fd1498Szrj 
3483*38fd1498Szrj   /* Record a common candidate with the constant offset stripped from the
3484*38fd1498Szrj      base.  As for the use itself, we also add a candidate directly for
3485*38fd1498Szrj      it.  */
3485*38fd1498Szrj   base = strip_offset (iv->base, &offset);
3486*38fd1498Szrj   if (maybe_ne (offset, 0U) || base != iv->base)
3487*38fd1498Szrj     {
3488*38fd1498Szrj       record_common_cand (data, base, iv->step, use);
3489*38fd1498Szrj       add_candidate (data, base, iv->step, false, use);
3490*38fd1498Szrj     }
3491*38fd1498Szrj 
3492*38fd1498Szrj   /* Record common candidate with base_object removed in base.  */
3493*38fd1498Szrj   base = iv->base;
3494*38fd1498Szrj   STRIP_NOPS (base);
3495*38fd1498Szrj   if (iv->base_object != NULL && TREE_CODE (base) == POINTER_PLUS_EXPR)
3496*38fd1498Szrj     {
3497*38fd1498Szrj       tree step = iv->step;
3498*38fd1498Szrj 
3499*38fd1498Szrj       STRIP_NOPS (step);
3500*38fd1498Szrj       base = TREE_OPERAND (base, 1);
3501*38fd1498Szrj       step = fold_convert (sizetype, step);
3502*38fd1498Szrj       record_common_cand (data, base, step, use);
3503*38fd1498Szrj       /* Also record common candidate with offset stripped.  */
3504*38fd1498Szrj       base = strip_offset (base, &offset);
3505*38fd1498Szrj       if (maybe_ne (offset, 0U))
3506*38fd1498Szrj 	record_common_cand (data, base, step, use);
3507*38fd1498Szrj     }
3508*38fd1498Szrj 
3509*38fd1498Szrj   /* Finally, add auto-increment candidates.  Make such variables
3510*38fd1498Szrj      important, since other iv uses with the same base object may be
3511*38fd1498Szrj      based on them.  */
3512*38fd1498Szrj   if (use != NULL && address_p (use->type))
3513*38fd1498Szrj     add_autoinc_candidates (data, iv->base, iv->step, true, use);
3514*38fd1498Szrj }
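
/* Putting it together (an illustrative sketch): for a use with iv
   {base = &a[2], step = 4} and 4-byte elements, we record common
   candidates for &a[2], for 0, and for &a[0] with offset 8 stripped,
   and add direct candidates for the first and the last of these.  */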
3515*38fd1498Szrj 
3516*38fd1498Szrj /* Adds candidates based on the uses.  */
3517*38fd1498Szrj 
3518*38fd1498Szrj static void
3519*38fd1498Szrj add_iv_candidate_for_groups (struct ivopts_data *data)
3520*38fd1498Szrj {
3521*38fd1498Szrj   unsigned i;
3522*38fd1498Szrj 
3523*38fd1498Szrj   /* Only add a candidate for the first use in each group.  */
3524*38fd1498Szrj   for (i = 0; i < data->vgroups.length (); i++)
3525*38fd1498Szrj     {
3526*38fd1498Szrj       struct iv_group *group = data->vgroups[i];
3527*38fd1498Szrj 
3528*38fd1498Szrj       gcc_assert (group->vuses[0] != NULL);
3529*38fd1498Szrj       add_iv_candidate_for_use (data, group->vuses[0]);
3530*38fd1498Szrj     }
3531*38fd1498Szrj   add_iv_candidate_derived_from_uses (data);
3532*38fd1498Szrj }
3533*38fd1498Szrj 
3534*38fd1498Szrj /* Record important candidates and add them to related_cands bitmaps.  */
3535*38fd1498Szrj 
3536*38fd1498Szrj static void
3537*38fd1498Szrj record_important_candidates (struct ivopts_data *data)
3538*38fd1498Szrj {
3539*38fd1498Szrj   unsigned i;
3540*38fd1498Szrj   struct iv_group *group;
3541*38fd1498Szrj 
3542*38fd1498Szrj   for (i = 0; i < data->vcands.length (); i++)
3543*38fd1498Szrj     {
3544*38fd1498Szrj       struct iv_cand *cand = data->vcands[i];
3545*38fd1498Szrj 
3546*38fd1498Szrj       if (cand->important)
3547*38fd1498Szrj 	bitmap_set_bit (data->important_candidates, i);
3548*38fd1498Szrj     }
3549*38fd1498Szrj 
3550*38fd1498Szrj   data->consider_all_candidates = (data->vcands.length ()
3551*38fd1498Szrj 				   <= CONSIDER_ALL_CANDIDATES_BOUND);
3552*38fd1498Szrj 
3553*38fd1498Szrj   /* Add important candidates to groups' related_cands bitmaps.  */
3554*38fd1498Szrj   for (i = 0; i < data->vgroups.length (); i++)
3555*38fd1498Szrj     {
3556*38fd1498Szrj       group = data->vgroups[i];
3557*38fd1498Szrj       bitmap_ior_into (group->related_cands, data->important_candidates);
3558*38fd1498Szrj     }
3559*38fd1498Szrj }
3560*38fd1498Szrj 
3561*38fd1498Szrj /* Allocates the data structure that maps (use, candidate) pairs to costs.
3562*38fd1498Szrj    If consider_all_candidates is true, we use a two-dimensional array;
3563*38fd1498Szrj    otherwise we allocate a simple list for every use.  */
3564*38fd1498Szrj 
3565*38fd1498Szrj static void
3566*38fd1498Szrj alloc_use_cost_map (struct ivopts_data *data)
3567*38fd1498Szrj {
3568*38fd1498Szrj   unsigned i, size, s;
3569*38fd1498Szrj 
3570*38fd1498Szrj   for (i = 0; i < data->vgroups.length (); i++)
3571*38fd1498Szrj     {
3572*38fd1498Szrj       struct iv_group *group = data->vgroups[i];
3573*38fd1498Szrj 
3574*38fd1498Szrj       if (data->consider_all_candidates)
3575*38fd1498Szrj 	size = data->vcands.length ();
3576*38fd1498Szrj       else
3577*38fd1498Szrj 	{
3578*38fd1498Szrj 	  s = bitmap_count_bits (group->related_cands);
3579*38fd1498Szrj 
3580*38fd1498Szrj 	  /* Round up to a power of two, so that computing the modulo by
3581*38fd1498Szrj 	     it is fast.  */
3581*38fd1498Szrj 	  size = s ? (1 << ceil_log2 (s)) : 1;
3582*38fd1498Szrj 	}
3583*38fd1498Szrj 
3584*38fd1498Szrj       group->n_map_members = size;
3585*38fd1498Szrj       group->cost_map = XCNEWVEC (struct cost_pair, size);
3586*38fd1498Szrj     }
3587*38fd1498Szrj }
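
/* E.g. a group related to 5 candidates gets a cost map of size
   1 << ceil_log2 (5) == 8, so the probes below can reduce cand->id
   with a simple mask (id & 7) instead of a division.  */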
3588*38fd1498Szrj 
3589*38fd1498Szrj /* Sets the cost of the (GROUP, CAND) pair to COST and records that it
3590*38fd1498Szrj    depends on the invariants INV_VARS, that the value used in expressing
3591*38fd1498Szrj    it is VALUE, and, in the case of iv elimination, that the comparison
3592*38fd1498Szrj    operator is COMP.  */
3592*38fd1498Szrj 
3593*38fd1498Szrj static void
3594*38fd1498Szrj set_group_iv_cost (struct ivopts_data *data,
3595*38fd1498Szrj 		   struct iv_group *group, struct iv_cand *cand,
3596*38fd1498Szrj 		   comp_cost cost, bitmap inv_vars, tree value,
3597*38fd1498Szrj 		   enum tree_code comp, bitmap inv_exprs)
3598*38fd1498Szrj {
3599*38fd1498Szrj   unsigned i, s;
3600*38fd1498Szrj 
3601*38fd1498Szrj   if (cost.infinite_cost_p ())
3602*38fd1498Szrj     {
3603*38fd1498Szrj       BITMAP_FREE (inv_vars);
3604*38fd1498Szrj       BITMAP_FREE (inv_exprs);
3605*38fd1498Szrj       return;
3606*38fd1498Szrj     }
3607*38fd1498Szrj 
3608*38fd1498Szrj   if (data->consider_all_candidates)
3609*38fd1498Szrj     {
3610*38fd1498Szrj       group->cost_map[cand->id].cand = cand;
3611*38fd1498Szrj       group->cost_map[cand->id].cost = cost;
3612*38fd1498Szrj       group->cost_map[cand->id].inv_vars = inv_vars;
3613*38fd1498Szrj       group->cost_map[cand->id].inv_exprs = inv_exprs;
3614*38fd1498Szrj       group->cost_map[cand->id].value = value;
3615*38fd1498Szrj       group->cost_map[cand->id].comp = comp;
3616*38fd1498Szrj       return;
3617*38fd1498Szrj     }
3618*38fd1498Szrj 
3619*38fd1498Szrj   /* n_map_members is a power of two, so this computes modulo.  */
3620*38fd1498Szrj   s = cand->id & (group->n_map_members - 1);
3621*38fd1498Szrj   for (i = s; i < group->n_map_members; i++)
3622*38fd1498Szrj     if (!group->cost_map[i].cand)
3623*38fd1498Szrj       goto found;
3624*38fd1498Szrj   for (i = 0; i < s; i++)
3625*38fd1498Szrj     if (!group->cost_map[i].cand)
3626*38fd1498Szrj       goto found;
3627*38fd1498Szrj 
3628*38fd1498Szrj   gcc_unreachable ();
3629*38fd1498Szrj 
3630*38fd1498Szrj found:
3631*38fd1498Szrj   group->cost_map[i].cand = cand;
3632*38fd1498Szrj   group->cost_map[i].cost = cost;
3633*38fd1498Szrj   group->cost_map[i].inv_vars = inv_vars;
3634*38fd1498Szrj   group->cost_map[i].inv_exprs = inv_exprs;
3635*38fd1498Szrj   group->cost_map[i].value = value;
3636*38fd1498Szrj   group->cost_map[i].comp = comp;
3637*38fd1498Szrj }
3638*38fd1498Szrj 
3639*38fd1498Szrj /* Gets cost of (GROUP, CAND) pair.  */
3640*38fd1498Szrj 
3641*38fd1498Szrj static struct cost_pair *
3642*38fd1498Szrj get_group_iv_cost (struct ivopts_data *data, struct iv_group *group,
3643*38fd1498Szrj 		   struct iv_cand *cand)
3644*38fd1498Szrj {
3645*38fd1498Szrj   unsigned i, s;
3646*38fd1498Szrj   struct cost_pair *ret;
3647*38fd1498Szrj 
3648*38fd1498Szrj   if (!cand)
3649*38fd1498Szrj     return NULL;
3650*38fd1498Szrj 
3651*38fd1498Szrj   if (data->consider_all_candidates)
3652*38fd1498Szrj     {
3653*38fd1498Szrj       ret = group->cost_map + cand->id;
3654*38fd1498Szrj       if (!ret->cand)
3655*38fd1498Szrj 	return NULL;
3656*38fd1498Szrj 
3657*38fd1498Szrj       return ret;
3658*38fd1498Szrj     }
3659*38fd1498Szrj 
3660*38fd1498Szrj   /* n_map_members is a power of two, so this computes modulo.  */
3661*38fd1498Szrj   s = cand->id & (group->n_map_members - 1);
3662*38fd1498Szrj   for (i = s; i < group->n_map_members; i++)
3663*38fd1498Szrj     if (group->cost_map[i].cand == cand)
3664*38fd1498Szrj       return group->cost_map + i;
3665*38fd1498Szrj     else if (group->cost_map[i].cand == NULL)
3666*38fd1498Szrj       return NULL;
3667*38fd1498Szrj   for (i = 0; i < s; i++)
3668*38fd1498Szrj     if (group->cost_map[i].cand == cand)
3669*38fd1498Szrj       return group->cost_map + i;
3670*38fd1498Szrj     else if (group->cost_map[i].cand == NULL)
3671*38fd1498Szrj       return NULL;
3672*38fd1498Szrj 
3673*38fd1498Szrj   return NULL;
3674*38fd1498Szrj }
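
/* Both functions probe the same way: e.g. with n_map_members == 8 and
   cand->id == 11 the search starts at slot 11 & 7 == 3 and walks
   forward (wrapping around to slot 0) until it finds the candidate or
   an empty slot -- simple open addressing with linear probing.  */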
3675*38fd1498Szrj 
3676*38fd1498Szrj /* Produce DECL_RTL for object OBJ so it looks like it is stored in memory.  */
3677*38fd1498Szrj static rtx
3678*38fd1498Szrj produce_memory_decl_rtl (tree obj, int *regno)
3679*38fd1498Szrj {
3680*38fd1498Szrj   addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
3681*38fd1498Szrj   machine_mode address_mode = targetm.addr_space.address_mode (as);
3682*38fd1498Szrj   rtx x;
3683*38fd1498Szrj 
3684*38fd1498Szrj   gcc_assert (obj);
3685*38fd1498Szrj   if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
3686*38fd1498Szrj     {
3687*38fd1498Szrj       const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
3688*38fd1498Szrj       x = gen_rtx_SYMBOL_REF (address_mode, name);
3689*38fd1498Szrj       SET_SYMBOL_REF_DECL (x, obj);
3690*38fd1498Szrj       x = gen_rtx_MEM (DECL_MODE (obj), x);
3691*38fd1498Szrj       set_mem_addr_space (x, as);
3692*38fd1498Szrj       targetm.encode_section_info (obj, x, true);
3693*38fd1498Szrj     }
3694*38fd1498Szrj   else
3695*38fd1498Szrj     {
3696*38fd1498Szrj       x = gen_raw_REG (address_mode, (*regno)++);
3697*38fd1498Szrj       x = gen_rtx_MEM (DECL_MODE (obj), x);
3698*38fd1498Szrj       set_mem_addr_space (x, as);
3699*38fd1498Szrj     }
3700*38fd1498Szrj 
3701*38fd1498Szrj   return x;
3702*38fd1498Szrj }
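
/* Illustrative shapes of the produced RTL (the modes are target
   dependent and only assumed here): a static "int x" yields something
   like (mem:SI (symbol_ref:DI ("x"))), while any other decl gets a
   fresh fake base register, e.g. (mem:SI (reg:DI 107)).  */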
3703*38fd1498Szrj 
3704*38fd1498Szrj /* Prepares decl_rtl for variables referred to in *EXPR_P.  Callback for
3705*38fd1498Szrj    walk_tree.  DATA contains the next free fake register number.  */
3706*38fd1498Szrj 
3707*38fd1498Szrj static tree
3708*38fd1498Szrj prepare_decl_rtl (tree *expr_p, int *ws, void *data)
3709*38fd1498Szrj {
3710*38fd1498Szrj   tree obj = NULL_TREE;
3711*38fd1498Szrj   rtx x = NULL_RTX;
3712*38fd1498Szrj   int *regno = (int *) data;
3713*38fd1498Szrj 
3714*38fd1498Szrj   switch (TREE_CODE (*expr_p))
3715*38fd1498Szrj     {
3716*38fd1498Szrj     case ADDR_EXPR:
3717*38fd1498Szrj       for (expr_p = &TREE_OPERAND (*expr_p, 0);
3718*38fd1498Szrj 	   handled_component_p (*expr_p);
3719*38fd1498Szrj 	   expr_p = &TREE_OPERAND (*expr_p, 0))
3720*38fd1498Szrj 	continue;
3721*38fd1498Szrj       obj = *expr_p;
3722*38fd1498Szrj       if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
3723*38fd1498Szrj 	x = produce_memory_decl_rtl (obj, regno);
3724*38fd1498Szrj       break;
3725*38fd1498Szrj 
3726*38fd1498Szrj     case SSA_NAME:
3727*38fd1498Szrj       *ws = 0;
3728*38fd1498Szrj       obj = SSA_NAME_VAR (*expr_p);
3729*38fd1498Szrj       /* Defer handling of anonymous SSA_NAMEs to the expander.  */
3730*38fd1498Szrj       if (!obj)
3731*38fd1498Szrj 	return NULL_TREE;
3732*38fd1498Szrj       if (!DECL_RTL_SET_P (obj))
3733*38fd1498Szrj 	x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3734*38fd1498Szrj       break;
3735*38fd1498Szrj 
3736*38fd1498Szrj     case VAR_DECL:
3737*38fd1498Szrj     case PARM_DECL:
3738*38fd1498Szrj     case RESULT_DECL:
3739*38fd1498Szrj       *ws = 0;
3740*38fd1498Szrj       obj = *expr_p;
3741*38fd1498Szrj 
3742*38fd1498Szrj       if (DECL_RTL_SET_P (obj))
3743*38fd1498Szrj 	break;
3744*38fd1498Szrj 
3745*38fd1498Szrj       if (DECL_MODE (obj) == BLKmode)
3746*38fd1498Szrj 	x = produce_memory_decl_rtl (obj, regno);
3747*38fd1498Szrj       else
3748*38fd1498Szrj 	x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3749*38fd1498Szrj 
3750*38fd1498Szrj       break;
3751*38fd1498Szrj 
3752*38fd1498Szrj     default:
3753*38fd1498Szrj       break;
3754*38fd1498Szrj     }
3755*38fd1498Szrj 
3756*38fd1498Szrj   if (x)
3757*38fd1498Szrj     {
3758*38fd1498Szrj       decl_rtl_to_reset.safe_push (obj);
3759*38fd1498Szrj       SET_DECL_RTL (obj, x);
3760*38fd1498Szrj     }
3761*38fd1498Szrj 
3762*38fd1498Szrj   return NULL_TREE;
3763*38fd1498Szrj }
3764*38fd1498Szrj 
3765*38fd1498Szrj /* Determines cost of the computation of EXPR.  */
3766*38fd1498Szrj 
3767*38fd1498Szrj static unsigned
3768*38fd1498Szrj computation_cost (tree expr, bool speed)
3769*38fd1498Szrj {
3770*38fd1498Szrj   rtx_insn *seq;
3771*38fd1498Szrj   rtx rslt;
3772*38fd1498Szrj   tree type = TREE_TYPE (expr);
3773*38fd1498Szrj   unsigned cost;
3774*38fd1498Szrj   /* Avoid using hard regs in ways which may be unsupported.  */
3775*38fd1498Szrj   int regno = LAST_VIRTUAL_REGISTER + 1;
3776*38fd1498Szrj   struct cgraph_node *node = cgraph_node::get (current_function_decl);
3777*38fd1498Szrj   enum node_frequency real_frequency = node->frequency;
3778*38fd1498Szrj 
3779*38fd1498Szrj   node->frequency = NODE_FREQUENCY_NORMAL;
3780*38fd1498Szrj   crtl->maybe_hot_insn_p = speed;
3781*38fd1498Szrj   walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
3782*38fd1498Szrj   start_sequence ();
3783*38fd1498Szrj   rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
3784*38fd1498Szrj   seq = get_insns ();
3785*38fd1498Szrj   end_sequence ();
3786*38fd1498Szrj   default_rtl_profile ();
3787*38fd1498Szrj   node->frequency = real_frequency;
3788*38fd1498Szrj 
3789*38fd1498Szrj   cost = seq_cost (seq, speed);
3790*38fd1498Szrj   if (MEM_P (rslt))
3791*38fd1498Szrj     cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
3792*38fd1498Szrj 			  TYPE_ADDR_SPACE (type), speed);
3793*38fd1498Szrj   else if (!REG_P (rslt))
3794*38fd1498Szrj     cost += set_src_cost (rslt, TYPE_MODE (type), speed);
3795*38fd1498Szrj 
3796*38fd1498Szrj   return cost;
3797*38fd1498Szrj }
3798*38fd1498Szrj 
3799*38fd1498Szrj /* Returns variable containing the value of candidate CAND at statement STMT.  */
3800*38fd1498Szrj 
3801*38fd1498Szrj static tree
3802*38fd1498Szrj var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple *stmt)
3803*38fd1498Szrj {
3804*38fd1498Szrj   if (stmt_after_increment (loop, cand, stmt))
3805*38fd1498Szrj     return cand->var_after;
3806*38fd1498Szrj   else
3807*38fd1498Szrj     return cand->var_before;
3808*38fd1498Szrj }
3809*38fd1498Szrj 
3810*38fd1498Szrj /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3811*38fd1498Szrj    same precision, which is at least as wide as the precision of TYPE, stores
3812*38fd1498Szrj    BA to A and BB to B, and returns the type of BA.  Otherwise, returns the
3813*38fd1498Szrj    common type of A and B.  */
3814*38fd1498Szrj 
3815*38fd1498Szrj static tree
3816*38fd1498Szrj determine_common_wider_type (tree *a, tree *b)
3817*38fd1498Szrj {
3818*38fd1498Szrj   tree wider_type = NULL;
3819*38fd1498Szrj   tree suba, subb;
3820*38fd1498Szrj   tree atype = TREE_TYPE (*a);
3821*38fd1498Szrj 
3822*38fd1498Szrj   if (CONVERT_EXPR_P (*a))
3823*38fd1498Szrj     {
3824*38fd1498Szrj       suba = TREE_OPERAND (*a, 0);
3825*38fd1498Szrj       wider_type = TREE_TYPE (suba);
3826*38fd1498Szrj       if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3827*38fd1498Szrj 	return atype;
3828*38fd1498Szrj     }
3829*38fd1498Szrj   else
3830*38fd1498Szrj     return atype;
3831*38fd1498Szrj 
3832*38fd1498Szrj   if (CONVERT_EXPR_P (*b))
3833*38fd1498Szrj     {
3834*38fd1498Szrj       subb = TREE_OPERAND (*b, 0);
3835*38fd1498Szrj       if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3836*38fd1498Szrj 	return atype;
3837*38fd1498Szrj     }
3838*38fd1498Szrj   else
3839*38fd1498Szrj     return atype;
3840*38fd1498Szrj 
3841*38fd1498Szrj   *a = suba;
3842*38fd1498Szrj   *b = subb;
3843*38fd1498Szrj   return wider_type;
3844*38fd1498Szrj }
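
/* An example with assumed types: if *A is (unsigned int) ba and *B is
   (unsigned int) bb, with ba and bb both 64 bits wide, *A and *B are
   replaced by ba and bb and the 64-bit type is returned.  If either
   operand is not a conversion, or the inner precisions differ, the
   type of *A is returned and nothing is changed.  */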
3845*38fd1498Szrj 
3846*38fd1498Szrj /* Determines the expression by which USE is expressed from induction variable
3847*38fd1498Szrj    CAND at statement AT in LOOP.  The expression is stored in two parts in a
3848*38fd1498Szrj    decomposed form: the invariant part is stored in AFF_INV, while the variant
3849*38fd1498Szrj    part is stored in AFF_VAR.  Store the ratio of USE.step over CAND.step in
3850*38fd1498Szrj    PRAT if it's non-null.  Returns false if USE cannot be expressed using
3851*38fd1498Szrj    CAND.  */
3851*38fd1498Szrj 
3852*38fd1498Szrj static bool
3853*38fd1498Szrj get_computation_aff_1 (struct loop *loop, gimple *at, struct iv_use *use,
3854*38fd1498Szrj 		       struct iv_cand *cand, struct aff_tree *aff_inv,
3855*38fd1498Szrj 		       struct aff_tree *aff_var, widest_int *prat = NULL)
3856*38fd1498Szrj {
3857*38fd1498Szrj   tree ubase = use->iv->base, ustep = use->iv->step;
3858*38fd1498Szrj   tree cbase = cand->iv->base, cstep = cand->iv->step;
3859*38fd1498Szrj   tree common_type, uutype, var, cstep_common;
3860*38fd1498Szrj   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3861*38fd1498Szrj   aff_tree aff_cbase;
3862*38fd1498Szrj   widest_int rat;
3863*38fd1498Szrj 
3864*38fd1498Szrj   /* We must have enough precision to express the values of USE.  */
3865*38fd1498Szrj   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3866*38fd1498Szrj     return false;
3867*38fd1498Szrj 
3868*38fd1498Szrj   var = var_at_stmt (loop, cand, at);
3869*38fd1498Szrj   uutype = unsigned_type_for (utype);
3870*38fd1498Szrj 
3871*38fd1498Szrj   /* If the conversion is not a noop, perform it.  */
3872*38fd1498Szrj   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3873*38fd1498Szrj     {
3874*38fd1498Szrj       if (cand->orig_iv != NULL && CONVERT_EXPR_P (cbase)
3875*38fd1498Szrj 	  && (CONVERT_EXPR_P (cstep) || poly_int_tree_p (cstep)))
3876*38fd1498Szrj 	{
3877*38fd1498Szrj 	  tree inner_base, inner_step, inner_type;
3878*38fd1498Szrj 	  inner_base = TREE_OPERAND (cbase, 0);
3879*38fd1498Szrj 	  if (CONVERT_EXPR_P (cstep))
3880*38fd1498Szrj 	    inner_step = TREE_OPERAND (cstep, 0);
3881*38fd1498Szrj 	  else
3882*38fd1498Szrj 	    inner_step = cstep;
3883*38fd1498Szrj 
3884*38fd1498Szrj 	  inner_type = TREE_TYPE (inner_base);
3885*38fd1498Szrj 	  /* If the candidate is added from a biv whose type is smaller than
3886*38fd1498Szrj 	     ctype, we know neither the candidate nor the biv overflows.
3887*38fd1498Szrj 	     In this case, it's safe to skip the conversion in the candidate.
3888*38fd1498Szrj 	     As an example, (unsigned short)((unsigned long)A) equals
3889*38fd1498Szrj 	     (unsigned short)A, if A has a type no larger than short.  */
3890*38fd1498Szrj 	  if (TYPE_PRECISION (inner_type) <= TYPE_PRECISION (uutype))
3891*38fd1498Szrj 	    {
3892*38fd1498Szrj 	      cbase = inner_base;
3893*38fd1498Szrj 	      cstep = inner_step;
3894*38fd1498Szrj 	    }
3895*38fd1498Szrj 	}
3896*38fd1498Szrj       cbase = fold_convert (uutype, cbase);
3897*38fd1498Szrj       cstep = fold_convert (uutype, cstep);
3898*38fd1498Szrj       var = fold_convert (uutype, var);
3899*38fd1498Szrj     }
3900*38fd1498Szrj 
3901*38fd1498Szrj   /* Ratio is 1 when computing the value of biv cand by itself.
3902*38fd1498Szrj      We can't rely on constant_multiple_of in this case because the
3903*38fd1498Szrj      use is created after the original biv is selected.  The call
3904*38fd1498Szrj      could fail because of inconsistent fold behavior.  See PR68021
3905*38fd1498Szrj      for more information.  */
3906*38fd1498Szrj   if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
3907*38fd1498Szrj     {
3908*38fd1498Szrj       gcc_assert (is_gimple_assign (use->stmt));
3909*38fd1498Szrj       gcc_assert (use->iv->ssa_name == cand->var_after);
3910*38fd1498Szrj       gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
3911*38fd1498Szrj       rat = 1;
3912*38fd1498Szrj     }
3913*38fd1498Szrj   else if (!constant_multiple_of (ustep, cstep, &rat))
3914*38fd1498Szrj     return false;
3915*38fd1498Szrj 
3916*38fd1498Szrj   if (prat)
3917*38fd1498Szrj     *prat = rat;
3918*38fd1498Szrj 
3919*38fd1498Szrj   /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3920*38fd1498Szrj      type, we achieve better folding by computing their difference in this
3921*38fd1498Szrj      wider type and casting the result to UUTYPE.  We do not need to worry
3922*38fd1498Szrj      about overflows, as all the arithmetic will in the end be performed in
3923*38fd1498Szrj      UUTYPE anyway.  */
3924*38fd1498Szrj   common_type = determine_common_wider_type (&ubase, &cbase);
3925*38fd1498Szrj 
3926*38fd1498Szrj   /* use = ubase - ratio * cbase + ratio * var.  */
3927*38fd1498Szrj   tree_to_aff_combination (ubase, common_type, aff_inv);
3928*38fd1498Szrj   tree_to_aff_combination (cbase, common_type, &aff_cbase);
3929*38fd1498Szrj   tree_to_aff_combination (var, uutype, aff_var);
3930*38fd1498Szrj 
3931*38fd1498Szrj   /* We need to shift the value if we are after the increment.  */
3932*38fd1498Szrj   if (stmt_after_increment (loop, cand, at))
3933*38fd1498Szrj     {
3934*38fd1498Szrj       aff_tree cstep_aff;
3935*38fd1498Szrj 
3936*38fd1498Szrj       if (common_type != uutype)
3937*38fd1498Szrj 	cstep_common = fold_convert (common_type, cstep);
3938*38fd1498Szrj       else
3939*38fd1498Szrj 	cstep_common = cstep;
3940*38fd1498Szrj 
3941*38fd1498Szrj       tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3942*38fd1498Szrj       aff_combination_add (&aff_cbase, &cstep_aff);
3943*38fd1498Szrj     }
3944*38fd1498Szrj 
3945*38fd1498Szrj   aff_combination_scale (&aff_cbase, -rat);
3946*38fd1498Szrj   aff_combination_add (aff_inv, &aff_cbase);
3947*38fd1498Szrj   if (common_type != uutype)
3948*38fd1498Szrj     aff_combination_convert (aff_inv, uutype);
3949*38fd1498Szrj 
3950*38fd1498Szrj   aff_combination_scale (aff_var, rat);
3951*38fd1498Szrj   return true;
3952*38fd1498Szrj }
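
/* A small worked example with made-up IVs: for a use with step 4 and
   a candidate with step 1 (bases UB and CB), constant_multiple_of
   computes rat == 4, so on return AFF_INV holds UB - 4 * CB and
   AFF_VAR holds 4 * VAR, matching the identity
   use = ubase - ratio * cbase + ratio * var above.  */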
3953*38fd1498Szrj 
3954*38fd1498Szrj /* Determines the expression by which USE is expressed from induction variable
3955*38fd1498Szrj    CAND at statement AT in LOOP.  The expression is stored in a decomposed
3956*38fd1498Szrj    form into AFF.  Returns false if USE cannot be expressed using CAND.  */
3957*38fd1498Szrj 
3958*38fd1498Szrj static bool
3959*38fd1498Szrj get_computation_aff (struct loop *loop, gimple *at, struct iv_use *use,
3960*38fd1498Szrj 		     struct iv_cand *cand, struct aff_tree *aff)
3961*38fd1498Szrj {
3962*38fd1498Szrj   aff_tree aff_var;
3963*38fd1498Szrj 
3964*38fd1498Szrj   if (!get_computation_aff_1 (loop, at, use, cand, aff, &aff_var))
3965*38fd1498Szrj     return false;
3966*38fd1498Szrj 
3967*38fd1498Szrj   aff_combination_add (aff, &aff_var);
3968*38fd1498Szrj   return true;
3969*38fd1498Szrj }
3970*38fd1498Szrj 
3971*38fd1498Szrj /* Return the type of USE.  */
3972*38fd1498Szrj 
3973*38fd1498Szrj static tree
3974*38fd1498Szrj get_use_type (struct iv_use *use)
3975*38fd1498Szrj {
3976*38fd1498Szrj   tree base_type = TREE_TYPE (use->iv->base);
3977*38fd1498Szrj   tree type;
3978*38fd1498Szrj 
3979*38fd1498Szrj   if (use->type == USE_REF_ADDRESS)
3980*38fd1498Szrj     {
3981*38fd1498Szrj       /* The base_type may be a void pointer.  Create a pointer type based on
3982*38fd1498Szrj 	 the mem_ref instead.  */
3983*38fd1498Szrj       type = build_pointer_type (TREE_TYPE (*use->op_p));
3984*38fd1498Szrj       gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3985*38fd1498Szrj 		  == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3986*38fd1498Szrj     }
3987*38fd1498Szrj   else
3988*38fd1498Szrj     type = base_type;
3989*38fd1498Szrj 
3990*38fd1498Szrj   return type;
3991*38fd1498Szrj }
3992*38fd1498Szrj 
3993*38fd1498Szrj /* Determines the expression by which USE is expressed from induction variable
3994*38fd1498Szrj    CAND at statement AT in LOOP.  The computation is unshared.  */
3995*38fd1498Szrj 
3996*38fd1498Szrj static tree
3997*38fd1498Szrj get_computation_at (struct loop *loop, gimple *at,
3998*38fd1498Szrj 		    struct iv_use *use, struct iv_cand *cand)
3999*38fd1498Szrj {
4000*38fd1498Szrj   aff_tree aff;
4001*38fd1498Szrj   tree type = get_use_type (use);
4002*38fd1498Szrj 
4003*38fd1498Szrj   if (!get_computation_aff (loop, at, use, cand, &aff))
4004*38fd1498Szrj     return NULL_TREE;
4005*38fd1498Szrj   unshare_aff_combination (&aff);
4006*38fd1498Szrj   return fold_convert (type, aff_combination_to_tree (&aff));
4007*38fd1498Szrj }
4008*38fd1498Szrj 
4009*38fd1498Szrj /* Adjust the cost COST for being in loop setup rather than loop body.
4010*38fd1498Szrj    If we're optimizing for space, the loop setup overhead is constant;
4011*38fd1498Szrj    if we're optimizing for speed, amortize it over the per-iteration cost.
4012*38fd1498Szrj    If ROUND_UP_P is true, the result is rounded up rather than truncated
4013*38fd1498Szrj    toward zero when optimizing for speed.  */
4014*38fd1498Szrj static unsigned
4015*38fd1498Szrj adjust_setup_cost (struct ivopts_data *data, unsigned cost,
4016*38fd1498Szrj 		   bool round_up_p = false)
4017*38fd1498Szrj {
4018*38fd1498Szrj   if (cost == INFTY)
4019*38fd1498Szrj     return cost;
4020*38fd1498Szrj   else if (optimize_loop_for_speed_p (data->current_loop))
4021*38fd1498Szrj     {
4022*38fd1498Szrj       HOST_WIDE_INT niters = avg_loop_niter (data->current_loop);
4023*38fd1498Szrj       return ((HOST_WIDE_INT) cost + (round_up_p ? niters - 1 : 0)) / niters;
4024*38fd1498Szrj     }
4025*38fd1498Szrj   else
4026*38fd1498Szrj     return cost;
4027*38fd1498Szrj }
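
/* E.g., assuming avg_loop_niter returns 10, a setup cost of 14
   amortizes to 14 / 10 == 1 with truncation, or to (14 + 9) / 10 == 2
   with ROUND_UP_P; when optimizing for size the cost is returned
   unchanged.  */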
4028*38fd1498Szrj 
4029*38fd1498Szrj /* Calculate the SPEED or size cost of shiftadd EXPR in MODE.  MULT is the
4030*38fd1498Szrj    operand of EXPR holding the multiplication that becomes a shift.  COST0
4031*38fd1498Szrj    and COST1 are the costs for calculating the operands of EXPR.  Returns
4032*38fd1498Szrj    true if successful, and returns the cost in COST.  */
4033*38fd1498Szrj 
4034*38fd1498Szrj static bool
4035*38fd1498Szrj get_shiftadd_cost (tree expr, scalar_int_mode mode, comp_cost cost0,
4036*38fd1498Szrj 		   comp_cost cost1, tree mult, bool speed, comp_cost *cost)
4037*38fd1498Szrj {
4038*38fd1498Szrj   comp_cost res;
4039*38fd1498Szrj   tree op1 = TREE_OPERAND (expr, 1);
4040*38fd1498Szrj   tree cst = TREE_OPERAND (mult, 1);
4041*38fd1498Szrj   tree multop = TREE_OPERAND (mult, 0);
4042*38fd1498Szrj   int m = exact_log2 (int_cst_value (cst));
4043*38fd1498Szrj   int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
4044*38fd1498Szrj   int as_cost, sa_cost;
4045*38fd1498Szrj   bool mult_in_op1;
4046*38fd1498Szrj 
4047*38fd1498Szrj   if (!(m >= 0 && m < maxm))
4048*38fd1498Szrj     return false;
4049*38fd1498Szrj 
4050*38fd1498Szrj   STRIP_NOPS (op1);
4051*38fd1498Szrj   mult_in_op1 = operand_equal_p (op1, mult, 0);
4052*38fd1498Szrj 
4053*38fd1498Szrj   as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
4054*38fd1498Szrj 
4055*38fd1498Szrj   /* If the target has a cheap shift-and-add or shift-and-sub instruction,
4056*38fd1498Szrj      use that in preference to a shift insn followed by an add insn.  */
4057*38fd1498Szrj   sa_cost = (TREE_CODE (expr) != MINUS_EXPR
4058*38fd1498Szrj 	     ? shiftadd_cost (speed, mode, m)
4059*38fd1498Szrj 	     : (mult_in_op1
4060*38fd1498Szrj 		? shiftsub1_cost (speed, mode, m)
4061*38fd1498Szrj 		: shiftsub0_cost (speed, mode, m)));
4062*38fd1498Szrj 
4063*38fd1498Szrj   res = comp_cost (MIN (as_cost, sa_cost), 0);
4064*38fd1498Szrj   res += (mult_in_op1 ? cost0 : cost1);
4065*38fd1498Szrj 
4066*38fd1498Szrj   STRIP_NOPS (multop);
4067*38fd1498Szrj   if (!is_gimple_val (multop))
4068*38fd1498Szrj     res += force_expr_to_var_cost (multop, speed);
4069*38fd1498Szrj 
4070*38fd1498Szrj   *cost = res;
4071*38fd1498Szrj   return true;
4072*38fd1498Szrj }
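
/* For a hypothetical expression a + b * 8: m == exact_log2 (8) == 3,
   as_cost prices the separate shift-then-add sequence and sa_cost a
   single shift-and-add insn, so the result is MIN (as_cost, sa_cost)
   plus the cost of computing the non-multiplied operand a.  */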
4073*38fd1498Szrj 
4074*38fd1498Szrj /* Estimates cost of forcing expression EXPR into a variable.  */
4075*38fd1498Szrj 
4076*38fd1498Szrj static comp_cost
4077*38fd1498Szrj force_expr_to_var_cost (tree expr, bool speed)
4078*38fd1498Szrj {
4079*38fd1498Szrj   static bool costs_initialized = false;
4080*38fd1498Szrj   static unsigned integer_cost [2];
4081*38fd1498Szrj   static unsigned symbol_cost [2];
4082*38fd1498Szrj   static unsigned address_cost [2];
4083*38fd1498Szrj   tree op0, op1;
4084*38fd1498Szrj   comp_cost cost0, cost1, cost;
4085*38fd1498Szrj   machine_mode mode;
4086*38fd1498Szrj   scalar_int_mode int_mode;
4087*38fd1498Szrj 
4088*38fd1498Szrj   if (!costs_initialized)
4089*38fd1498Szrj     {
4090*38fd1498Szrj       tree type = build_pointer_type (integer_type_node);
4091*38fd1498Szrj       tree var, addr;
4092*38fd1498Szrj       rtx x;
4093*38fd1498Szrj       int i;
4094*38fd1498Szrj 
4095*38fd1498Szrj       var = create_tmp_var_raw (integer_type_node, "test_var");
4096*38fd1498Szrj       TREE_STATIC (var) = 1;
4097*38fd1498Szrj       x = produce_memory_decl_rtl (var, NULL);
4098*38fd1498Szrj       SET_DECL_RTL (var, x);
4099*38fd1498Szrj 
4100*38fd1498Szrj       addr = build1 (ADDR_EXPR, type, var);
4101*38fd1498Szrj 
4103*38fd1498Szrj       for (i = 0; i < 2; i++)
4104*38fd1498Szrj 	{
4105*38fd1498Szrj 	  integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
4106*38fd1498Szrj 							     2000), i);
4107*38fd1498Szrj 
4108*38fd1498Szrj 	  symbol_cost[i] = computation_cost (addr, i) + 1;
4109*38fd1498Szrj 
4110*38fd1498Szrj 	  address_cost[i]
4111*38fd1498Szrj 	    = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
4112*38fd1498Szrj 	  if (dump_file && (dump_flags & TDF_DETAILS))
4113*38fd1498Szrj 	    {
4114*38fd1498Szrj 	      fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
4115*38fd1498Szrj 	      fprintf (dump_file, "  integer %d\n", (int) integer_cost[i]);
4116*38fd1498Szrj 	      fprintf (dump_file, "  symbol %d\n", (int) symbol_cost[i]);
4117*38fd1498Szrj 	      fprintf (dump_file, "  address %d\n", (int) address_cost[i]);
4118*38fd1498Szrj 	      fprintf (dump_file, "  other %d\n", (int) target_spill_cost[i]);
4119*38fd1498Szrj 	      fprintf (dump_file, "\n");
4120*38fd1498Szrj 	    }
4121*38fd1498Szrj 	}
4122*38fd1498Szrj 
4123*38fd1498Szrj       costs_initialized = true;
4124*38fd1498Szrj     }
4125*38fd1498Szrj 
4126*38fd1498Szrj   STRIP_NOPS (expr);
4127*38fd1498Szrj 
4128*38fd1498Szrj   if (SSA_VAR_P (expr))
4129*38fd1498Szrj     return no_cost;
4130*38fd1498Szrj 
4131*38fd1498Szrj   if (is_gimple_min_invariant (expr))
4132*38fd1498Szrj     {
4133*38fd1498Szrj       if (poly_int_tree_p (expr))
4134*38fd1498Szrj 	return comp_cost (integer_cost [speed], 0);
4135*38fd1498Szrj 
4136*38fd1498Szrj       if (TREE_CODE (expr) == ADDR_EXPR)
4137*38fd1498Szrj 	{
4138*38fd1498Szrj 	  tree obj = TREE_OPERAND (expr, 0);
4139*38fd1498Szrj 
4140*38fd1498Szrj 	  if (VAR_P (obj)
4141*38fd1498Szrj 	      || TREE_CODE (obj) == PARM_DECL
4142*38fd1498Szrj 	      || TREE_CODE (obj) == RESULT_DECL)
4143*38fd1498Szrj 	    return comp_cost (symbol_cost [speed], 0);
4144*38fd1498Szrj 	}
4145*38fd1498Szrj 
4146*38fd1498Szrj       return comp_cost (address_cost [speed], 0);
4147*38fd1498Szrj     }
4148*38fd1498Szrj 
4149*38fd1498Szrj   switch (TREE_CODE (expr))
4150*38fd1498Szrj     {
4151*38fd1498Szrj     case POINTER_PLUS_EXPR:
4152*38fd1498Szrj     case PLUS_EXPR:
4153*38fd1498Szrj     case MINUS_EXPR:
4154*38fd1498Szrj     case MULT_EXPR:
4155*38fd1498Szrj     case TRUNC_DIV_EXPR:
4156*38fd1498Szrj     case BIT_AND_EXPR:
4157*38fd1498Szrj     case BIT_IOR_EXPR:
4158*38fd1498Szrj     case LSHIFT_EXPR:
4159*38fd1498Szrj     case RSHIFT_EXPR:
4160*38fd1498Szrj       op0 = TREE_OPERAND (expr, 0);
4161*38fd1498Szrj       op1 = TREE_OPERAND (expr, 1);
4162*38fd1498Szrj       STRIP_NOPS (op0);
4163*38fd1498Szrj       STRIP_NOPS (op1);
4164*38fd1498Szrj       break;
4165*38fd1498Szrj 
4166*38fd1498Szrj     CASE_CONVERT:
4167*38fd1498Szrj     case NEGATE_EXPR:
4168*38fd1498Szrj     case BIT_NOT_EXPR:
4169*38fd1498Szrj       op0 = TREE_OPERAND (expr, 0);
4170*38fd1498Szrj       STRIP_NOPS (op0);
4171*38fd1498Szrj       op1 = NULL_TREE;
4172*38fd1498Szrj       break;
4173*38fd1498Szrj 
4174*38fd1498Szrj     default:
4175*38fd1498Szrj       /* Just an arbitrary value, FIXME.  */
4176*38fd1498Szrj       return comp_cost (target_spill_cost[speed], 0);
4177*38fd1498Szrj     }
4178*38fd1498Szrj 
4179*38fd1498Szrj   if (op0 == NULL_TREE
4180*38fd1498Szrj       || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
4181*38fd1498Szrj     cost0 = no_cost;
4182*38fd1498Szrj   else
4183*38fd1498Szrj     cost0 = force_expr_to_var_cost (op0, speed);
4184*38fd1498Szrj 
4185*38fd1498Szrj   if (op1 == NULL_TREE
4186*38fd1498Szrj       || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
4187*38fd1498Szrj     cost1 = no_cost;
4188*38fd1498Szrj   else
4189*38fd1498Szrj     cost1 = force_expr_to_var_cost (op1, speed);
4190*38fd1498Szrj 
4191*38fd1498Szrj   mode = TYPE_MODE (TREE_TYPE (expr));
4192*38fd1498Szrj   switch (TREE_CODE (expr))
4193*38fd1498Szrj     {
4194*38fd1498Szrj     case POINTER_PLUS_EXPR:
4195*38fd1498Szrj     case PLUS_EXPR:
4196*38fd1498Szrj     case MINUS_EXPR:
4197*38fd1498Szrj     case NEGATE_EXPR:
4198*38fd1498Szrj       cost = comp_cost (add_cost (speed, mode), 0);
4199*38fd1498Szrj       if (TREE_CODE (expr) != NEGATE_EXPR)
4200*38fd1498Szrj 	{
4201*38fd1498Szrj 	  tree mult = NULL_TREE;
4202*38fd1498Szrj 	  comp_cost sa_cost;
4203*38fd1498Szrj 	  if (TREE_CODE (op1) == MULT_EXPR)
4204*38fd1498Szrj 	    mult = op1;
4205*38fd1498Szrj 	  else if (TREE_CODE (op0) == MULT_EXPR)
4206*38fd1498Szrj 	    mult = op0;
4207*38fd1498Szrj 
4208*38fd1498Szrj 	  if (mult != NULL_TREE
4209*38fd1498Szrj 	      && is_a <scalar_int_mode> (mode, &int_mode)
4210*38fd1498Szrj 	      && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
4211*38fd1498Szrj 	      && get_shiftadd_cost (expr, int_mode, cost0, cost1, mult,
4212*38fd1498Szrj 				    speed, &sa_cost))
4213*38fd1498Szrj 	    return sa_cost;
4214*38fd1498Szrj 	}
4215*38fd1498Szrj       break;
4216*38fd1498Szrj 
4217*38fd1498Szrj     CASE_CONVERT:
4218*38fd1498Szrj       {
4219*38fd1498Szrj 	tree inner_mode, outer_mode;
4220*38fd1498Szrj 	outer_mode = TREE_TYPE (expr);
4221*38fd1498Szrj 	inner_mode = TREE_TYPE (op0);
4222*38fd1498Szrj 	cost = comp_cost (convert_cost (TYPE_MODE (outer_mode),
4223*38fd1498Szrj 				       TYPE_MODE (inner_mode), speed), 0);
4224*38fd1498Szrj       }
4225*38fd1498Szrj       break;
4226*38fd1498Szrj 
4227*38fd1498Szrj     case MULT_EXPR:
4228*38fd1498Szrj       if (cst_and_fits_in_hwi (op0))
4229*38fd1498Szrj 	cost = comp_cost (mult_by_coeff_cost (int_cst_value (op0),
4230*38fd1498Szrj 					     mode, speed), 0);
4231*38fd1498Szrj       else if (cst_and_fits_in_hwi (op1))
4232*38fd1498Szrj 	cost = comp_cost (mult_by_coeff_cost (int_cst_value (op1),
4233*38fd1498Szrj 					     mode, speed), 0);
4234*38fd1498Szrj       else
4235*38fd1498Szrj 	return comp_cost (target_spill_cost [speed], 0);
4236*38fd1498Szrj       break;
4237*38fd1498Szrj 
4238*38fd1498Szrj     case TRUNC_DIV_EXPR:
4239*38fd1498Szrj       /* Division by power of two is usually cheap, so we allow it.  Forbid
4240*38fd1498Szrj 	 anything else.  */
4241*38fd1498Szrj       if (integer_pow2p (TREE_OPERAND (expr, 1)))
4242*38fd1498Szrj 	cost = comp_cost (add_cost (speed, mode), 0);
4243*38fd1498Szrj       else
4244*38fd1498Szrj 	cost = comp_cost (target_spill_cost[speed], 0);
4245*38fd1498Szrj       break;
4246*38fd1498Szrj 
4247*38fd1498Szrj     case BIT_AND_EXPR:
4248*38fd1498Szrj     case BIT_IOR_EXPR:
4249*38fd1498Szrj     case BIT_NOT_EXPR:
4250*38fd1498Szrj     case LSHIFT_EXPR:
4251*38fd1498Szrj     case RSHIFT_EXPR:
4252*38fd1498Szrj       cost = comp_cost (add_cost (speed, mode), 0);
4253*38fd1498Szrj       break;
4254*38fd1498Szrj 
4255*38fd1498Szrj     default:
4256*38fd1498Szrj       gcc_unreachable ();
4257*38fd1498Szrj     }
4258*38fd1498Szrj 
4259*38fd1498Szrj   cost += cost0;
4260*38fd1498Szrj   cost += cost1;
4261*38fd1498Szrj   return cost;
4262*38fd1498Szrj }
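
/* A sketch of the recursion on a made-up operand: forcing a + b * 4
   into a variable costs add_cost for the addition plus, unless the
   shiftadd shortcut above applies, mult_by_coeff_cost (4, mode, speed)
   for b * 4; the SSA name leaves a and b themselves contribute
   no_cost.  */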
4263*38fd1498Szrj 
4264*38fd1498Szrj /* Estimates cost of forcing EXPR into a variable.  INV_VARS is a set of the
4265*38fd1498Szrj    invariants the computation depends on.  */
4266*38fd1498Szrj 
4267*38fd1498Szrj static comp_cost
4268*38fd1498Szrj force_var_cost (struct ivopts_data *data, tree expr, bitmap *inv_vars)
4269*38fd1498Szrj {
4270*38fd1498Szrj   if (!expr)
4271*38fd1498Szrj     return no_cost;
4272*38fd1498Szrj 
4273*38fd1498Szrj   find_inv_vars (data, &expr, inv_vars);
4274*38fd1498Szrj   return force_expr_to_var_cost (expr, data->speed);
4275*38fd1498Szrj }
4276*38fd1498Szrj 
4277*38fd1498Szrj /* Returns cost of auto-modifying address expression in shape base + offset.
4278*38fd1498Szrj    AINC_STEP is step size of the address IV.  AINC_OFFSET is offset of the
4279*38fd1498Szrj    address expression.  The address expression has ADDR_MODE in addr space
4280*38fd1498Szrj    AS.  The memory access has MEM_MODE.  SPEED means we are optimizing for
4281*38fd1498Szrj    speed or size.  */
4282*38fd1498Szrj 
4283*38fd1498Szrj enum ainc_type
4284*38fd1498Szrj {
4285*38fd1498Szrj   AINC_PRE_INC,		/* Pre increment.  */
4286*38fd1498Szrj   AINC_PRE_DEC,		/* Pre decrement.  */
4287*38fd1498Szrj   AINC_POST_INC,	/* Post increment.  */
4288*38fd1498Szrj   AINC_POST_DEC,	/* Post decrement.  */
4289*38fd1498Szrj   AINC_NONE		/* Also the number of auto increment types.  */
4290*38fd1498Szrj };
4291*38fd1498Szrj 
4292*38fd1498Szrj struct ainc_cost_data
4293*38fd1498Szrj {
4294*38fd1498Szrj   unsigned costs[AINC_NONE];
4295*38fd1498Szrj };
4296*38fd1498Szrj 
4297*38fd1498Szrj static comp_cost
4298*38fd1498Szrj get_address_cost_ainc (poly_int64 ainc_step, poly_int64 ainc_offset,
4299*38fd1498Szrj 		       machine_mode addr_mode, machine_mode mem_mode,
4300*38fd1498Szrj 		       addr_space_t as, bool speed)
4301*38fd1498Szrj {
4302*38fd1498Szrj   if (!USE_LOAD_PRE_DECREMENT (mem_mode)
4303*38fd1498Szrj       && !USE_STORE_PRE_DECREMENT (mem_mode)
4304*38fd1498Szrj       && !USE_LOAD_POST_DECREMENT (mem_mode)
4305*38fd1498Szrj       && !USE_STORE_POST_DECREMENT (mem_mode)
4306*38fd1498Szrj       && !USE_LOAD_PRE_INCREMENT (mem_mode)
4307*38fd1498Szrj       && !USE_STORE_PRE_INCREMENT (mem_mode)
4308*38fd1498Szrj       && !USE_LOAD_POST_INCREMENT (mem_mode)
4309*38fd1498Szrj       && !USE_STORE_POST_INCREMENT (mem_mode))
4310*38fd1498Szrj     return infinite_cost;
4311*38fd1498Szrj 
4312*38fd1498Szrj   static vec<ainc_cost_data *> ainc_cost_data_list;
4313*38fd1498Szrj   unsigned idx = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
4314*38fd1498Szrj   if (idx >= ainc_cost_data_list.length ())
4315*38fd1498Szrj     {
4316*38fd1498Szrj       unsigned nsize = ((unsigned) as + 1) * MAX_MACHINE_MODE;
4317*38fd1498Szrj 
4318*38fd1498Szrj       gcc_assert (nsize > idx);
4319*38fd1498Szrj       ainc_cost_data_list.safe_grow_cleared (nsize);
4320*38fd1498Szrj     }
4321*38fd1498Szrj 
4322*38fd1498Szrj   ainc_cost_data *data = ainc_cost_data_list[idx];
4323*38fd1498Szrj   if (data == NULL)
4324*38fd1498Szrj     {
4325*38fd1498Szrj       rtx reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
4326*38fd1498Szrj 
4327*38fd1498Szrj       data = (ainc_cost_data *) xcalloc (1, sizeof (*data));
4328*38fd1498Szrj       data->costs[AINC_PRE_DEC] = INFTY;
4329*38fd1498Szrj       data->costs[AINC_POST_DEC] = INFTY;
4330*38fd1498Szrj       data->costs[AINC_PRE_INC] = INFTY;
4331*38fd1498Szrj       data->costs[AINC_POST_INC] = INFTY;
4332*38fd1498Szrj       if (USE_LOAD_PRE_DECREMENT (mem_mode)
4333*38fd1498Szrj 	  || USE_STORE_PRE_DECREMENT (mem_mode))
4334*38fd1498Szrj 	{
4335*38fd1498Szrj 	  rtx addr = gen_rtx_PRE_DEC (addr_mode, reg);
4336*38fd1498Szrj 
4337*38fd1498Szrj 	  if (memory_address_addr_space_p (mem_mode, addr, as))
4338*38fd1498Szrj 	    data->costs[AINC_PRE_DEC]
4339*38fd1498Szrj 	      = address_cost (addr, mem_mode, as, speed);
4340*38fd1498Szrj 	}
4341*38fd1498Szrj       if (USE_LOAD_POST_DECREMENT (mem_mode)
4342*38fd1498Szrj 	  || USE_STORE_POST_DECREMENT (mem_mode))
4343*38fd1498Szrj 	{
4344*38fd1498Szrj 	  rtx addr = gen_rtx_POST_DEC (addr_mode, reg);
4345*38fd1498Szrj 
4346*38fd1498Szrj 	  if (memory_address_addr_space_p (mem_mode, addr, as))
4347*38fd1498Szrj 	    data->costs[AINC_POST_DEC]
4348*38fd1498Szrj 	      = address_cost (addr, mem_mode, as, speed);
4349*38fd1498Szrj 	}
4350*38fd1498Szrj       if (USE_LOAD_PRE_INCREMENT (mem_mode)
4351*38fd1498Szrj 	  || USE_STORE_PRE_INCREMENT (mem_mode))
4352*38fd1498Szrj 	{
4353*38fd1498Szrj 	  rtx addr = gen_rtx_PRE_INC (addr_mode, reg);
4354*38fd1498Szrj 
4355*38fd1498Szrj 	  if (memory_address_addr_space_p (mem_mode, addr, as))
4356*38fd1498Szrj 	    data->costs[AINC_PRE_INC]
4357*38fd1498Szrj 	      = address_cost (addr, mem_mode, as, speed);
4358*38fd1498Szrj 	}
4359*38fd1498Szrj       if (USE_LOAD_POST_INCREMENT (mem_mode)
4360*38fd1498Szrj 	  || USE_STORE_POST_INCREMENT (mem_mode))
4361*38fd1498Szrj 	{
4362*38fd1498Szrj 	  rtx addr = gen_rtx_POST_INC (addr_mode, reg);
4363*38fd1498Szrj 
4364*38fd1498Szrj 	  if (memory_address_addr_space_p (mem_mode, addr, as))
4365*38fd1498Szrj 	    data->costs[AINC_POST_INC]
4366*38fd1498Szrj 	      = address_cost (addr, mem_mode, as, speed);
4367*38fd1498Szrj 	}
4368*38fd1498Szrj       ainc_cost_data_list[idx] = data;
4369*38fd1498Szrj     }
4370*38fd1498Szrj 
4371*38fd1498Szrj   poly_int64 msize = GET_MODE_SIZE (mem_mode);
4372*38fd1498Szrj   if (known_eq (ainc_offset, 0) && known_eq (msize, ainc_step))
4373*38fd1498Szrj     return comp_cost (data->costs[AINC_POST_INC], 0);
4374*38fd1498Szrj   if (known_eq (ainc_offset, 0) && known_eq (msize, -ainc_step))
4375*38fd1498Szrj     return comp_cost (data->costs[AINC_POST_DEC], 0);
4376*38fd1498Szrj   if (known_eq (ainc_offset, msize) && known_eq (msize, ainc_step))
4377*38fd1498Szrj     return comp_cost (data->costs[AINC_PRE_INC], 0);
4378*38fd1498Szrj   if (known_eq (ainc_offset, -msize) && known_eq (msize, -ainc_step))
4379*38fd1498Szrj     return comp_cost (data->costs[AINC_PRE_DEC], 0);
4380*38fd1498Szrj 
4381*38fd1498Szrj   return infinite_cost;
4382*38fd1498Szrj }
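
/* The final matching means, e.g., that with a 4-byte MEM_MODE a step
   of 4 with offset 0 is priced as AINC_POST_INC and the same step with
   offset 4 as AINC_PRE_INC; a step of -4 pairs with the *_DEC entries,
   and any other combination yields infinite_cost.  */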
4383*38fd1498Szrj 
4384*38fd1498Szrj /* Return cost of computing USE's address expression by using CAND.
4385*38fd1498Szrj    AFF_INV and AFF_VAR represent invariant and variant parts of the
4386*38fd1498Szrj    address expression, respectively.  If AFF_INV is simple, store
4387*38fd1498Szrj    the loop invariant variables on which it depends in INV_VARS;
4388*38fd1498Szrj    if AFF_INV is complicated, handle it as a new invariant expression
4389*38fd1498Szrj    and record it in INV_EXPR.  RATIO indicates the multiple between the
4390*38fd1498Szrj    steps of USE and CAND.  If CAN_AUTOINC is non-NULL, store a boolean
4391*38fd1498Szrj    value in it indicating whether this is an auto-increment address.  */
4392*38fd1498Szrj 
4393*38fd1498Szrj static comp_cost
4394*38fd1498Szrj get_address_cost (struct ivopts_data *data, struct iv_use *use,
4395*38fd1498Szrj 		  struct iv_cand *cand, aff_tree *aff_inv,
4396*38fd1498Szrj 		  aff_tree *aff_var, HOST_WIDE_INT ratio,
4397*38fd1498Szrj 		  bitmap *inv_vars, iv_inv_expr_ent **inv_expr,
4398*38fd1498Szrj 		  bool *can_autoinc, bool speed)
4399*38fd1498Szrj {
4400*38fd1498Szrj   rtx addr;
4401*38fd1498Szrj   bool simple_inv = true;
4402*38fd1498Szrj   tree comp_inv = NULL_TREE, type = aff_var->type;
4403*38fd1498Szrj   comp_cost var_cost = no_cost, cost = no_cost;
4404*38fd1498Szrj   struct mem_address parts = {NULL_TREE, integer_one_node,
4405*38fd1498Szrj 			      NULL_TREE, NULL_TREE, NULL_TREE};
4406*38fd1498Szrj   machine_mode addr_mode = TYPE_MODE (type);
4407*38fd1498Szrj   machine_mode mem_mode = TYPE_MODE (use->mem_type);
4408*38fd1498Szrj   addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
4409*38fd1498Szrj   /* Only true if ratio != 1.  */
4410*38fd1498Szrj   bool ok_with_ratio_p = false;
4411*38fd1498Szrj   bool ok_without_ratio_p = false;
4412*38fd1498Szrj 
4413*38fd1498Szrj   if (!aff_combination_const_p (aff_inv))
4414*38fd1498Szrj     {
4415*38fd1498Szrj       parts.index = integer_one_node;
4416*38fd1498Szrj       /* Addressing mode "base + index".  */
4417*38fd1498Szrj       ok_without_ratio_p = valid_mem_ref_p (mem_mode, as, &parts);
4418*38fd1498Szrj       if (ratio != 1)
4419*38fd1498Szrj 	{
4420*38fd1498Szrj 	  parts.step = wide_int_to_tree (type, ratio);
4421*38fd1498Szrj 	  /* Addressing mode "base + index << scale".  */
4422*38fd1498Szrj 	  ok_with_ratio_p = valid_mem_ref_p (mem_mode, as, &parts);
4423*38fd1498Szrj 	  if (!ok_with_ratio_p)
4424*38fd1498Szrj 	    parts.step = NULL_TREE;
4425*38fd1498Szrj 	}
4426*38fd1498Szrj       if (ok_with_ratio_p || ok_without_ratio_p)
4427*38fd1498Szrj 	{
4428*38fd1498Szrj 	  if (maybe_ne (aff_inv->offset, 0))
4429*38fd1498Szrj 	    {
4430*38fd1498Szrj 	      parts.offset = wide_int_to_tree (sizetype, aff_inv->offset);
4431*38fd1498Szrj 	      /* Addressing mode "base + index [<< scale] + offset".  */
4432*38fd1498Szrj 	      if (!valid_mem_ref_p (mem_mode, as, &parts))
4433*38fd1498Szrj 		parts.offset = NULL_TREE;
4434*38fd1498Szrj 	      else
4435*38fd1498Szrj 		aff_inv->offset = 0;
4436*38fd1498Szrj 	    }
4437*38fd1498Szrj 
4438*38fd1498Szrj 	  move_fixed_address_to_symbol (&parts, aff_inv);
4439*38fd1498Szrj 	  /* Base is fixed address and is moved to symbol part.  */
4440*38fd1498Szrj 	  if (parts.symbol != NULL_TREE && aff_combination_zero_p (aff_inv))
4441*38fd1498Szrj 	    parts.base = NULL_TREE;
4442*38fd1498Szrj 
4443*38fd1498Szrj 	  /* Addressing mode "symbol + base + index [<< scale] [+ offset]".  */
4444*38fd1498Szrj 	  if (parts.symbol != NULL_TREE
4445*38fd1498Szrj 	      && !valid_mem_ref_p (mem_mode, as, &parts))
4446*38fd1498Szrj 	    {
4447*38fd1498Szrj 	      aff_combination_add_elt (aff_inv, parts.symbol, 1);
4448*38fd1498Szrj 	      parts.symbol = NULL_TREE;
4449*38fd1498Szrj 	      /* Reset SIMPLE_INV since symbol address needs to be computed
4450*38fd1498Szrj 		 outside of address expression in this case.  */
4451*38fd1498Szrj 	      simple_inv = false;
4452*38fd1498Szrj 	      /* Symbol part is moved back to the base part; it can't be NULL.  */
4453*38fd1498Szrj 	      parts.base = integer_one_node;
4454*38fd1498Szrj 	    }
4455*38fd1498Szrj 	}
4456*38fd1498Szrj       else
4457*38fd1498Szrj 	parts.index = NULL_TREE;
4458*38fd1498Szrj     }
4459*38fd1498Szrj   else
4460*38fd1498Szrj     {
4461*38fd1498Szrj       poly_int64 ainc_step;
4462*38fd1498Szrj       if (can_autoinc
4463*38fd1498Szrj 	  && ratio == 1
4464*38fd1498Szrj 	  && ptrdiff_tree_p (cand->iv->step, &ainc_step))
4465*38fd1498Szrj 	{
4466*38fd1498Szrj 	  poly_int64 ainc_offset = (aff_inv->offset).force_shwi ();
4467*38fd1498Szrj 
4468*38fd1498Szrj 	  if (stmt_after_increment (data->current_loop, cand, use->stmt))
4469*38fd1498Szrj 	    ainc_offset += ainc_step;
4470*38fd1498Szrj 	  cost = get_address_cost_ainc (ainc_step, ainc_offset,
4471*38fd1498Szrj 					addr_mode, mem_mode, as, speed);
4472*38fd1498Szrj 	  if (!cost.infinite_cost_p ())
4473*38fd1498Szrj 	    {
4474*38fd1498Szrj 	      *can_autoinc = true;
4475*38fd1498Szrj 	      return cost;
4476*38fd1498Szrj 	    }
4477*38fd1498Szrj 	  cost = no_cost;
4478*38fd1498Szrj 	}
4479*38fd1498Szrj       if (!aff_combination_zero_p (aff_inv))
4480*38fd1498Szrj 	{
4481*38fd1498Szrj 	  parts.offset = wide_int_to_tree (sizetype, aff_inv->offset);
4482*38fd1498Szrj 	  /* Addressing mode "base + offset".  */
4483*38fd1498Szrj 	  if (!valid_mem_ref_p (mem_mode, as, &parts))
4484*38fd1498Szrj 	    parts.offset = NULL_TREE;
4485*38fd1498Szrj 	  else
4486*38fd1498Szrj 	    aff_inv->offset = 0;
4487*38fd1498Szrj 	}
4488*38fd1498Szrj     }
4489*38fd1498Szrj 
4490*38fd1498Szrj   if (simple_inv)
4491*38fd1498Szrj     simple_inv = (aff_inv == NULL
4492*38fd1498Szrj 		  || aff_combination_const_p (aff_inv)
4493*38fd1498Szrj 		  || aff_combination_singleton_var_p (aff_inv));
4494*38fd1498Szrj   if (!aff_combination_zero_p (aff_inv))
4495*38fd1498Szrj     comp_inv = aff_combination_to_tree (aff_inv);
4496*38fd1498Szrj   if (comp_inv != NULL_TREE)
4497*38fd1498Szrj     cost = force_var_cost (data, comp_inv, inv_vars);
4498*38fd1498Szrj   if (ratio != 1 && parts.step == NULL_TREE)
4499*38fd1498Szrj     var_cost += mult_by_coeff_cost (ratio, addr_mode, speed);
4500*38fd1498Szrj   if (comp_inv != NULL_TREE && parts.index == NULL_TREE)
4501*38fd1498Szrj     var_cost += add_cost (speed, addr_mode);
4502*38fd1498Szrj 
4503*38fd1498Szrj   if (comp_inv && inv_expr && !simple_inv)
4504*38fd1498Szrj     {
4505*38fd1498Szrj       *inv_expr = get_loop_invariant_expr (data, comp_inv);
4506*38fd1498Szrj       /* Clear depends on.  */
4507*38fd1498Szrj       if (*inv_expr != NULL && inv_vars && *inv_vars)
4508*38fd1498Szrj 	bitmap_clear (*inv_vars);
4509*38fd1498Szrj 
4510*38fd1498Szrj       /* The cost of a small invariant expression, amortized over the loop
4511*38fd1498Szrj 	 iterations, is usually zero, which makes it difficult to distinguish
4512*38fd1498Szrj 	 from a candidate based on loop invariant variables.  Moreover, the
4513*38fd1498Szrj 	 generated invariant expression may not be hoisted out of the loop by
4514*38fd1498Szrj 	 a following pass.  We penalize the cost by rounding up in order to
4515*38fd1498Szrj 	 neutralize such effects.  */
4516*38fd1498Szrj       cost.cost = adjust_setup_cost (data, cost.cost, true);
4517*38fd1498Szrj       cost.scratch = cost.cost;
4518*38fd1498Szrj     }
4519*38fd1498Szrj 
4520*38fd1498Szrj   cost += var_cost;
4521*38fd1498Szrj   addr = addr_for_mem_ref (&parts, as, false);
4522*38fd1498Szrj   gcc_assert (memory_address_addr_space_p (mem_mode, addr, as));
4523*38fd1498Szrj   cost += address_cost (addr, mem_mode, as, speed);
4524*38fd1498Szrj 
4525*38fd1498Szrj   if (parts.symbol != NULL_TREE)
4526*38fd1498Szrj     cost.complexity += 1;
4527*38fd1498Szrj   /* Don't increase the complexity of adding a scaled index if it's
4528*38fd1498Szrj      the only kind of index that the target allows.  */
4529*38fd1498Szrj   if (parts.step != NULL_TREE && ok_without_ratio_p)
4530*38fd1498Szrj     cost.complexity += 1;
4531*38fd1498Szrj   if (parts.base != NULL_TREE && parts.index != NULL_TREE)
4532*38fd1498Szrj     cost.complexity += 1;
4533*38fd1498Szrj   if (parts.offset != NULL_TREE && !integer_zerop (parts.offset))
4534*38fd1498Szrj     cost.complexity += 1;
4535*38fd1498Szrj 
4536*38fd1498Szrj   return cost;
4537*38fd1498Szrj }
4538*38fd1498Szrj 
4539*38fd1498Szrj /* Scale (multiply) the computed COST (except the scratch part, which should
4540*38fd1498Szrj    be hoisted out of the loop) by AT->frequency / header->frequency, which
4541*38fd1498Szrj    makes the expected cost more accurate.  */
4542*38fd1498Szrj 
4543*38fd1498Szrj static comp_cost
4544*38fd1498Szrj get_scaled_computation_cost_at (ivopts_data *data, gimple *at, comp_cost cost)
4545*38fd1498Szrj {
4546*38fd1498Szrj    int loop_freq = data->current_loop->header->count.to_frequency (cfun);
4547*38fd1498Szrj    int bb_freq = gimple_bb (at)->count.to_frequency (cfun);
4548*38fd1498Szrj    if (loop_freq != 0)
4549*38fd1498Szrj      {
4550*38fd1498Szrj        gcc_assert (cost.scratch <= cost.cost);
4551*38fd1498Szrj        int scaled_cost
4552*38fd1498Szrj 	 = cost.scratch + (cost.cost - cost.scratch) * bb_freq / loop_freq;
4553*38fd1498Szrj 
4554*38fd1498Szrj        if (dump_file && (dump_flags & TDF_DETAILS))
4555*38fd1498Szrj 	 fprintf (dump_file, "Scaling cost based on bb prob "
4556*38fd1498Szrj 		  "by %2.2f: %d (scratch: %d) -> %d (%d/%d)\n",
4557*38fd1498Szrj 		  1.0f * bb_freq / loop_freq, cost.cost,
4558*38fd1498Szrj 		  cost.scratch, scaled_cost, bb_freq, loop_freq);
4559*38fd1498Szrj 
4560*38fd1498Szrj        cost.cost = scaled_cost;
4561*38fd1498Szrj      }
4562*38fd1498Szrj 
4563*38fd1498Szrj   return cost;
4564*38fd1498Szrj }
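
/* Worked numbers (made up): with cost.cost == 10, cost.scratch == 2,
   bb_freq == 50 and loop_freq == 100, the scaled cost is
   2 + (10 - 2) * 50 / 100 == 6.  The scratch (setup) part stays
   unscaled because it is expected to execute outside the loop body.  */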
4565*38fd1498Szrj 
4566*38fd1498Szrj /* Determines the cost of the computation by that USE is expressed
4567*38fd1498Szrj    from induction variable CAND.  If ADDRESS_P is true, we just need
4568*38fd1498Szrj    to create an address from it, otherwise we want to get it into
4569*38fd1498Szrj    register.  A set of invariants we depend on is stored in INV_VARS.
4570*38fd1498Szrj    If CAN_AUTOINC is nonnull, use it to record whether autoinc
4571*38fd1498Szrj    addressing is likely.  If INV_EXPR is nonnull, record invariant
4572*38fd1498Szrj    expr entry in it.  */
4573*38fd1498Szrj 
4574*38fd1498Szrj static comp_cost
4575*38fd1498Szrj get_computation_cost (struct ivopts_data *data, struct iv_use *use,
4576*38fd1498Szrj 		      struct iv_cand *cand, bool address_p, bitmap *inv_vars,
4577*38fd1498Szrj 		      bool *can_autoinc, iv_inv_expr_ent **inv_expr)
4578*38fd1498Szrj {
4579*38fd1498Szrj   gimple *at = use->stmt;
4580*38fd1498Szrj   tree ubase = use->iv->base, cbase = cand->iv->base;
4581*38fd1498Szrj   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
4582*38fd1498Szrj   tree comp_inv = NULL_TREE;
4583*38fd1498Szrj   HOST_WIDE_INT ratio, aratio;
4584*38fd1498Szrj   comp_cost cost;
4585*38fd1498Szrj   widest_int rat;
4586*38fd1498Szrj   aff_tree aff_inv, aff_var;
4587*38fd1498Szrj   bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4588*38fd1498Szrj 
4589*38fd1498Szrj   if (inv_vars)
4590*38fd1498Szrj     *inv_vars = NULL;
4591*38fd1498Szrj   if (can_autoinc)
4592*38fd1498Szrj     *can_autoinc = false;
4593*38fd1498Szrj   if (inv_expr)
4594*38fd1498Szrj     *inv_expr = NULL;
4595*38fd1498Szrj 
4596*38fd1498Szrj   /* Check if we have enough precision to express the values of use.  */
4597*38fd1498Szrj   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4598*38fd1498Szrj     return infinite_cost;
4599*38fd1498Szrj 
4600*38fd1498Szrj   if (address_p
4601*38fd1498Szrj       || (use->iv->base_object
4602*38fd1498Szrj 	  && cand->iv->base_object
4603*38fd1498Szrj 	  && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4604*38fd1498Szrj 	  && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4605*38fd1498Szrj     {
4606*38fd1498Szrj       /* Do not try to express the address of an object with a computation
4607*38fd1498Szrj 	 based on the address of a different object.  This may cause problems
4608*38fd1498Szrj 	 in RTL-level alias analysis (which does not expect this to happen,
4609*38fd1498Szrj 	 as it is illegal in C), and would be unlikely to be useful
4610*38fd1498Szrj 	 anyway.  */
4611*38fd1498Szrj       if (use->iv->base_object
4612*38fd1498Szrj 	  && cand->iv->base_object
4613*38fd1498Szrj 	  && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4614*38fd1498Szrj 	return infinite_cost;
4615*38fd1498Szrj     }
4616*38fd1498Szrj 
4617*38fd1498Szrj   if (!get_computation_aff_1 (data->current_loop, at, use,
4618*38fd1498Szrj 			      cand, &aff_inv, &aff_var, &rat)
4619*38fd1498Szrj       || !wi::fits_shwi_p (rat))
4620*38fd1498Szrj     return infinite_cost;
4621*38fd1498Szrj 
4622*38fd1498Szrj   ratio = rat.to_shwi ();
4623*38fd1498Szrj   if (address_p)
4624*38fd1498Szrj     {
4625*38fd1498Szrj       cost = get_address_cost (data, use, cand, &aff_inv, &aff_var, ratio,
4626*38fd1498Szrj 			       inv_vars, inv_expr, can_autoinc, speed);
4627*38fd1498Szrj       return get_scaled_computation_cost_at (data, at, cost);
4628*38fd1498Szrj     }
4629*38fd1498Szrj 
4630*38fd1498Szrj   bool simple_inv = (aff_combination_const_p (&aff_inv)
4631*38fd1498Szrj 		     || aff_combination_singleton_var_p (&aff_inv));
4632*38fd1498Szrj   tree signed_type = signed_type_for (aff_combination_type (&aff_inv));
4633*38fd1498Szrj   aff_combination_convert (&aff_inv, signed_type);
4634*38fd1498Szrj   if (!aff_combination_zero_p (&aff_inv))
4635*38fd1498Szrj     comp_inv = aff_combination_to_tree (&aff_inv);
4636*38fd1498Szrj 
4637*38fd1498Szrj   cost = force_var_cost (data, comp_inv, inv_vars);
4638*38fd1498Szrj   if (comp_inv && inv_expr && !simple_inv)
4639*38fd1498Szrj     {
4640*38fd1498Szrj       *inv_expr = get_loop_invariant_expr (data, comp_inv);
4641*38fd1498Szrj       /* Clear depends on.  */
4642*38fd1498Szrj       if (*inv_expr != NULL && inv_vars && *inv_vars)
4643*38fd1498Szrj 	bitmap_clear (*inv_vars);
4644*38fd1498Szrj 
4645*38fd1498Szrj       cost.cost = adjust_setup_cost (data, cost.cost);
4646*38fd1498Szrj       /* Record setup cost in scratch field.  */
4647*38fd1498Szrj       cost.scratch = cost.cost;
4648*38fd1498Szrj     }
4649*38fd1498Szrj   /* Cost of constant integer can be covered when adding invariant part to
4650*38fd1498Szrj   /* The cost of a constant integer can be covered when adding the invariant
4651*38fd1498Szrj      part to the variant part.  */
4652*38fd1498Szrj     cost = no_cost;
4653*38fd1498Szrj 
4654*38fd1498Szrj   /* Need type narrowing to represent use with cand.  */
4655*38fd1498Szrj   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4656*38fd1498Szrj     {
4657*38fd1498Szrj       machine_mode outer_mode = TYPE_MODE (utype);
4658*38fd1498Szrj       machine_mode inner_mode = TYPE_MODE (ctype);
4659*38fd1498Szrj       cost += comp_cost (convert_cost (outer_mode, inner_mode, speed), 0);
4660*38fd1498Szrj     }
4661*38fd1498Szrj 
4662*38fd1498Szrj   /* Turn a + i * (-c) into a - i * c.  */
4663*38fd1498Szrj   if (ratio < 0 && comp_inv && !integer_zerop (comp_inv))
4664*38fd1498Szrj     aratio = -ratio;
4665*38fd1498Szrj   else
4666*38fd1498Szrj     aratio = ratio;
4667*38fd1498Szrj 
4668*38fd1498Szrj   if (ratio != 1)
4669*38fd1498Szrj     cost += mult_by_coeff_cost (aratio, TYPE_MODE (utype), speed);
4670*38fd1498Szrj 
4671*38fd1498Szrj   /* TODO: We may also need to check if we can compute a + i * 4 in one
4672*38fd1498Szrj      instruction.  */
4673*38fd1498Szrj   /* Need to add up the invariant and variant parts.  */
4674*38fd1498Szrj   if (comp_inv && !integer_zerop (comp_inv))
4675*38fd1498Szrj     cost += add_cost (speed, TYPE_MODE (utype));
4676*38fd1498Szrj 
4677*38fd1498Szrj   return get_scaled_computation_cost_at (data, at, cost);
4678*38fd1498Szrj }
4679*38fd1498Szrj 
4680*38fd1498Szrj /* Determines cost of computing the use in GROUP with CAND in a generic
4681*38fd1498Szrj    expression.  */
4682*38fd1498Szrj 
4683*38fd1498Szrj static bool
4684*38fd1498Szrj determine_group_iv_cost_generic (struct ivopts_data *data,
4685*38fd1498Szrj 				 struct iv_group *group, struct iv_cand *cand)
4686*38fd1498Szrj {
4687*38fd1498Szrj   comp_cost cost;
4688*38fd1498Szrj   iv_inv_expr_ent *inv_expr = NULL;
4689*38fd1498Szrj   bitmap inv_vars = NULL, inv_exprs = NULL;
4690*38fd1498Szrj   struct iv_use *use = group->vuses[0];
4691*38fd1498Szrj 
4692*38fd1498Szrj   /* The simple case first -- if we need to express value of the preserved
4693*38fd1498Szrj      original biv, the cost is 0.  This also prevents us from counting the
4694*38fd1498Szrj      cost of increment twice -- once at this use and once in the cost of
4695*38fd1498Szrj      the candidate.  */
4696*38fd1498Szrj   if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
4697*38fd1498Szrj     cost = no_cost;
4698*38fd1498Szrj   else
4699*38fd1498Szrj     cost = get_computation_cost (data, use, cand, false,
4700*38fd1498Szrj 				 &inv_vars, NULL, &inv_expr);
4701*38fd1498Szrj 
4702*38fd1498Szrj   if (inv_expr)
4703*38fd1498Szrj     {
4704*38fd1498Szrj       inv_exprs = BITMAP_ALLOC (NULL);
4705*38fd1498Szrj       bitmap_set_bit (inv_exprs, inv_expr->id);
4706*38fd1498Szrj     }
4707*38fd1498Szrj   set_group_iv_cost (data, group, cand, cost, inv_vars,
4708*38fd1498Szrj 		     NULL_TREE, ERROR_MARK, inv_exprs);
4709*38fd1498Szrj   return !cost.infinite_cost_p ();
4710*38fd1498Szrj }
4711*38fd1498Szrj 
4712*38fd1498Szrj /* Determines cost of computing uses in GROUP with CAND in addresses.  */
4713*38fd1498Szrj 
4714*38fd1498Szrj static bool
4715*38fd1498Szrj determine_group_iv_cost_address (struct ivopts_data *data,
4716*38fd1498Szrj 				 struct iv_group *group, struct iv_cand *cand)
4717*38fd1498Szrj {
4718*38fd1498Szrj   unsigned i;
4719*38fd1498Szrj   bitmap inv_vars = NULL, inv_exprs = NULL;
4720*38fd1498Szrj   bool can_autoinc;
4721*38fd1498Szrj   iv_inv_expr_ent *inv_expr = NULL;
4722*38fd1498Szrj   struct iv_use *use = group->vuses[0];
4723*38fd1498Szrj   comp_cost sum_cost = no_cost, cost;
4724*38fd1498Szrj 
4725*38fd1498Szrj   cost = get_computation_cost (data, use, cand, true,
4726*38fd1498Szrj 			       &inv_vars, &can_autoinc, &inv_expr);
4727*38fd1498Szrj 
4728*38fd1498Szrj   if (inv_expr)
4729*38fd1498Szrj     {
4730*38fd1498Szrj       inv_exprs = BITMAP_ALLOC (NULL);
4731*38fd1498Szrj       bitmap_set_bit (inv_exprs, inv_expr->id);
4732*38fd1498Szrj     }
4733*38fd1498Szrj   sum_cost = cost;
4734*38fd1498Szrj   if (!sum_cost.infinite_cost_p () && cand->ainc_use == use)
4735*38fd1498Szrj     {
4736*38fd1498Szrj       if (can_autoinc)
4737*38fd1498Szrj 	sum_cost -= cand->cost_step;
4738*38fd1498Szrj       /* If we generated the candidate solely for exploiting autoincrement
4739*38fd1498Szrj 	 opportunities, and it turns out it can't be used, set the cost to
4740*38fd1498Szrj 	 infinity to make sure we ignore it.  */
4741*38fd1498Szrj       else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4742*38fd1498Szrj 	sum_cost = infinite_cost;
4743*38fd1498Szrj     }
4744*38fd1498Szrj 
4745*38fd1498Szrj   /* Uses in a group can share setup code, so only add setup cost once.  */
4746*38fd1498Szrj   cost -= cost.scratch;
4747*38fd1498Szrj   /* Compute and add costs for the rest of the uses in this group.  */
4748*38fd1498Szrj   for (i = 1; i < group->vuses.length () && !sum_cost.infinite_cost_p (); i++)
4749*38fd1498Szrj     {
4750*38fd1498Szrj       struct iv_use *next = group->vuses[i];
4751*38fd1498Szrj 
4752*38fd1498Szrj       /* TODO: We could skip computing cost for sub iv_use when it has the
4753*38fd1498Szrj 	 same cost as the first iv_use, but the cost really depends on the
4754*38fd1498Szrj 	 offset and where the iv_use is.  */
4755*38fd1498Szrj       cost = get_computation_cost (data, next, cand, true,
4756*38fd1498Szrj 				   NULL, &can_autoinc, &inv_expr);
4757*38fd1498Szrj       if (inv_expr)
4758*38fd1498Szrj 	{
4759*38fd1498Szrj 	  if (!inv_exprs)
4760*38fd1498Szrj 	    inv_exprs = BITMAP_ALLOC (NULL);
4761*38fd1498Szrj 
4762*38fd1498Szrj 	  bitmap_set_bit (inv_exprs, inv_expr->id);
4763*38fd1498Szrj 	}
4764*38fd1498Szrj       sum_cost += cost;
4765*38fd1498Szrj     }
4766*38fd1498Szrj   set_group_iv_cost (data, group, cand, sum_cost, inv_vars,
4767*38fd1498Szrj 		     NULL_TREE, ERROR_MARK, inv_exprs);
4768*38fd1498Szrj 
4769*38fd1498Szrj   return !sum_cost.infinite_cost_p ();
4770*38fd1498Szrj }
4771*38fd1498Szrj 
4772*38fd1498Szrj /* Computes value of candidate CAND at position AT in iteration NITER, and
4773*38fd1498Szrj    stores it in VAL.  */
4774*38fd1498Szrj 
4775*38fd1498Szrj static void
4776*38fd1498Szrj cand_value_at (struct loop *loop, struct iv_cand *cand, gimple *at, tree niter,
4777*38fd1498Szrj 	       aff_tree *val)
4778*38fd1498Szrj {
4779*38fd1498Szrj   aff_tree step, delta, nit;
4780*38fd1498Szrj   struct iv *iv = cand->iv;
4781*38fd1498Szrj   tree type = TREE_TYPE (iv->base);
4782*38fd1498Szrj   tree steptype;
4783*38fd1498Szrj   if (POINTER_TYPE_P (type))
4784*38fd1498Szrj     steptype = sizetype;
4785*38fd1498Szrj   else
4786*38fd1498Szrj     steptype = unsigned_type_for (type);
4787*38fd1498Szrj 
4788*38fd1498Szrj   tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
4789*38fd1498Szrj   aff_combination_convert (&step, steptype);
4790*38fd1498Szrj   tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4791*38fd1498Szrj   aff_combination_convert (&nit, steptype);
4792*38fd1498Szrj   aff_combination_mult (&nit, &step, &delta);
4793*38fd1498Szrj   if (stmt_after_increment (loop, cand, at))
4794*38fd1498Szrj     aff_combination_add (&delta, &step);
4795*38fd1498Szrj 
4796*38fd1498Szrj   tree_to_aff_combination (iv->base, type, val);
4797*38fd1498Szrj   if (!POINTER_TYPE_P (type))
4798*38fd1498Szrj     aff_combination_convert (val, steptype);
4799*38fd1498Szrj   aff_combination_add (val, &delta);
4800*38fd1498Szrj }
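
/* In effect VAL == base + (niter + after) * step, computed in
   STEPTYPE, where after is 1 if AT is after the increment of CAND and
   0 otherwise.  */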
4801*38fd1498Szrj 
4802*38fd1498Szrj /* Returns period of induction variable iv.  */
4803*38fd1498Szrj 
4804*38fd1498Szrj static tree
4805*38fd1498Szrj iv_period (struct iv *iv)
4806*38fd1498Szrj {
4807*38fd1498Szrj   tree step = iv->step, period, type;
4808*38fd1498Szrj   tree pow2div;
4809*38fd1498Szrj 
4810*38fd1498Szrj   gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4811*38fd1498Szrj 
4812*38fd1498Szrj   type = unsigned_type_for (TREE_TYPE (step));
4813*38fd1498Szrj   /* The period of the iv is lcm (step, type_range)/step - 1,
4814*38fd1498Szrj      i.e., N*type_range/step - 1.  Since type_range is a power
4815*38fd1498Szrj      of two, N == step >> num_of_ending_zeros_binary (step),
4816*38fd1498Szrj      so the final result is
4817*38fd1498Szrj 
4818*38fd1498Szrj        (type_range >> num_of_ending_zeros_binary (step)) - 1.  */
4821*38fd1498Szrj   pow2div = num_ending_zeros (step);
4822*38fd1498Szrj 
4823*38fd1498Szrj   period = build_low_bits_mask (type,
4824*38fd1498Szrj 				(TYPE_PRECISION (type)
4825*38fd1498Szrj 				 - tree_to_uhwi (pow2div)));
4826*38fd1498Szrj 
4827*38fd1498Szrj   return period;
4828*38fd1498Szrj }
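
/* A worked instance of the formula above (editorial, assuming an 8-bit
   unsigned type for brevity): for step == 12 (binary 1100) there are two
   ending zeros, so the mask built is (1 << (8 - 2)) - 1 == 63.  Indeed
   lcm (12, 256) / 12 == 64, so the iv returns to its starting value after
   64 increments, i.e., it may run for 63 iterations without repeating.  */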
4829*38fd1498Szrj 
4830*38fd1498Szrj /* Returns the comparison operator used when eliminating the iv USE.  */
4831*38fd1498Szrj 
4832*38fd1498Szrj static enum tree_code
4833*38fd1498Szrj iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4834*38fd1498Szrj {
4835*38fd1498Szrj   struct loop *loop = data->current_loop;
4836*38fd1498Szrj   basic_block ex_bb;
4837*38fd1498Szrj   edge exit;
4838*38fd1498Szrj 
4839*38fd1498Szrj   ex_bb = gimple_bb (use->stmt);
4840*38fd1498Szrj   exit = EDGE_SUCC (ex_bb, 0);
4841*38fd1498Szrj   if (flow_bb_inside_loop_p (loop, exit->dest))
4842*38fd1498Szrj     exit = EDGE_SUCC (ex_bb, 1);
4843*38fd1498Szrj 
4844*38fd1498Szrj   return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4845*38fd1498Szrj }
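
/* Editorial note: if the TRUE edge of the condition leaves the loop, the
   loop keeps running while the condition is false, so the eliminated test
   becomes "iv == bound" (EQ_EXPR); if the FALSE edge leaves the loop, it
   becomes "iv != bound" (NE_EXPR).  */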
4846*38fd1498Szrj 
4847*38fd1498Szrj /* Returns true if we can prove that BASE - OFFSET does not overflow.  For now,
4848*38fd1498Szrj    we only detect the situation where BASE = SOMETHING + OFFSET, where the
4849*38fd1498Szrj    calculation is performed in a non-wrapping type.
4850*38fd1498Szrj 
4851*38fd1498Szrj    TODO: More generally, we could test for the situation that
4852*38fd1498Szrj 	 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4853*38fd1498Szrj 	 This would require knowing the sign of OFFSET.  */
4854*38fd1498Szrj 
4855*38fd1498Szrj static bool
4856*38fd1498Szrj difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
4857*38fd1498Szrj {
4858*38fd1498Szrj   enum tree_code code;
4859*38fd1498Szrj   tree e1, e2;
4860*38fd1498Szrj   aff_tree aff_e1, aff_e2, aff_offset;
4861*38fd1498Szrj 
4862*38fd1498Szrj   if (!nowrap_type_p (TREE_TYPE (base)))
4863*38fd1498Szrj     return false;
4864*38fd1498Szrj 
4865*38fd1498Szrj   base = expand_simple_operations (base);
4866*38fd1498Szrj 
4867*38fd1498Szrj   if (TREE_CODE (base) == SSA_NAME)
4868*38fd1498Szrj     {
4869*38fd1498Szrj       gimple *stmt = SSA_NAME_DEF_STMT (base);
4870*38fd1498Szrj 
4871*38fd1498Szrj       if (gimple_code (stmt) != GIMPLE_ASSIGN)
4872*38fd1498Szrj 	return false;
4873*38fd1498Szrj 
4874*38fd1498Szrj       code = gimple_assign_rhs_code (stmt);
4875*38fd1498Szrj       if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4876*38fd1498Szrj 	return false;
4877*38fd1498Szrj 
4878*38fd1498Szrj       e1 = gimple_assign_rhs1 (stmt);
4879*38fd1498Szrj       e2 = gimple_assign_rhs2 (stmt);
4880*38fd1498Szrj     }
4881*38fd1498Szrj   else
4882*38fd1498Szrj     {
4883*38fd1498Szrj       code = TREE_CODE (base);
4884*38fd1498Szrj       if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4885*38fd1498Szrj 	return false;
4886*38fd1498Szrj       e1 = TREE_OPERAND (base, 0);
4887*38fd1498Szrj       e2 = TREE_OPERAND (base, 1);
4888*38fd1498Szrj     }
4889*38fd1498Szrj 
4890*38fd1498Szrj   /* Use affine expansion for a deeper inspection to prove the equality.  */
4891*38fd1498Szrj   tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
4892*38fd1498Szrj 				  &aff_e2, &data->name_expansion_cache);
4893*38fd1498Szrj   tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
4894*38fd1498Szrj 				  &aff_offset, &data->name_expansion_cache);
4895*38fd1498Szrj   aff_combination_scale (&aff_offset, -1);
4896*38fd1498Szrj   switch (code)
4897*38fd1498Szrj     {
4898*38fd1498Szrj     case PLUS_EXPR:
4899*38fd1498Szrj       aff_combination_add (&aff_e2, &aff_offset);
4900*38fd1498Szrj       if (aff_combination_zero_p (&aff_e2))
4901*38fd1498Szrj 	return true;
4902*38fd1498Szrj 
4903*38fd1498Szrj       tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
4904*38fd1498Szrj 				      &aff_e1, &data->name_expansion_cache);
4905*38fd1498Szrj       aff_combination_add (&aff_e1, &aff_offset);
4906*38fd1498Szrj       return aff_combination_zero_p (&aff_e1);
4907*38fd1498Szrj 
4908*38fd1498Szrj     case POINTER_PLUS_EXPR:
4909*38fd1498Szrj       aff_combination_add (&aff_e2, &aff_offset);
4910*38fd1498Szrj       return aff_combination_zero_p (&aff_e2);
4911*38fd1498Szrj 
4912*38fd1498Szrj     default:
4913*38fd1498Szrj       return false;
4914*38fd1498Szrj     }
4915*38fd1498Szrj }
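
/* A minimal example of what the test above accepts (editorial): if BASE is
   defined as "base_5 = ptr_3 + off_4" and OFFSET is off_4, then the affine
   form of E2 minus OFFSET is zero, so BASE - OFFSET == ptr_3 cannot
   overflow.  For any OFFSET that does not cancel against an operand of
   BASE, the function conservatively returns false.  */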
4916*38fd1498Szrj 
4917*38fd1498Szrj /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4918*38fd1498Szrj    comparison with CAND.  NITER describes the number of iterations of
4919*38fd1498Szrj    the loops.  If successful, the comparison in COMP_P is altered accordingly.
4920*38fd1498Szrj 
4921*38fd1498Szrj    We aim to handle the following situation:
4922*38fd1498Szrj 
4923*38fd1498Szrj    sometype *base, *p;
4924*38fd1498Szrj    int a, b, i;
4925*38fd1498Szrj 
4926*38fd1498Szrj    i = a;
4927*38fd1498Szrj    p = p_0 = base + a;
4928*38fd1498Szrj 
4929*38fd1498Szrj    do
4930*38fd1498Szrj      {
4931*38fd1498Szrj        bla (*p);
4932*38fd1498Szrj        p++;
4933*38fd1498Szrj        i++;
4934*38fd1498Szrj      }
4935*38fd1498Szrj    while (i < b);
4936*38fd1498Szrj 
4937*38fd1498Szrj    Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4938*38fd1498Szrj    We aim to optimize this to
4939*38fd1498Szrj 
4940*38fd1498Szrj    p = p_0 = base + a;
4941*38fd1498Szrj    do
4942*38fd1498Szrj      {
4943*38fd1498Szrj        bla (*p);
4944*38fd1498Szrj        p++;
4945*38fd1498Szrj      }
4946*38fd1498Szrj    while (p < p_0 - a + b);
4947*38fd1498Szrj 
4948*38fd1498Szrj    This preserves correctness, since the pointer arithmetic does not
4949*38fd1498Szrj    overflow.  More precisely:
4950*38fd1498Szrj 
4951*38fd1498Szrj    1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4952*38fd1498Szrj       overflow in computing it or the values of p.
4953*38fd1498Szrj    2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4954*38fd1498Szrj       overflow.  To prove this, we use the fact that p_0 = base + a.  */
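
/* Plugging in numbers (editorial): with a == 2 and b == 7, the body above
   runs for i == 2 .. 6, so NITER == b - a - 1 == 4 and MAY_BE_ZERO is the
   condition a + 1 > b.  The rewritten exit compares p against
   p_0 - a + b == p_0 + 5, exactly the value of p after its fifth
   increment.  */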
4955*38fd1498Szrj 
4956*38fd1498Szrj static bool
4957*38fd1498Szrj iv_elimination_compare_lt (struct ivopts_data *data,
4958*38fd1498Szrj 			   struct iv_cand *cand, enum tree_code *comp_p,
4959*38fd1498Szrj 			   struct tree_niter_desc *niter)
4960*38fd1498Szrj {
4961*38fd1498Szrj   tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
4962*38fd1498Szrj   struct aff_tree nit, tmpa, tmpb;
4963*38fd1498Szrj   enum tree_code comp;
4964*38fd1498Szrj   HOST_WIDE_INT step;
4965*38fd1498Szrj 
4966*38fd1498Szrj   /* We need to know that the candidate induction variable does not overflow.
4967*38fd1498Szrj      While more complex analysis may be used to prove this, for now just
4968*38fd1498Szrj      check that the variable appears in the original program and that it
4969*38fd1498Szrj      is computed in a type that guarantees no overflows.  */
4970*38fd1498Szrj   cand_type = TREE_TYPE (cand->iv->base);
4971*38fd1498Szrj   if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
4972*38fd1498Szrj     return false;
4973*38fd1498Szrj 
4974*38fd1498Szrj   /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4975*38fd1498Szrj      the calculation of the BOUND could overflow, making the comparison
4976*38fd1498Szrj      invalid.  */
4977*38fd1498Szrj   if (!data->loop_single_exit_p)
4978*38fd1498Szrj     return false;
4979*38fd1498Szrj 
4980*38fd1498Szrj   /* We need to be able to decide whether candidate is increasing or decreasing
4981*38fd1498Szrj      in order to choose the right comparison operator.  */
4982*38fd1498Szrj   if (!cst_and_fits_in_hwi (cand->iv->step))
4983*38fd1498Szrj     return false;
4984*38fd1498Szrj   step = int_cst_value (cand->iv->step);
4985*38fd1498Szrj 
4986*38fd1498Szrj   /* Check that the number of iterations matches the expected pattern:
4987*38fd1498Szrj      a + 1 > b ? 0 : b - a - 1.  */
4988*38fd1498Szrj   mbz = niter->may_be_zero;
4989*38fd1498Szrj   if (TREE_CODE (mbz) == GT_EXPR)
4990*38fd1498Szrj     {
4991*38fd1498Szrj       /* Handle a + 1 > b.  */
4992*38fd1498Szrj       tree op0 = TREE_OPERAND (mbz, 0);
4993*38fd1498Szrj       if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
4994*38fd1498Szrj 	{
4995*38fd1498Szrj 	  a = TREE_OPERAND (op0, 0);
4996*38fd1498Szrj 	  b = TREE_OPERAND (mbz, 1);
4997*38fd1498Szrj 	}
4998*38fd1498Szrj       else
4999*38fd1498Szrj 	return false;
5000*38fd1498Szrj     }
5001*38fd1498Szrj   else if (TREE_CODE (mbz) == LT_EXPR)
5002*38fd1498Szrj     {
5003*38fd1498Szrj       tree op1 = TREE_OPERAND (mbz, 1);
5004*38fd1498Szrj 
5005*38fd1498Szrj       /* Handle b < a + 1.  */
5006*38fd1498Szrj       if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
5007*38fd1498Szrj 	{
5008*38fd1498Szrj 	  a = TREE_OPERAND (op1, 0);
5009*38fd1498Szrj 	  b = TREE_OPERAND (mbz, 0);
5010*38fd1498Szrj 	}
5011*38fd1498Szrj       else
5012*38fd1498Szrj 	return false;
5013*38fd1498Szrj     }
5014*38fd1498Szrj   else
5015*38fd1498Szrj     return false;
5016*38fd1498Szrj 
5017*38fd1498Szrj   /* Expected number of iterations is B - A - 1.  Check that it matches
5018*38fd1498Szrj      the actual number, i.e., that B - A - NITER = 1.  */
5019*38fd1498Szrj   tree_to_aff_combination (niter->niter, nit_type, &nit);
5020*38fd1498Szrj   tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
5021*38fd1498Szrj   tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
5022*38fd1498Szrj   aff_combination_scale (&nit, -1);
5023*38fd1498Szrj   aff_combination_scale (&tmpa, -1);
5024*38fd1498Szrj   aff_combination_add (&tmpb, &tmpa);
5025*38fd1498Szrj   aff_combination_add (&tmpb, &nit);
5026*38fd1498Szrj   if (tmpb.n != 0 || maybe_ne (tmpb.offset, 1))
5027*38fd1498Szrj     return false;
5028*38fd1498Szrj 
5029*38fd1498Szrj   /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
5030*38fd1498Szrj      overflow.  */
5031*38fd1498Szrj   offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
5032*38fd1498Szrj 			cand->iv->step,
5033*38fd1498Szrj 			fold_convert (TREE_TYPE (cand->iv->step), a));
5034*38fd1498Szrj   if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
5035*38fd1498Szrj     return false;
5036*38fd1498Szrj 
5037*38fd1498Szrj   /* Determine the new comparison operator.  */
5038*38fd1498Szrj   comp = step < 0 ? GT_EXPR : LT_EXPR;
5039*38fd1498Szrj   if (*comp_p == NE_EXPR)
5040*38fd1498Szrj     *comp_p = comp;
5041*38fd1498Szrj   else if (*comp_p == EQ_EXPR)
5042*38fd1498Szrj     *comp_p = invert_tree_comparison (comp, false);
5043*38fd1498Szrj   else
5044*38fd1498Szrj     gcc_unreachable ();
5045*38fd1498Szrj 
5046*38fd1498Szrj   return true;
5047*38fd1498Szrj }
5048*38fd1498Szrj 
5049*38fd1498Szrj /* Check whether it is possible to express the condition in USE by a comparison
5050*38fd1498Szrj    with candidate CAND.  If so, store the value to compare against in BOUND,
5051*38fd1498Szrj    and the comparison operator in COMP.  */
5052*38fd1498Szrj 
5053*38fd1498Szrj static bool
5054*38fd1498Szrj may_eliminate_iv (struct ivopts_data *data,
5055*38fd1498Szrj 		  struct iv_use *use, struct iv_cand *cand, tree *bound,
5056*38fd1498Szrj 		  enum tree_code *comp)
5057*38fd1498Szrj {
5058*38fd1498Szrj   basic_block ex_bb;
5059*38fd1498Szrj   edge exit;
5060*38fd1498Szrj   tree period;
5061*38fd1498Szrj   struct loop *loop = data->current_loop;
5062*38fd1498Szrj   aff_tree bnd;
5063*38fd1498Szrj   struct tree_niter_desc *desc = NULL;
5064*38fd1498Szrj 
5065*38fd1498Szrj   if (TREE_CODE (cand->iv->step) != INTEGER_CST)
5066*38fd1498Szrj     return false;
5067*38fd1498Szrj 
5068*38fd1498Szrj   /* For now, this works only for exits that dominate the loop latch.
5069*38fd1498Szrj      TODO: extend to other conditions inside the loop body.  */
5070*38fd1498Szrj   ex_bb = gimple_bb (use->stmt);
5071*38fd1498Szrj   if (use->stmt != last_stmt (ex_bb)
5072*38fd1498Szrj       || gimple_code (use->stmt) != GIMPLE_COND
5073*38fd1498Szrj       || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
5074*38fd1498Szrj     return false;
5075*38fd1498Szrj 
5076*38fd1498Szrj   exit = EDGE_SUCC (ex_bb, 0);
5077*38fd1498Szrj   if (flow_bb_inside_loop_p (loop, exit->dest))
5078*38fd1498Szrj     exit = EDGE_SUCC (ex_bb, 1);
5079*38fd1498Szrj   if (flow_bb_inside_loop_p (loop, exit->dest))
5080*38fd1498Szrj     return false;
5081*38fd1498Szrj 
5082*38fd1498Szrj   desc = niter_for_exit (data, exit);
5083*38fd1498Szrj   if (!desc)
5084*38fd1498Szrj     return false;
5085*38fd1498Szrj 
5086*38fd1498Szrj   /* Determine whether we can use the variable to test the exit condition.
5087*38fd1498Szrj      This is the case iff the period of the induction variable is greater
5088*38fd1498Szrj      than the number of iterations for which the exit condition is true.  */
5089*38fd1498Szrj   period = iv_period (cand->iv);
5090*38fd1498Szrj 
5091*38fd1498Szrj   /* If the number of iterations is constant, compare against it directly.  */
5092*38fd1498Szrj   if (TREE_CODE (desc->niter) == INTEGER_CST)
5093*38fd1498Szrj     {
5094*38fd1498Szrj       /* See cand_value_at.  */
5095*38fd1498Szrj       if (stmt_after_increment (loop, cand, use->stmt))
5096*38fd1498Szrj 	{
5097*38fd1498Szrj 	  if (!tree_int_cst_lt (desc->niter, period))
5098*38fd1498Szrj 	    return false;
5099*38fd1498Szrj 	}
5100*38fd1498Szrj       else
5101*38fd1498Szrj 	{
5102*38fd1498Szrj 	  if (tree_int_cst_lt (period, desc->niter))
5103*38fd1498Szrj 	    return false;
5104*38fd1498Szrj 	}
5105*38fd1498Szrj     }
5106*38fd1498Szrj 
5107*38fd1498Szrj   /* If not, and if this is the only possible exit of the loop, see whether
5108*38fd1498Szrj      we can get a conservative estimate on the number of iterations of the
5109*38fd1498Szrj      entire loop and compare against that instead.  */
5110*38fd1498Szrj   else
5111*38fd1498Szrj     {
5112*38fd1498Szrj       widest_int period_value, max_niter;
5113*38fd1498Szrj 
5114*38fd1498Szrj       max_niter = desc->max;
5115*38fd1498Szrj       if (stmt_after_increment (loop, cand, use->stmt))
5116*38fd1498Szrj 	max_niter += 1;
5117*38fd1498Szrj       period_value = wi::to_widest (period);
5118*38fd1498Szrj       if (wi::gtu_p (max_niter, period_value))
5119*38fd1498Szrj 	{
5120*38fd1498Szrj 	  /* See if we can take advantage of inferred loop bound
5121*38fd1498Szrj 	     information.  */
5122*38fd1498Szrj 	  if (data->loop_single_exit_p)
5123*38fd1498Szrj 	    {
5124*38fd1498Szrj 	      if (!max_loop_iterations (loop, &max_niter))
5125*38fd1498Szrj 		return false;
5126*38fd1498Szrj 	      /* The loop bound is already adjusted by adding 1.  */
5127*38fd1498Szrj 	      if (wi::gtu_p (max_niter, period_value))
5128*38fd1498Szrj 		return false;
5129*38fd1498Szrj 	    }
5130*38fd1498Szrj 	  else
5131*38fd1498Szrj 	    return false;
5132*38fd1498Szrj 	}
5133*38fd1498Szrj     }
5134*38fd1498Szrj 
5135*38fd1498Szrj   cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
5136*38fd1498Szrj 
5137*38fd1498Szrj   *bound = fold_convert (TREE_TYPE (cand->iv->base),
5138*38fd1498Szrj 			 aff_combination_to_tree (&bnd));
5139*38fd1498Szrj   *comp = iv_elimination_compare (data, use);
5140*38fd1498Szrj 
5141*38fd1498Szrj   /* It is unlikely that computing the number of iterations using division
5142*38fd1498Szrj      would be more profitable than keeping the original induction variable.  */
5143*38fd1498Szrj   if (expression_expensive_p (*bound))
5144*38fd1498Szrj     return false;
5145*38fd1498Szrj 
5146*38fd1498Szrj   /* Sometimes it is possible to handle the situation in which the number
5147*38fd1498Szrj      of iterations may be zero unless additional assumptions hold, by using
5148*38fd1498Szrj      < instead of != in the exit condition.
5149*38fd1498Szrj 
5150*38fd1498Szrj      TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
5151*38fd1498Szrj 	   base the exit condition on it.  However, that is often too
5152*38fd1498Szrj 	   expensive.  */
5153*38fd1498Szrj   if (!integer_zerop (desc->may_be_zero))
5154*38fd1498Szrj     return iv_elimination_compare_lt (data, cand, comp, desc);
5155*38fd1498Szrj 
5156*38fd1498Szrj   return true;
5157*38fd1498Szrj }
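
/* A concrete sketch (editorial, not from the sources):

     char *p = buf;
     for (i = 0; i != n; i++)
       *p++ = 0;

   The candidate p has INTEGER_CST step 1 and hence period 2^precision - 1.
   Whenever the niter analysis bounds n below that period, the exit test
   "i != n" may be replaced by "p != buf + n", the value cand_value_at
   computes for p at iteration n.  */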
5158*38fd1498Szrj 
5159*38fd1498Szrj /* Calculates the cost of BOUND, if it is a PARM_DECL.  A PARM_DECL must be
5160*38fd1498Szrj    copied if it is used in the loop body and DATA->body_includes_call is set.  */
5161*38fd1498Szrj 
5162*38fd1498Szrj static int
5163*38fd1498Szrj parm_decl_cost (struct ivopts_data *data, tree bound)
5164*38fd1498Szrj {
5165*38fd1498Szrj   tree sbound = bound;
5166*38fd1498Szrj   STRIP_NOPS (sbound);
5167*38fd1498Szrj 
5168*38fd1498Szrj   if (TREE_CODE (sbound) == SSA_NAME
5169*38fd1498Szrj       && SSA_NAME_IS_DEFAULT_DEF (sbound)
5170*38fd1498Szrj       && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
5171*38fd1498Szrj       && data->body_includes_call)
5172*38fd1498Szrj     return COSTS_N_INSNS (1);
5173*38fd1498Szrj 
5174*38fd1498Szrj   return 0;
5175*38fd1498Szrj }
5176*38fd1498Szrj 
5177*38fd1498Szrj /* Determines cost of computing the use in GROUP with CAND in a condition.  */
5178*38fd1498Szrj 
5179*38fd1498Szrj static bool
5180*38fd1498Szrj determine_group_iv_cost_cond (struct ivopts_data *data,
5181*38fd1498Szrj 			      struct iv_group *group, struct iv_cand *cand)
5182*38fd1498Szrj {
5183*38fd1498Szrj   tree bound = NULL_TREE;
5184*38fd1498Szrj   struct iv *cmp_iv;
5185*38fd1498Szrj   bitmap inv_exprs = NULL;
5186*38fd1498Szrj   bitmap inv_vars_elim = NULL, inv_vars_express = NULL, inv_vars;
5187*38fd1498Szrj   comp_cost elim_cost = infinite_cost, express_cost, cost, bound_cost;
5188*38fd1498Szrj   enum comp_iv_rewrite rewrite_type;
5189*38fd1498Szrj   iv_inv_expr_ent *inv_expr_elim = NULL, *inv_expr_express = NULL, *inv_expr;
5190*38fd1498Szrj   tree *control_var, *bound_cst;
5191*38fd1498Szrj   enum tree_code comp = ERROR_MARK;
5192*38fd1498Szrj   struct iv_use *use = group->vuses[0];
5193*38fd1498Szrj 
5194*38fd1498Szrj   /* Extract condition operands.  */
5195*38fd1498Szrj   rewrite_type = extract_cond_operands (data, use->stmt, &control_var,
5196*38fd1498Szrj 					&bound_cst, NULL, &cmp_iv);
5197*38fd1498Szrj   gcc_assert (rewrite_type != COMP_IV_NA);
5198*38fd1498Szrj 
5199*38fd1498Szrj   /* Try iv elimination.  */
5200*38fd1498Szrj   if (rewrite_type == COMP_IV_ELIM
5201*38fd1498Szrj       && may_eliminate_iv (data, use, cand, &bound, &comp))
5202*38fd1498Szrj     {
5203*38fd1498Szrj       elim_cost = force_var_cost (data, bound, &inv_vars_elim);
5204*38fd1498Szrj       if (elim_cost.cost == 0)
5205*38fd1498Szrj 	elim_cost.cost = parm_decl_cost (data, bound);
5206*38fd1498Szrj       else if (TREE_CODE (bound) == INTEGER_CST)
5207*38fd1498Szrj 	elim_cost.cost = 0;
5208*38fd1498Szrj       /* If we replace a loop condition 'i < n' with 'p < base + n',
5209*38fd1498Szrj 	 inv_vars_elim will have 'base' and 'n' set, which implies that both
5210*38fd1498Szrj 	 'base' and 'n' will be live during the loop.	 More likely,
5211*38fd1498Szrj 	 'base + n' will be loop invariant, resulting in only one live value
5212*38fd1498Szrj 	 during the loop.  So in that case we clear inv_vars_elim and set
5213*38fd1498Szrj 	 inv_expr_elim instead.  */
5214*38fd1498Szrj       if (inv_vars_elim && bitmap_count_bits (inv_vars_elim) > 1)
5215*38fd1498Szrj 	{
5216*38fd1498Szrj 	  inv_expr_elim = get_loop_invariant_expr (data, bound);
5217*38fd1498Szrj 	  bitmap_clear (inv_vars_elim);
5218*38fd1498Szrj 	}
5219*38fd1498Szrj       /* The bound is a loop invariant, so it will be only computed
5220*38fd1498Szrj 	 once.  */
5221*38fd1498Szrj       elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
5222*38fd1498Szrj     }
5223*38fd1498Szrj 
5224*38fd1498Szrj   /* When the condition is a comparison of the candidate IV against
5225*38fd1498Szrj      zero, prefer this IV.
5226*38fd1498Szrj 
5227*38fd1498Szrj      TODO: The constant that we're subtracting from the cost should
5228*38fd1498Szrj      be target-dependent.  This information should be added to the
5229*38fd1498Szrj      target costs for each backend.  */
5230*38fd1498Szrj   if (!elim_cost.infinite_cost_p () /* Do not try to decrease infinite! */
5231*38fd1498Szrj       && integer_zerop (*bound_cst)
5232*38fd1498Szrj       && (operand_equal_p (*control_var, cand->var_after, 0)
5233*38fd1498Szrj 	  || operand_equal_p (*control_var, cand->var_before, 0)))
5234*38fd1498Szrj     elim_cost -= 1;
5235*38fd1498Szrj 
5236*38fd1498Szrj   express_cost = get_computation_cost (data, use, cand, false,
5237*38fd1498Szrj 				       &inv_vars_express, NULL,
5238*38fd1498Szrj 				       &inv_expr_express);
5239*38fd1498Szrj   if (cmp_iv != NULL)
5240*38fd1498Szrj     find_inv_vars (data, &cmp_iv->base, &inv_vars_express);
5241*38fd1498Szrj 
5242*38fd1498Szrj   /* Count the cost of the original bound as well.  */
5243*38fd1498Szrj   bound_cost = force_var_cost (data, *bound_cst, NULL);
5244*38fd1498Szrj   if (bound_cost.cost == 0)
5245*38fd1498Szrj     bound_cost.cost = parm_decl_cost (data, *bound_cst);
5246*38fd1498Szrj   else if (TREE_CODE (*bound_cst) == INTEGER_CST)
5247*38fd1498Szrj     bound_cost.cost = 0;
5248*38fd1498Szrj   express_cost += bound_cost;
5249*38fd1498Szrj 
5250*38fd1498Szrj   /* Choose the better approach, preferring the eliminated IV. */
5251*38fd1498Szrj   if (elim_cost <= express_cost)
5252*38fd1498Szrj     {
5253*38fd1498Szrj       cost = elim_cost;
5254*38fd1498Szrj       inv_vars = inv_vars_elim;
5255*38fd1498Szrj       inv_vars_elim = NULL;
5256*38fd1498Szrj       inv_expr = inv_expr_elim;
5257*38fd1498Szrj     }
5258*38fd1498Szrj   else
5259*38fd1498Szrj     {
5260*38fd1498Szrj       cost = express_cost;
5261*38fd1498Szrj       inv_vars = inv_vars_express;
5262*38fd1498Szrj       inv_vars_express = NULL;
5263*38fd1498Szrj       bound = NULL_TREE;
5264*38fd1498Szrj       comp = ERROR_MARK;
5265*38fd1498Szrj       inv_expr = inv_expr_express;
5266*38fd1498Szrj     }
5267*38fd1498Szrj 
5268*38fd1498Szrj   if (inv_expr)
5269*38fd1498Szrj     {
5270*38fd1498Szrj       inv_exprs = BITMAP_ALLOC (NULL);
5271*38fd1498Szrj       bitmap_set_bit (inv_exprs, inv_expr->id);
5272*38fd1498Szrj     }
5273*38fd1498Szrj   set_group_iv_cost (data, group, cand, cost,
5274*38fd1498Szrj 		     inv_vars, bound, comp, inv_exprs);
5275*38fd1498Szrj 
5276*38fd1498Szrj   if (inv_vars_elim)
5277*38fd1498Szrj     BITMAP_FREE (inv_vars_elim);
5278*38fd1498Szrj   if (inv_vars_express)
5279*38fd1498Szrj     BITMAP_FREE (inv_vars_express);
5280*38fd1498Szrj 
5281*38fd1498Szrj   return !cost.infinite_cost_p ();
5282*38fd1498Szrj }
5283*38fd1498Szrj 
5284*38fd1498Szrj /* Determines the cost of computing the uses in GROUP with CAND.  Returns
5285*38fd1498Szrj    false if the group cannot be represented with CAND.  */
5286*38fd1498Szrj 
5287*38fd1498Szrj static bool
5288*38fd1498Szrj determine_group_iv_cost (struct ivopts_data *data,
5289*38fd1498Szrj 			 struct iv_group *group, struct iv_cand *cand)
5290*38fd1498Szrj {
5291*38fd1498Szrj   switch (group->type)
5292*38fd1498Szrj     {
5293*38fd1498Szrj     case USE_NONLINEAR_EXPR:
5294*38fd1498Szrj       return determine_group_iv_cost_generic (data, group, cand);
5295*38fd1498Szrj 
5296*38fd1498Szrj     case USE_REF_ADDRESS:
5297*38fd1498Szrj     case USE_PTR_ADDRESS:
5298*38fd1498Szrj       return determine_group_iv_cost_address (data, group, cand);
5299*38fd1498Szrj 
5300*38fd1498Szrj     case USE_COMPARE:
5301*38fd1498Szrj       return determine_group_iv_cost_cond (data, group, cand);
5302*38fd1498Szrj 
5303*38fd1498Szrj     default:
5304*38fd1498Szrj       gcc_unreachable ();
5305*38fd1498Szrj     }
5306*38fd1498Szrj }
5307*38fd1498Szrj 
5308*38fd1498Szrj /* Return true if get_computation_cost indicates that autoincrement is
5309*38fd1498Szrj    a possibility for the pair of USE and CAND, false otherwise.  */
5310*38fd1498Szrj 
5311*38fd1498Szrj static bool
5312*38fd1498Szrj autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
5313*38fd1498Szrj 			   struct iv_cand *cand)
5314*38fd1498Szrj {
5315*38fd1498Szrj   if (!address_p (use->type))
5316*38fd1498Szrj     return false;
5317*38fd1498Szrj 
5318*38fd1498Szrj   bool can_autoinc = false;
5319*38fd1498Szrj   get_computation_cost (data, use, cand, true, NULL, &can_autoinc, NULL);
5320*38fd1498Szrj   return can_autoinc;
5321*38fd1498Szrj }
5322*38fd1498Szrj 
5323*38fd1498Szrj /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5324*38fd1498Szrj    use that allows autoincrement, and set their AINC_USE if possible.  */
5325*38fd1498Szrj 
5326*38fd1498Szrj static void
5327*38fd1498Szrj set_autoinc_for_original_candidates (struct ivopts_data *data)
5328*38fd1498Szrj {
5329*38fd1498Szrj   unsigned i, j;
5330*38fd1498Szrj 
5331*38fd1498Szrj   for (i = 0; i < data->vcands.length (); i++)
5332*38fd1498Szrj     {
5333*38fd1498Szrj       struct iv_cand *cand = data->vcands[i];
5334*38fd1498Szrj       struct iv_use *closest_before = NULL;
5335*38fd1498Szrj       struct iv_use *closest_after = NULL;
5336*38fd1498Szrj       if (cand->pos != IP_ORIGINAL)
5337*38fd1498Szrj 	continue;
5338*38fd1498Szrj 
5339*38fd1498Szrj       for (j = 0; j < data->vgroups.length (); j++)
5340*38fd1498Szrj 	{
5341*38fd1498Szrj 	  struct iv_group *group = data->vgroups[j];
5342*38fd1498Szrj 	  struct iv_use *use = group->vuses[0];
5343*38fd1498Szrj 	  unsigned uid = gimple_uid (use->stmt);
5344*38fd1498Szrj 
5345*38fd1498Szrj 	  if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
5346*38fd1498Szrj 	    continue;
5347*38fd1498Szrj 
5348*38fd1498Szrj 	  if (uid < gimple_uid (cand->incremented_at)
5349*38fd1498Szrj 	      && (closest_before == NULL
5350*38fd1498Szrj 		  || uid > gimple_uid (closest_before->stmt)))
5351*38fd1498Szrj 	    closest_before = use;
5352*38fd1498Szrj 
5353*38fd1498Szrj 	  if (uid > gimple_uid (cand->incremented_at)
5354*38fd1498Szrj 	      && (closest_after == NULL
5355*38fd1498Szrj 		  || uid < gimple_uid (closest_after->stmt)))
5356*38fd1498Szrj 	    closest_after = use;
5357*38fd1498Szrj 	}
5358*38fd1498Szrj 
5359*38fd1498Szrj       if (closest_before != NULL
5360*38fd1498Szrj 	  && autoinc_possible_for_pair (data, closest_before, cand))
5361*38fd1498Szrj 	cand->ainc_use = closest_before;
5362*38fd1498Szrj       else if (closest_after != NULL
5363*38fd1498Szrj 	       && autoinc_possible_for_pair (data, closest_after, cand))
5364*38fd1498Szrj 	cand->ainc_use = closest_after;
5365*38fd1498Szrj     }
5366*38fd1498Szrj }
5367*38fd1498Szrj 
5368*38fd1498Szrj /* Relate each compare use with all candidates.  */
5369*38fd1498Szrj 
5370*38fd1498Szrj static void
5371*38fd1498Szrj relate_compare_use_with_all_cands (struct ivopts_data *data)
5372*38fd1498Szrj {
5373*38fd1498Szrj   unsigned i, count = data->vcands.length ();
5374*38fd1498Szrj   for (i = 0; i < data->vgroups.length (); i++)
5375*38fd1498Szrj     {
5376*38fd1498Szrj       struct iv_group *group = data->vgroups[i];
5377*38fd1498Szrj 
5378*38fd1498Szrj       if (group->type == USE_COMPARE)
5379*38fd1498Szrj 	bitmap_set_range (group->related_cands, 0, count);
5380*38fd1498Szrj     }
5381*38fd1498Szrj }
5382*38fd1498Szrj 
5383*38fd1498Szrj /* Finds the candidates for the induction variables.  */
5384*38fd1498Szrj 
5385*38fd1498Szrj static void
5386*38fd1498Szrj find_iv_candidates (struct ivopts_data *data)
5387*38fd1498Szrj {
5388*38fd1498Szrj   /* Add commonly used ivs.  */
5389*38fd1498Szrj   add_standard_iv_candidates (data);
5390*38fd1498Szrj 
5391*38fd1498Szrj   /* Add old induction variables.  */
5392*38fd1498Szrj   add_iv_candidate_for_bivs (data);
5393*38fd1498Szrj 
5394*38fd1498Szrj   /* Add induction variables derived from uses.  */
5395*38fd1498Szrj   add_iv_candidate_for_groups (data);
5396*38fd1498Szrj 
5397*38fd1498Szrj   set_autoinc_for_original_candidates (data);
5398*38fd1498Szrj 
5399*38fd1498Szrj   /* Record the important candidates.  */
5400*38fd1498Szrj   record_important_candidates (data);
5401*38fd1498Szrj 
5402*38fd1498Szrj   /* Relate compare iv_use with all candidates.  */
5403*38fd1498Szrj   if (!data->consider_all_candidates)
5404*38fd1498Szrj     relate_compare_use_with_all_cands (data);
5405*38fd1498Szrj 
5406*38fd1498Szrj   if (dump_file && (dump_flags & TDF_DETAILS))
5407*38fd1498Szrj     {
5408*38fd1498Szrj       unsigned i;
5409*38fd1498Szrj 
5410*38fd1498Szrj       fprintf (dump_file, "\n<Important Candidates>:\t");
5411*38fd1498Szrj       for (i = 0; i < data->vcands.length (); i++)
5412*38fd1498Szrj 	if (data->vcands[i]->important)
5413*38fd1498Szrj 	  fprintf (dump_file, " %d,", data->vcands[i]->id);
5414*38fd1498Szrj       fprintf (dump_file, "\n");
5415*38fd1498Szrj 
5416*38fd1498Szrj       fprintf (dump_file, "\n<Group, Cand> Related:\n");
5417*38fd1498Szrj       for (i = 0; i < data->vgroups.length (); i++)
5418*38fd1498Szrj 	{
5419*38fd1498Szrj 	  struct iv_group *group = data->vgroups[i];
5420*38fd1498Szrj 
5421*38fd1498Szrj 	  if (group->related_cands)
5422*38fd1498Szrj 	    {
5423*38fd1498Szrj 	      fprintf (dump_file, "  Group %d:\t", group->id);
5424*38fd1498Szrj 	      dump_bitmap (dump_file, group->related_cands);
5425*38fd1498Szrj 	    }
5426*38fd1498Szrj 	}
5427*38fd1498Szrj       fprintf (dump_file, "\n");
5428*38fd1498Szrj     }
5429*38fd1498Szrj }
5430*38fd1498Szrj 
5431*38fd1498Szrj /* Determines the costs of computing each use of an iv with each iv candidate.  */
5432*38fd1498Szrj 
5433*38fd1498Szrj static void
5434*38fd1498Szrj determine_group_iv_costs (struct ivopts_data *data)
5435*38fd1498Szrj {
5436*38fd1498Szrj   unsigned i, j;
5437*38fd1498Szrj   struct iv_cand *cand;
5438*38fd1498Szrj   struct iv_group *group;
5439*38fd1498Szrj   bitmap to_clear = BITMAP_ALLOC (NULL);
5440*38fd1498Szrj 
5441*38fd1498Szrj   alloc_use_cost_map (data);
5442*38fd1498Szrj 
5443*38fd1498Szrj   for (i = 0; i < data->vgroups.length (); i++)
5444*38fd1498Szrj     {
5445*38fd1498Szrj       group = data->vgroups[i];
5446*38fd1498Szrj 
5447*38fd1498Szrj       if (data->consider_all_candidates)
5448*38fd1498Szrj 	{
5449*38fd1498Szrj 	  for (j = 0; j < data->vcands.length (); j++)
5450*38fd1498Szrj 	    {
5451*38fd1498Szrj 	      cand = data->vcands[j];
5452*38fd1498Szrj 	      determine_group_iv_cost (data, group, cand);
5453*38fd1498Szrj 	    }
5454*38fd1498Szrj 	}
5455*38fd1498Szrj       else
5456*38fd1498Szrj 	{
5457*38fd1498Szrj 	  bitmap_iterator bi;
5458*38fd1498Szrj 
5459*38fd1498Szrj 	  EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, j, bi)
5460*38fd1498Szrj 	    {
5461*38fd1498Szrj 	      cand = data->vcands[j];
5462*38fd1498Szrj 	      if (!determine_group_iv_cost (data, group, cand))
5463*38fd1498Szrj 		bitmap_set_bit (to_clear, j);
5464*38fd1498Szrj 	    }
5465*38fd1498Szrj 
5466*38fd1498Szrj 	  /* Remove the candidates for which the cost is infinite from
5467*38fd1498Szrj 	     the list of related candidates.  */
5468*38fd1498Szrj 	  bitmap_and_compl_into (group->related_cands, to_clear);
5469*38fd1498Szrj 	  bitmap_clear (to_clear);
5470*38fd1498Szrj 	}
5471*38fd1498Szrj     }
5472*38fd1498Szrj 
5473*38fd1498Szrj   BITMAP_FREE (to_clear);
5474*38fd1498Szrj 
5475*38fd1498Szrj   if (dump_file && (dump_flags & TDF_DETAILS))
5476*38fd1498Szrj     {
5477*38fd1498Szrj       bitmap_iterator bi;
5478*38fd1498Szrj 
5479*38fd1498Szrj       /* Dump invariant variables.  */
5480*38fd1498Szrj       fprintf (dump_file, "\n<Invariant Vars>:\n");
5481*38fd1498Szrj       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5482*38fd1498Szrj 	{
5483*38fd1498Szrj 	  struct version_info *info = ver_info (data, i);
5484*38fd1498Szrj 	  if (info->inv_id)
5485*38fd1498Szrj 	    {
5486*38fd1498Szrj 	      fprintf (dump_file, "Inv %d:\t", info->inv_id);
5487*38fd1498Szrj 	      print_generic_expr (dump_file, info->name, TDF_SLIM);
5488*38fd1498Szrj 	      fprintf (dump_file, "%s\n",
5489*38fd1498Szrj 		       info->has_nonlin_use ? "" : "\t(eliminable)");
5490*38fd1498Szrj 	    }
5491*38fd1498Szrj 	}
5492*38fd1498Szrj 
5493*38fd1498Szrj       /* Dump invariant expressions.  */
5494*38fd1498Szrj       fprintf (dump_file, "\n<Invariant Expressions>:\n");
5495*38fd1498Szrj       auto_vec <iv_inv_expr_ent *> list (data->inv_expr_tab->elements ());
5496*38fd1498Szrj 
5497*38fd1498Szrj       for (hash_table<iv_inv_expr_hasher>::iterator it
5498*38fd1498Szrj 	   = data->inv_expr_tab->begin (); it != data->inv_expr_tab->end ();
5499*38fd1498Szrj 	   ++it)
5500*38fd1498Szrj 	list.safe_push (*it);
5501*38fd1498Szrj 
5502*38fd1498Szrj       list.qsort (sort_iv_inv_expr_ent);
5503*38fd1498Szrj 
5504*38fd1498Szrj       for (i = 0; i < list.length (); ++i)
5505*38fd1498Szrj 	{
5506*38fd1498Szrj 	  fprintf (dump_file, "inv_expr %d: \t", list[i]->id);
5507*38fd1498Szrj 	  print_generic_expr (dump_file, list[i]->expr, TDF_SLIM);
5508*38fd1498Szrj 	  fprintf (dump_file, "\n");
5509*38fd1498Szrj 	}
5510*38fd1498Szrj 
5511*38fd1498Szrj       fprintf (dump_file, "\n<Group-candidate Costs>:\n");
5512*38fd1498Szrj 
5513*38fd1498Szrj       for (i = 0; i < data->vgroups.length (); i++)
5514*38fd1498Szrj 	{
5515*38fd1498Szrj 	  group = data->vgroups[i];
5516*38fd1498Szrj 
5517*38fd1498Szrj 	  fprintf (dump_file, "Group %d:\n", i);
5518*38fd1498Szrj 	  fprintf (dump_file, "  cand\tcost\tcompl.\tinv.expr.\tinv.vars\n");
5519*38fd1498Szrj 	  for (j = 0; j < group->n_map_members; j++)
5520*38fd1498Szrj 	    {
5521*38fd1498Szrj 	      if (!group->cost_map[j].cand
5522*38fd1498Szrj 		  || group->cost_map[j].cost.infinite_cost_p ())
5523*38fd1498Szrj 		continue;
5524*38fd1498Szrj 
5525*38fd1498Szrj 	      fprintf (dump_file, "  %d\t%d\t%d\t",
5526*38fd1498Szrj 		       group->cost_map[j].cand->id,
5527*38fd1498Szrj 		       group->cost_map[j].cost.cost,
5528*38fd1498Szrj 		       group->cost_map[j].cost.complexity);
5529*38fd1498Szrj 	      if (!group->cost_map[j].inv_exprs
5530*38fd1498Szrj 		  || bitmap_empty_p (group->cost_map[j].inv_exprs))
5531*38fd1498Szrj 		fprintf (dump_file, "NIL;\t");
5532*38fd1498Szrj 	      else
5533*38fd1498Szrj 		bitmap_print (dump_file,
5534*38fd1498Szrj 			      group->cost_map[j].inv_exprs, "", ";\t");
5535*38fd1498Szrj 	      if (!group->cost_map[j].inv_vars
5536*38fd1498Szrj 		  || bitmap_empty_p (group->cost_map[j].inv_vars))
5537*38fd1498Szrj 		fprintf (dump_file, "NIL;\n");
5538*38fd1498Szrj 	      else
5539*38fd1498Szrj 		bitmap_print (dump_file,
5540*38fd1498Szrj 			      group->cost_map[j].inv_vars, "", "\n");
5541*38fd1498Szrj 	    }
5542*38fd1498Szrj 
5543*38fd1498Szrj 	  fprintf (dump_file, "\n");
5544*38fd1498Szrj 	}
5545*38fd1498Szrj       fprintf (dump_file, "\n");
5546*38fd1498Szrj     }
5547*38fd1498Szrj }
5548*38fd1498Szrj 
5549*38fd1498Szrj /* Determines cost of the candidate CAND.  */
5550*38fd1498Szrj 
5551*38fd1498Szrj static void
5552*38fd1498Szrj determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5553*38fd1498Szrj {
5554*38fd1498Szrj   comp_cost cost_base;
5555*38fd1498Szrj   unsigned cost, cost_step;
5556*38fd1498Szrj   tree base;
5557*38fd1498Szrj 
5558*38fd1498Szrj   gcc_assert (cand->iv != NULL);
5559*38fd1498Szrj 
5560*38fd1498Szrj   /* There are two costs associated with the candidate -- its increment
5561*38fd1498Szrj      and its initialization.  The latter is almost negligible for any loop
5562*38fd1498Szrj      that rolls enough, so we give it only a small weight.  */
5563*38fd1498Szrj 
5564*38fd1498Szrj   base = cand->iv->base;
5565*38fd1498Szrj   cost_base = force_var_cost (data, base, NULL);
5566*38fd1498Szrj   /* It will be exceptional that the iv register happens to be initialized with
5567*38fd1498Szrj      the proper value at no cost.  In general, there will at least be a regcopy
5568*38fd1498Szrj      or a const set.  */
5569*38fd1498Szrj   if (cost_base.cost == 0)
5570*38fd1498Szrj     cost_base.cost = COSTS_N_INSNS (1);
5571*38fd1498Szrj   cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5572*38fd1498Szrj 
5573*38fd1498Szrj   cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5574*38fd1498Szrj 
5575*38fd1498Szrj   /* Prefer the original ivs unless we may gain something by replacing it.
5576*38fd1498Szrj      The reason is to make debugging simpler; so this is not relevant for
5577*38fd1498Szrj      artificial ivs created by other optimization passes.  */
5578*38fd1498Szrj   if (cand->pos != IP_ORIGINAL
5579*38fd1498Szrj       || !SSA_NAME_VAR (cand->var_before)
5580*38fd1498Szrj       || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5581*38fd1498Szrj     cost++;
5582*38fd1498Szrj 
5583*38fd1498Szrj   /* Prefer not to insert statements into latch unless there are some
5584*38fd1498Szrj      already (so that we do not create unnecessary jumps).  */
5585*38fd1498Szrj   if (cand->pos == IP_END
5586*38fd1498Szrj       && empty_block_p (ip_end_pos (data->current_loop)))
5587*38fd1498Szrj     cost++;
5588*38fd1498Szrj 
5589*38fd1498Szrj   cand->cost = cost;
5590*38fd1498Szrj   cand->cost_step = cost_step;
5591*38fd1498Szrj }
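
/* Numerically (editorial example): if forcing the base costs
   COSTS_N_INSNS (1) and an addition in the iv's mode costs
   COSTS_N_INSNS (1), the candidate cost is the add cost plus the setup
   cost scaled down by adjust_setup_cost, plus 1 unless the candidate
   preserves an original, user-visible iv, and plus 1 more if it would
   insert code into an otherwise empty latch block.  */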
5592*38fd1498Szrj 
5593*38fd1498Szrj /* Determines costs of computation of the candidates.  */
5594*38fd1498Szrj 
5595*38fd1498Szrj static void
5596*38fd1498Szrj determine_iv_costs (struct ivopts_data *data)
5597*38fd1498Szrj {
5598*38fd1498Szrj   unsigned i;
5599*38fd1498Szrj 
5600*38fd1498Szrj   if (dump_file && (dump_flags & TDF_DETAILS))
5601*38fd1498Szrj     {
5602*38fd1498Szrj       fprintf (dump_file, "<Candidate Costs>:\n");
5603*38fd1498Szrj       fprintf (dump_file, "  cand\tcost\n");
5604*38fd1498Szrj     }
5605*38fd1498Szrj 
5606*38fd1498Szrj   for (i = 0; i < data->vcands.length (); i++)
5607*38fd1498Szrj     {
5608*38fd1498Szrj       struct iv_cand *cand = data->vcands[i];
5609*38fd1498Szrj 
5610*38fd1498Szrj       determine_iv_cost (data, cand);
5611*38fd1498Szrj 
5612*38fd1498Szrj       if (dump_file && (dump_flags & TDF_DETAILS))
5613*38fd1498Szrj 	fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
5614*38fd1498Szrj     }
5615*38fd1498Szrj 
5616*38fd1498Szrj   if (dump_file && (dump_flags & TDF_DETAILS))
5617*38fd1498Szrj     fprintf (dump_file, "\n");
5618*38fd1498Szrj }
5619*38fd1498Szrj 
5620*38fd1498Szrj /* Estimate the register pressure for a loop having N_INVS invariants and
5621*38fd1498Szrj    N_CANDS induction variables.  Note that N_INVS includes both invariant
5622*38fd1498Szrj    variables and invariant expressions.  */
5623*38fd1498Szrj 
5624*38fd1498Szrj static unsigned
5625*38fd1498Szrj ivopts_estimate_reg_pressure (struct ivopts_data *data, unsigned n_invs,
5626*38fd1498Szrj 			      unsigned n_cands)
5627*38fd1498Szrj {
5628*38fd1498Szrj   unsigned cost;
5629*38fd1498Szrj   unsigned n_old = data->regs_used, n_new = n_invs + n_cands;
5630*38fd1498Szrj   unsigned regs_needed = n_new + n_old, available_regs = target_avail_regs;
5631*38fd1498Szrj   bool speed = data->speed;
5632*38fd1498Szrj 
5633*38fd1498Szrj   /* If there is a call in the loop body, the call-clobbered registers
5634*38fd1498Szrj      are not available for loop invariants.  */
5635*38fd1498Szrj   if (data->body_includes_call)
5636*38fd1498Szrj     available_regs = available_regs - target_clobbered_regs;
5637*38fd1498Szrj 
5638*38fd1498Szrj   /* If we have enough registers.  */
5639*38fd1498Szrj   if (regs_needed + target_res_regs < available_regs)
5640*38fd1498Szrj     cost = n_new;
5641*38fd1498Szrj   /* If close to running out of registers, try to preserve them.  */
5642*38fd1498Szrj   else if (regs_needed <= available_regs)
5643*38fd1498Szrj     cost = target_reg_cost [speed] * regs_needed;
5644*38fd1498Szrj   /* If we run out of available registers but the number of candidates
5645*38fd1498Szrj      still fits, we penalize the extra registers using target_spill_cost.  */
5646*38fd1498Szrj   else if (n_cands <= available_regs)
5647*38fd1498Szrj     cost = target_reg_cost [speed] * available_regs
5648*38fd1498Szrj 	   + target_spill_cost [speed] * (regs_needed - available_regs);
5649*38fd1498Szrj   /* If even the number of candidates exceeds the available registers, we
5650*38fd1498Szrj      penalize the extra candidate registers using target_spill_cost * 2,
5651*38fd1498Szrj      because it is more expensive to spill an induction variable than an
     invariant.  */
5652*38fd1498Szrj   else
5653*38fd1498Szrj     cost = target_reg_cost [speed] * available_regs
5654*38fd1498Szrj 	   + target_spill_cost [speed] * (n_cands - available_regs) * 2
5655*38fd1498Szrj 	   + target_spill_cost [speed] * (regs_needed - n_cands);
5656*38fd1498Szrj 
5657*38fd1498Szrj   /* Finally, add the number of candidates, so that we prefer eliminating
5658*38fd1498Szrj      induction variables if possible.  */
5659*38fd1498Szrj   return cost + n_cands;
5660*38fd1498Szrj }
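
/* A worked instance of the tiers above (editorial, with made-up target
   numbers): assume 14 available registers after the call-clobber
   adjustment, target_res_regs == 3, regs_used == 4, n_invs == 2 and
   n_cands == 3.  Then n_new == 5 and regs_needed == 9; since 9 + 3 < 14
   the first tier applies, cost == n_new == 5, and the function returns
   5 + n_cands == 8.  */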
5661*38fd1498Szrj 
5662*38fd1498Szrj /* For each size of the induction variable set determine the penalty.  */
5663*38fd1498Szrj 
5664*38fd1498Szrj static void
5665*38fd1498Szrj determine_set_costs (struct ivopts_data *data)
5666*38fd1498Szrj {
5667*38fd1498Szrj   unsigned j, n;
5668*38fd1498Szrj   gphi *phi;
5669*38fd1498Szrj   gphi_iterator psi;
5670*38fd1498Szrj   tree op;
5671*38fd1498Szrj   struct loop *loop = data->current_loop;
5672*38fd1498Szrj   bitmap_iterator bi;
5673*38fd1498Szrj 
5674*38fd1498Szrj   if (dump_file && (dump_flags & TDF_DETAILS))
5675*38fd1498Szrj     {
5676*38fd1498Szrj       fprintf (dump_file, "<Global Costs>:\n");
5677*38fd1498Szrj       fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
5678*38fd1498Szrj       fprintf (dump_file, "  target_clobbered_regs %d\n", target_clobbered_regs);
5679*38fd1498Szrj       fprintf (dump_file, "  target_reg_cost %d\n", target_reg_cost[data->speed]);
5680*38fd1498Szrj       fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost[data->speed]);
5681*38fd1498Szrj     }
5682*38fd1498Szrj 
5683*38fd1498Szrj   n = 0;
5684*38fd1498Szrj   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5685*38fd1498Szrj     {
5686*38fd1498Szrj       phi = psi.phi ();
5687*38fd1498Szrj       op = PHI_RESULT (phi);
5688*38fd1498Szrj 
5689*38fd1498Szrj       if (virtual_operand_p (op))
5690*38fd1498Szrj 	continue;
5691*38fd1498Szrj 
5692*38fd1498Szrj       if (get_iv (data, op))
5693*38fd1498Szrj 	continue;
5694*38fd1498Szrj 
5695*38fd1498Szrj       if (!POINTER_TYPE_P (TREE_TYPE (op))
5696*38fd1498Szrj 	  && !INTEGRAL_TYPE_P (TREE_TYPE (op)))
5697*38fd1498Szrj 	continue;
5698*38fd1498Szrj 
5699*38fd1498Szrj       n++;
5700*38fd1498Szrj     }
5701*38fd1498Szrj 
5702*38fd1498Szrj   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5703*38fd1498Szrj     {
5704*38fd1498Szrj       struct version_info *info = ver_info (data, j);
5705*38fd1498Szrj 
5706*38fd1498Szrj       if (info->inv_id && info->has_nonlin_use)
5707*38fd1498Szrj 	n++;
5708*38fd1498Szrj     }
5709*38fd1498Szrj 
5710*38fd1498Szrj   data->regs_used = n;
5711*38fd1498Szrj   if (dump_file && (dump_flags & TDF_DETAILS))
5712*38fd1498Szrj     fprintf (dump_file, "  regs_used %d\n", n);
5713*38fd1498Szrj 
5714*38fd1498Szrj   if (dump_file && (dump_flags & TDF_DETAILS))
5715*38fd1498Szrj     {
5716*38fd1498Szrj       fprintf (dump_file, "  cost for size:\n");
5717*38fd1498Szrj       fprintf (dump_file, "  ivs\tcost\n");
5718*38fd1498Szrj       for (j = 0; j <= 2 * target_avail_regs; j++)
5719*38fd1498Szrj 	fprintf (dump_file, "  %d\t%d\n", j,
5720*38fd1498Szrj 		 ivopts_estimate_reg_pressure (data, 0, j));
5721*38fd1498Szrj       fprintf (dump_file, "\n");
5722*38fd1498Szrj     }
5723*38fd1498Szrj }
5724*38fd1498Szrj 
5725*38fd1498Szrj /* Returns true if A is a cheaper cost pair than B.  */
5726*38fd1498Szrj 
5727*38fd1498Szrj static bool
5728*38fd1498Szrj cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5729*38fd1498Szrj {
5730*38fd1498Szrj   if (!a)
5731*38fd1498Szrj     return false;
5732*38fd1498Szrj 
5733*38fd1498Szrj   if (!b)
5734*38fd1498Szrj     return true;
5735*38fd1498Szrj 
5736*38fd1498Szrj   if (a->cost < b->cost)
5737*38fd1498Szrj     return true;
5738*38fd1498Szrj 
5739*38fd1498Szrj   if (b->cost < a->cost)
5740*38fd1498Szrj     return false;
5741*38fd1498Szrj 
5742*38fd1498Szrj   /* In case the costs are the same, prefer the cheaper candidate.  */
5743*38fd1498Szrj   if (a->cand->cost < b->cand->cost)
5744*38fd1498Szrj     return true;
5745*38fd1498Szrj 
5746*38fd1498Szrj   return false;
5747*38fd1498Szrj }
5748*38fd1498Szrj 
5749*38fd1498Szrj /* Compare if A is a more expensive cost pair than B.  Return 1, 0 and -1
5750*38fd1498Szrj    for more expensive, equal and cheaper respectively.  */
5751*38fd1498Szrj 
5752*38fd1498Szrj static int
5753*38fd1498Szrj compare_cost_pair (struct cost_pair *a, struct cost_pair *b)
5754*38fd1498Szrj {
5755*38fd1498Szrj   if (cheaper_cost_pair (a, b))
5756*38fd1498Szrj     return -1;
5757*38fd1498Szrj   if (cheaper_cost_pair (b, a))
5758*38fd1498Szrj     return 1;
5759*38fd1498Szrj 
5760*38fd1498Szrj   return 0;
5761*38fd1498Szrj }
5762*38fd1498Szrj 
5763*38fd1498Szrj /* Returns the cost pair by which GROUP is expressed in IVS.  */
5764*38fd1498Szrj 
5765*38fd1498Szrj static struct cost_pair *
5766*38fd1498Szrj iv_ca_cand_for_group (struct iv_ca *ivs, struct iv_group *group)
5767*38fd1498Szrj {
5768*38fd1498Szrj   return ivs->cand_for_group[group->id];
5769*38fd1498Szrj }
5770*38fd1498Szrj 
5771*38fd1498Szrj /* Computes the cost field of IVS structure.  */
5772*38fd1498Szrj 
5773*38fd1498Szrj static void
5774*38fd1498Szrj iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5775*38fd1498Szrj {
5776*38fd1498Szrj   comp_cost cost = ivs->cand_use_cost;
5777*38fd1498Szrj 
5778*38fd1498Szrj   cost += ivs->cand_cost;
5779*38fd1498Szrj   cost += ivopts_estimate_reg_pressure (data, ivs->n_invs, ivs->n_cands);
5780*38fd1498Szrj   ivs->cost = cost;
5781*38fd1498Szrj }
5782*38fd1498Szrj 
5783*38fd1498Szrj /* Removes a use of the invariants in set INVS by decreasing the counters
5784*38fd1498Szrj    in N_INV_USES and updating IVS accordingly.  */
5785*38fd1498Szrj 
5786*38fd1498Szrj static void
5787*38fd1498Szrj iv_ca_set_remove_invs (struct iv_ca *ivs, bitmap invs, unsigned *n_inv_uses)
5788*38fd1498Szrj {
5789*38fd1498Szrj   bitmap_iterator bi;
5790*38fd1498Szrj   unsigned iid;
5791*38fd1498Szrj 
5792*38fd1498Szrj   if (!invs)
5793*38fd1498Szrj     return;
5794*38fd1498Szrj 
5795*38fd1498Szrj   gcc_assert (n_inv_uses != NULL);
5796*38fd1498Szrj   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5797*38fd1498Szrj     {
5798*38fd1498Szrj       n_inv_uses[iid]--;
5799*38fd1498Szrj       if (n_inv_uses[iid] == 0)
5800*38fd1498Szrj 	ivs->n_invs--;
5801*38fd1498Szrj     }
5802*38fd1498Szrj }
5803*38fd1498Szrj 
5804*38fd1498Szrj /* Set GROUP not to be expressed by any candidate in IVS.  */
5805*38fd1498Szrj 
5806*38fd1498Szrj static void
5807*38fd1498Szrj iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5808*38fd1498Szrj 		 struct iv_group *group)
5809*38fd1498Szrj {
5810*38fd1498Szrj   unsigned gid = group->id, cid;
5811*38fd1498Szrj   struct cost_pair *cp;
5812*38fd1498Szrj 
5813*38fd1498Szrj   cp = ivs->cand_for_group[gid];
5814*38fd1498Szrj   if (!cp)
5815*38fd1498Szrj     return;
5816*38fd1498Szrj   cid = cp->cand->id;
5817*38fd1498Szrj 
5818*38fd1498Szrj   ivs->bad_groups++;
5819*38fd1498Szrj   ivs->cand_for_group[gid] = NULL;
5820*38fd1498Szrj   ivs->n_cand_uses[cid]--;
5821*38fd1498Szrj 
5822*38fd1498Szrj   if (ivs->n_cand_uses[cid] == 0)
5823*38fd1498Szrj     {
5824*38fd1498Szrj       bitmap_clear_bit (ivs->cands, cid);
5825*38fd1498Szrj       ivs->n_cands--;
5826*38fd1498Szrj       ivs->cand_cost -= cp->cand->cost;
5827*38fd1498Szrj       iv_ca_set_remove_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses);
5828*38fd1498Szrj       iv_ca_set_remove_invs (ivs, cp->cand->inv_exprs, ivs->n_inv_expr_uses);
5829*38fd1498Szrj     }
5830*38fd1498Szrj 
5831*38fd1498Szrj   ivs->cand_use_cost -= cp->cost;
5832*38fd1498Szrj   iv_ca_set_remove_invs (ivs, cp->inv_vars, ivs->n_inv_var_uses);
5833*38fd1498Szrj   iv_ca_set_remove_invs (ivs, cp->inv_exprs, ivs->n_inv_expr_uses);
5834*38fd1498Szrj   iv_ca_recount_cost (data, ivs);
5835*38fd1498Szrj }
5836*38fd1498Szrj 
5837*38fd1498Szrj /* Adds a use of the invariants in set INVS by increasing the counters in
5838*38fd1498Szrj    N_INV_USES and updating IVS accordingly.  */
5839*38fd1498Szrj 
5840*38fd1498Szrj static void
5841*38fd1498Szrj iv_ca_set_add_invs (struct iv_ca *ivs, bitmap invs, unsigned *n_inv_uses)
5842*38fd1498Szrj {
5843*38fd1498Szrj   bitmap_iterator bi;
5844*38fd1498Szrj   unsigned iid;
5845*38fd1498Szrj 
5846*38fd1498Szrj   if (!invs)
5847*38fd1498Szrj     return;
5848*38fd1498Szrj 
5849*38fd1498Szrj   gcc_assert (n_inv_uses != NULL);
5850*38fd1498Szrj   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5851*38fd1498Szrj     {
5852*38fd1498Szrj       n_inv_uses[iid]++;
5853*38fd1498Szrj       if (n_inv_uses[iid] == 1)
5854*38fd1498Szrj 	ivs->n_invs++;
5855*38fd1498Szrj     }
5856*38fd1498Szrj }
5857*38fd1498Szrj 
5858*38fd1498Szrj /* Set cost pair for GROUP in set IVS to CP.  */
5859*38fd1498Szrj 
5860*38fd1498Szrj static void
5861*38fd1498Szrj iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5862*38fd1498Szrj 	      struct iv_group *group, struct cost_pair *cp)
5863*38fd1498Szrj {
5864*38fd1498Szrj   unsigned gid = group->id, cid;
5865*38fd1498Szrj 
5866*38fd1498Szrj   if (ivs->cand_for_group[gid] == cp)
5867*38fd1498Szrj     return;
5868*38fd1498Szrj 
5869*38fd1498Szrj   if (ivs->cand_for_group[gid])
5870*38fd1498Szrj     iv_ca_set_no_cp (data, ivs, group);
5871*38fd1498Szrj 
5872*38fd1498Szrj   if (cp)
5873*38fd1498Szrj     {
5874*38fd1498Szrj       cid = cp->cand->id;
5875*38fd1498Szrj 
5876*38fd1498Szrj       ivs->bad_groups--;
5877*38fd1498Szrj       ivs->cand_for_group[gid] = cp;
5878*38fd1498Szrj       ivs->n_cand_uses[cid]++;
5879*38fd1498Szrj       if (ivs->n_cand_uses[cid] == 1)
5880*38fd1498Szrj 	{
5881*38fd1498Szrj 	  bitmap_set_bit (ivs->cands, cid);
5882*38fd1498Szrj 	  ivs->n_cands++;
5883*38fd1498Szrj 	  ivs->cand_cost += cp->cand->cost;
5884*38fd1498Szrj 	  iv_ca_set_add_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses);
5885*38fd1498Szrj 	  iv_ca_set_add_invs (ivs, cp->cand->inv_exprs, ivs->n_inv_expr_uses);
5886*38fd1498Szrj 	}
5887*38fd1498Szrj 
5888*38fd1498Szrj       ivs->cand_use_cost += cp->cost;
5889*38fd1498Szrj       iv_ca_set_add_invs (ivs, cp->inv_vars, ivs->n_inv_var_uses);
5890*38fd1498Szrj       iv_ca_set_add_invs (ivs, cp->inv_exprs, ivs->n_inv_expr_uses);
5891*38fd1498Szrj       iv_ca_recount_cost (data, ivs);
5892*38fd1498Szrj     }
5893*38fd1498Szrj }
5894*38fd1498Szrj 
5895*38fd1498Szrj /* Extend set IVS by expressing GROUP by some of the candidates in it
5896*38fd1498Szrj    if possible.  Consider all important candidates if the candidates in
5897*38fd1498Szrj    set IVS don't give any result.  */
5898*38fd1498Szrj 
5899*38fd1498Szrj static void
5900*38fd1498Szrj iv_ca_add_group (struct ivopts_data *data, struct iv_ca *ivs,
5901*38fd1498Szrj 	       struct iv_group *group)
5902*38fd1498Szrj {
5903*38fd1498Szrj   struct cost_pair *best_cp = NULL, *cp;
5904*38fd1498Szrj   bitmap_iterator bi;
5905*38fd1498Szrj   unsigned i;
5906*38fd1498Szrj   struct iv_cand *cand;
5907*38fd1498Szrj 
5908*38fd1498Szrj   gcc_assert (ivs->upto >= group->id);
5909*38fd1498Szrj   ivs->upto++;
5910*38fd1498Szrj   ivs->bad_groups++;
5911*38fd1498Szrj 
5912*38fd1498Szrj   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5913*38fd1498Szrj     {
5914*38fd1498Szrj       cand = data->vcands[i];
5915*38fd1498Szrj       cp = get_group_iv_cost (data, group, cand);
5916*38fd1498Szrj       if (cheaper_cost_pair (cp, best_cp))
5917*38fd1498Szrj 	best_cp = cp;
5918*38fd1498Szrj     }
5919*38fd1498Szrj 
5920*38fd1498Szrj   if (best_cp == NULL)
5921*38fd1498Szrj     {
5922*38fd1498Szrj       EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5923*38fd1498Szrj 	{
5924*38fd1498Szrj 	  cand = data->vcands[i];
5925*38fd1498Szrj 	  cp = get_group_iv_cost (data, group, cand);
5926*38fd1498Szrj 	  if (cheaper_cost_pair (cp, best_cp))
5927*38fd1498Szrj 	    best_cp = cp;
5928*38fd1498Szrj 	}
5929*38fd1498Szrj     }
5930*38fd1498Szrj 
5931*38fd1498Szrj   iv_ca_set_cp (data, ivs, group, best_cp);
5932*38fd1498Szrj }
5933*38fd1498Szrj 
5934*38fd1498Szrj /* Get cost for assignment IVS.  */
5935*38fd1498Szrj 
5936*38fd1498Szrj static comp_cost
5937*38fd1498Szrj iv_ca_cost (struct iv_ca *ivs)
5938*38fd1498Szrj {
5939*38fd1498Szrj   /* This was a conditional expression but it triggered a bug in
5940*38fd1498Szrj      Sun C 5.5.  */
5941*38fd1498Szrj   if (ivs->bad_groups)
5942*38fd1498Szrj     return infinite_cost;
5943*38fd1498Szrj   else
5944*38fd1498Szrj     return ivs->cost;
5945*38fd1498Szrj }
5946*38fd1498Szrj 
5947*38fd1498Szrj /* Compare if applying NEW_CP to GROUP for IVS introduces more invariants
5948*38fd1498Szrj    than OLD_CP.  Return 1, 0 and -1 for more, equal and fewer invariants
5949*38fd1498Szrj    respectively.  */
5950*38fd1498Szrj 
5951*38fd1498Szrj static int
5952*38fd1498Szrj iv_ca_compare_deps (struct ivopts_data *data, struct iv_ca *ivs,
5953*38fd1498Szrj 		    struct iv_group *group, struct cost_pair *old_cp,
5954*38fd1498Szrj 		    struct cost_pair *new_cp)
5955*38fd1498Szrj {
5956*38fd1498Szrj   gcc_assert (old_cp && new_cp && old_cp != new_cp);
5957*38fd1498Szrj   unsigned old_n_invs = ivs->n_invs;
5958*38fd1498Szrj   iv_ca_set_cp (data, ivs, group, new_cp);
5959*38fd1498Szrj   unsigned new_n_invs = ivs->n_invs;
5960*38fd1498Szrj   iv_ca_set_cp (data, ivs, group, old_cp);
5961*38fd1498Szrj 
5962*38fd1498Szrj   return new_n_invs > old_n_invs ? 1 : (new_n_invs < old_n_invs ? -1 : 0);
5963*38fd1498Szrj }
5964*38fd1498Szrj 
5965*38fd1498Szrj /* Creates change of expressing GROUP by NEW_CP instead of OLD_CP and chains
5966*38fd1498Szrj    it before NEXT.  */
5967*38fd1498Szrj 
5968*38fd1498Szrj static struct iv_ca_delta *
5969*38fd1498Szrj iv_ca_delta_add (struct iv_group *group, struct cost_pair *old_cp,
5970*38fd1498Szrj 		 struct cost_pair *new_cp, struct iv_ca_delta *next)
5971*38fd1498Szrj {
5972*38fd1498Szrj   struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5973*38fd1498Szrj 
5974*38fd1498Szrj   change->group = group;
5975*38fd1498Szrj   change->old_cp = old_cp;
5976*38fd1498Szrj   change->new_cp = new_cp;
5977*38fd1498Szrj   change->next = next;
5978*38fd1498Szrj 
5979*38fd1498Szrj   return change;
5980*38fd1498Szrj }
5981*38fd1498Szrj 
/* Joins two lists of changes L1 and L2.  Destructive -- old lists
   are rewritten.  */

static struct iv_ca_delta *
iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
{
  struct iv_ca_delta *last;

  if (!l2)
    return l1;

  if (!l1)
    return l2;

  for (last = l1; last->next; last = last->next)
    continue;
  last->next = l2;

  return l1;
}

/* Reverse the list of changes DELTA, forming the inverse to it.  */

static struct iv_ca_delta *
iv_ca_delta_reverse (struct iv_ca_delta *delta)
{
  struct iv_ca_delta *act, *next, *prev = NULL;

  for (act = delta; act; act = next)
    {
      next = act->next;
      act->next = prev;
      prev = act;

      std::swap (act->old_cp, act->new_cp);
    }

  return prev;
}

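/* E.g. reversing the delta list [G1: a -> b] -> [G2: c -> d] yields
   [G2: d -> c] -> [G1: b -> a]: each old/new cost pair is swapped and the
   order is flipped, so committing the reversed list undoes the original
   changes in the opposite order.  */
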
/* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
   reverted instead.  */

static void
iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
		    struct iv_ca_delta *delta, bool forward)
{
  struct cost_pair *from, *to;
  struct iv_ca_delta *act;

  if (!forward)
    delta = iv_ca_delta_reverse (delta);

  for (act = delta; act; act = act->next)
    {
      from = act->old_cp;
      to = act->new_cp;
      gcc_assert (iv_ca_cand_for_group (ivs, act->group) == from);
      iv_ca_set_cp (data, ivs, act->group, to);
    }

  if (!forward)
    iv_ca_delta_reverse (delta);
}

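/* Note the reversal above happens twice in the backward case: DELTA is
   reversed in place so the changes are undone in the opposite order, then
   reversed again afterwards so the caller's list is left intact.  */
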
/* Returns true if CAND is used in IVS.  */

static bool
iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
{
  return ivs->n_cand_uses[cand->id] > 0;
}

/* Returns number of induction variable candidates in the set IVS.  */

static unsigned
iv_ca_n_cands (struct iv_ca *ivs)
{
  return ivs->n_cands;
}

/* Free the list of changes DELTA.  */

static void
iv_ca_delta_free (struct iv_ca_delta **delta)
{
  struct iv_ca_delta *act, *next;

  for (act = *delta; act; act = next)
    {
      next = act->next;
      free (act);
    }

  *delta = NULL;
}

/* Allocates new iv candidates assignment.  */

static struct iv_ca *
iv_ca_new (struct ivopts_data *data)
{
  struct iv_ca *nw = XNEW (struct iv_ca);

  nw->upto = 0;
  nw->bad_groups = 0;
  nw->cand_for_group = XCNEWVEC (struct cost_pair *,
				 data->vgroups.length ());
  nw->n_cand_uses = XCNEWVEC (unsigned, data->vcands.length ());
  nw->cands = BITMAP_ALLOC (NULL);
  nw->n_cands = 0;
  nw->n_invs = 0;
  nw->cand_use_cost = no_cost;
  nw->cand_cost = 0;
  nw->n_inv_var_uses = XCNEWVEC (unsigned, data->max_inv_var_id + 1);
  nw->n_inv_expr_uses = XCNEWVEC (unsigned, data->max_inv_expr_id + 1);
  nw->cost = no_cost;

  return nw;
}

/* Free memory occupied by the set IVS.  */

static void
iv_ca_free (struct iv_ca **ivs)
{
  free ((*ivs)->cand_for_group);
  free ((*ivs)->n_cand_uses);
  BITMAP_FREE ((*ivs)->cands);
  free ((*ivs)->n_inv_var_uses);
  free ((*ivs)->n_inv_expr_uses);
  free (*ivs);
  *ivs = NULL;
}

/* Dumps IVS to FILE.  */

static void
iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
{
  unsigned i;
  comp_cost cost = iv_ca_cost (ivs);

  fprintf (file, "  cost: %d (complexity %d)\n", cost.cost,
	   cost.complexity);
  fprintf (file, "  cand_cost: %d\n  cand_group_cost: %d (complexity %d)\n",
	   ivs->cand_cost, ivs->cand_use_cost.cost,
	   ivs->cand_use_cost.complexity);
  bitmap_print (file, ivs->cands, "  candidates: ","\n");

  for (i = 0; i < ivs->upto; i++)
    {
      struct iv_group *group = data->vgroups[i];
      struct cost_pair *cp = iv_ca_cand_for_group (ivs, group);
      if (cp)
	fprintf (file, "   group:%d --> iv_cand:%d, cost=(%d,%d)\n",
		 group->id, cp->cand->id, cp->cost.cost,
		 cp->cost.complexity);
      else
	fprintf (file, "   group:%d --> ??\n", group->id);
    }

  const char *pref = "";
  fprintf (file, "  invariant variables: ");
  for (i = 1; i <= data->max_inv_var_id; i++)
    if (ivs->n_inv_var_uses[i])
      {
	fprintf (file, "%s%d", pref, i);
	pref = ", ";
      }

  pref = "";
  fprintf (file, "\n  invariant expressions: ");
  for (i = 1; i <= data->max_inv_expr_id; i++)
    if (ivs->n_inv_expr_uses[i])
      {
	fprintf (file, "%s%d", pref, i);
	pref = ", ";
      }

  fprintf (file, "\n\n");
}

/* Try changing candidate in IVS to CAND for each use.  Return cost of the
   new set, and store differences in DELTA.  The number of induction
   variables in the new set is stored to N_IVS.  If MIN_NCAND is true, the
   function tries to find a solution with a minimal number of iv
   candidates.  */

static comp_cost
iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
	      struct iv_cand *cand, struct iv_ca_delta **delta,
	      unsigned *n_ivs, bool min_ncand)
{
  unsigned i;
  comp_cost cost;
  struct iv_group *group;
  struct cost_pair *old_cp, *new_cp;

  *delta = NULL;
  for (i = 0; i < ivs->upto; i++)
    {
      group = data->vgroups[i];
      old_cp = iv_ca_cand_for_group (ivs, group);

      if (old_cp
	  && old_cp->cand == cand)
	continue;

      new_cp = get_group_iv_cost (data, group, cand);
      if (!new_cp)
	continue;

      if (!min_ncand)
	{
	  int cmp_invs = iv_ca_compare_deps (data, ivs, group, old_cp, new_cp);
	  /* Skip if new_cp depends on more invariants.  */
	  if (cmp_invs > 0)
	    continue;

	  int cmp_cost = compare_cost_pair (new_cp, old_cp);
	  /* Skip if new_cp is not cheaper.  */
	  if (cmp_cost > 0 || (cmp_cost == 0 && cmp_invs == 0))
	    continue;
	}

      *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
    }

  iv_ca_delta_commit (data, ivs, *delta, true);
  cost = iv_ca_cost (ivs);
  if (n_ivs)
    *n_ivs = iv_ca_n_cands (ivs);
  iv_ca_delta_commit (data, ivs, *delta, false);

  return cost;
}

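/* The commit/cost/commit pattern at the end of iv_ca_extend is the usual
   way a tentative change is probed: the delta is applied just long enough
   to read the cost of the modified assignment, then reverted, so IVS is
   left unchanged and the decision whether to keep DELTA stays with the
   caller.  */
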
/* Try narrowing set IVS by removing CAND.  Return the cost of
   the new set and store the differences in DELTA.  START is
   the candidate with which we start narrowing.  */

static comp_cost
iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
	      struct iv_cand *cand, struct iv_cand *start,
	      struct iv_ca_delta **delta)
{
  unsigned i, ci;
  struct iv_group *group;
  struct cost_pair *old_cp, *new_cp, *cp;
  bitmap_iterator bi;
  struct iv_cand *cnd;
  comp_cost cost, best_cost, acost;

  *delta = NULL;
  for (i = 0; i < data->vgroups.length (); i++)
    {
      group = data->vgroups[i];

      old_cp = iv_ca_cand_for_group (ivs, group);
      if (old_cp->cand != cand)
	continue;

      best_cost = iv_ca_cost (ivs);
      /* Start narrowing with START.  */
      new_cp = get_group_iv_cost (data, group, start);

      if (data->consider_all_candidates)
	{
	  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
	    {
	      if (ci == cand->id || (start && ci == start->id))
		continue;

	      cnd = data->vcands[ci];

	      cp = get_group_iv_cost (data, group, cnd);
	      if (!cp)
		continue;

	      iv_ca_set_cp (data, ivs, group, cp);
	      acost = iv_ca_cost (ivs);

	      if (acost < best_cost)
		{
		  best_cost = acost;
		  new_cp = cp;
		}
	    }
	}
      else
	{
	  EXECUTE_IF_AND_IN_BITMAP (group->related_cands, ivs->cands, 0, ci, bi)
	    {
	      if (ci == cand->id || (start && ci == start->id))
		continue;

	      cnd = data->vcands[ci];

	      cp = get_group_iv_cost (data, group, cnd);
	      if (!cp)
		continue;

	      iv_ca_set_cp (data, ivs, group, cp);
	      acost = iv_ca_cost (ivs);

	      if (acost < best_cost)
		{
		  best_cost = acost;
		  new_cp = cp;
		}
	    }
	}
      /* Restore to old cp for use.  */
      iv_ca_set_cp (data, ivs, group, old_cp);

      if (!new_cp)
	{
	  iv_ca_delta_free (delta);
	  return infinite_cost;
	}

      *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
    }

  iv_ca_delta_commit (data, ivs, *delta, true);
  cost = iv_ca_cost (ivs);
  iv_ca_delta_commit (data, ivs, *delta, false);

  return cost;
}

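/* In other words, for each group currently expressed by CAND the cheapest
   replacement among START and the remaining candidates in the set is
   chosen; if some group cannot be expressed without CAND at all, the
   narrowing fails with infinite_cost and the partial delta is freed.  */
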
/* Try optimizing the set of candidates IVS by removing candidates other
   than EXCEPT_CAND from it.  Return cost of the new set, and store
   differences in DELTA.  */

static comp_cost
iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
	     struct iv_cand *except_cand, struct iv_ca_delta **delta)
{
  bitmap_iterator bi;
  struct iv_ca_delta *act_delta, *best_delta;
  unsigned i;
  comp_cost best_cost, acost;
  struct iv_cand *cand;

  best_delta = NULL;
  best_cost = iv_ca_cost (ivs);

  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
    {
      cand = data->vcands[i];

      if (cand == except_cand)
	continue;

      acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);

      if (acost < best_cost)
	{
	  best_cost = acost;
	  iv_ca_delta_free (&best_delta);
	  best_delta = act_delta;
	}
      else
	iv_ca_delta_free (&act_delta);
    }

  if (!best_delta)
    {
      *delta = NULL;
      return best_cost;
    }

  /* Recurse to possibly remove other unnecessary ivs.  */
  iv_ca_delta_commit (data, ivs, best_delta, true);
  best_cost = iv_ca_prune (data, ivs, except_cand, delta);
  iv_ca_delta_commit (data, ivs, best_delta, false);
  *delta = iv_ca_delta_join (best_delta, *delta);
  return best_cost;
}

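/* The recursion above terminates: each level recurses only after a delta
   removing at least one candidate has been committed, and the candidate
   set is finite, so the depth is bounded by the number of candidates in
   IVS.  */
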
/* Check if CAND_IDX is a candidate other than OLD_CAND and has a cheaper
   local cost for GROUP than BEST_CP.  If so, return a pointer to the
   corresponding cost_pair; otherwise just return BEST_CP.  */

static struct cost_pair*
cheaper_cost_with_cand (struct ivopts_data *data, struct iv_group *group,
			unsigned int cand_idx, struct iv_cand *old_cand,
			struct cost_pair *best_cp)
{
  struct iv_cand *cand;
  struct cost_pair *cp;

  gcc_assert (old_cand != NULL && best_cp != NULL);
  if (cand_idx == old_cand->id)
    return best_cp;

  cand = data->vcands[cand_idx];
  cp = get_group_iv_cost (data, group, cand);
  if (cp != NULL && cheaper_cost_pair (cp, best_cp))
    return cp;

  return best_cp;
}

/* Try breaking the local optimal fixed-point for IVS by replacing
   candidates that are used by more than one iv use.  For each of those
   candidates, this function tries to represent the iv uses under that
   candidate using other candidates with lower local cost, then tries to
   prune the new set.  If the new set has lower cost, it returns the new
   cost after recording the candidate replacement in list DELTA.  */

static comp_cost
iv_ca_replace (struct ivopts_data *data, struct iv_ca *ivs,
	       struct iv_ca_delta **delta)
{
  bitmap_iterator bi, bj;
  unsigned int i, j, k;
  struct iv_cand *cand;
  comp_cost orig_cost, acost;
  struct iv_ca_delta *act_delta, *tmp_delta;
  struct cost_pair *old_cp, *best_cp = NULL;

  *delta = NULL;
  orig_cost = iv_ca_cost (ivs);

  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
    {
      if (ivs->n_cand_uses[i] == 1
	  || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
	continue;

      cand = data->vcands[i];

      act_delta = NULL;
      /*  Represent uses under current candidate using other ones with
	  lower local cost.  */
      for (j = 0; j < ivs->upto; j++)
	{
	  struct iv_group *group = data->vgroups[j];
	  old_cp = iv_ca_cand_for_group (ivs, group);

	  if (old_cp->cand != cand)
	    continue;

	  best_cp = old_cp;
	  if (data->consider_all_candidates)
	    for (k = 0; k < data->vcands.length (); k++)
	      best_cp = cheaper_cost_with_cand (data, group, k,
						old_cp->cand, best_cp);
	  else
	    EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, k, bj)
	      best_cp = cheaper_cost_with_cand (data, group, k,
						old_cp->cand, best_cp);

	  if (best_cp == old_cp)
	    continue;

	  act_delta = iv_ca_delta_add (group, old_cp, best_cp, act_delta);
	}
      /* No need for further prune.  */
      if (!act_delta)
	continue;

      /* Prune the new candidate set.  */
      iv_ca_delta_commit (data, ivs, act_delta, true);
      acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
      iv_ca_delta_commit (data, ivs, act_delta, false);
      act_delta = iv_ca_delta_join (act_delta, tmp_delta);

      if (acost < orig_cost)
	{
	  *delta = act_delta;
	  return acost;
	}
      else
	iv_ca_delta_free (&act_delta);
    }

  return orig_cost;
}

/* Tries to extend the set IVS in the best possible way in order to
   express the GROUP.  If ORIGINALP is true, prefer candidates from
   the original set of IVs, otherwise favor important candidates not
   based on any memory object.  */

static bool
try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
		  struct iv_group *group, bool originalp)
{
  comp_cost best_cost, act_cost;
  unsigned i;
  bitmap_iterator bi;
  struct iv_cand *cand;
  struct iv_ca_delta *best_delta = NULL, *act_delta;
  struct cost_pair *cp;

  iv_ca_add_group (data, ivs, group);
  best_cost = iv_ca_cost (ivs);
  cp = iv_ca_cand_for_group (ivs, group);
  if (cp)
    {
      best_delta = iv_ca_delta_add (group, NULL, cp, NULL);
      iv_ca_set_no_cp (data, ivs, group);
    }

  /* If ORIGINALP is true, try to find the original IV for the use.  Otherwise
     first try important candidates not based on any memory object.  Only if
     this fails, try the specific ones.  Rationale -- in loops with many
     variables the best choice often is to use just one generic biv.  If we
     added here many ivs specific to the uses, the optimization algorithm later
     would be likely to get stuck in a local minimum, thus causing us to create
     too many ivs.  The approach from few ivs to more seems more likely to be
     successful -- starting from few ivs, replacing an expensive use by a
     specific iv should always be a win.  */
  EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, i, bi)
    {
      cand = data->vcands[i];

      if (originalp && cand->pos != IP_ORIGINAL)
	continue;

      if (!originalp && cand->iv->base_object != NULL_TREE)
	continue;

      if (iv_ca_cand_used_p (ivs, cand))
	continue;

      cp = get_group_iv_cost (data, group, cand);
      if (!cp)
	continue;

      iv_ca_set_cp (data, ivs, group, cp);
      act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
			       true);
      iv_ca_set_no_cp (data, ivs, group);
      act_delta = iv_ca_delta_add (group, NULL, cp, act_delta);

      if (act_cost < best_cost)
	{
	  best_cost = act_cost;

	  iv_ca_delta_free (&best_delta);
	  best_delta = act_delta;
	}
      else
	iv_ca_delta_free (&act_delta);
    }

  if (best_cost.infinite_cost_p ())
    {
      for (i = 0; i < group->n_map_members; i++)
	{
	  cp = group->cost_map + i;
	  cand = cp->cand;
	  if (!cand)
	    continue;

	  /* Already tried this.  */
	  if (cand->important)
	    {
	      if (originalp && cand->pos == IP_ORIGINAL)
		continue;
	      if (!originalp && cand->iv->base_object == NULL_TREE)
		continue;
	    }

	  if (iv_ca_cand_used_p (ivs, cand))
	    continue;

	  act_delta = NULL;
	  iv_ca_set_cp (data, ivs, group, cp);
	  act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
	  iv_ca_set_no_cp (data, ivs, group);
	  act_delta = iv_ca_delta_add (group,
				       iv_ca_cand_for_group (ivs, group),
				       cp, act_delta);

	  if (act_cost < best_cost)
	    {
	      best_cost = act_cost;

	      if (best_delta)
		iv_ca_delta_free (&best_delta);
	      best_delta = act_delta;
	    }
	  else
	    iv_ca_delta_free (&act_delta);
	}
    }

  iv_ca_delta_commit (data, ivs, best_delta, true);
  iv_ca_delta_free (&best_delta);

  return !best_cost.infinite_cost_p ();
}

/* Finds an initial assignment of candidates to uses.  */

static struct iv_ca *
get_initial_solution (struct ivopts_data *data, bool originalp)
{
  unsigned i;
  struct iv_ca *ivs = iv_ca_new (data);

  for (i = 0; i < data->vgroups.length (); i++)
    if (!try_add_cand_for (data, ivs, data->vgroups[i], originalp))
      {
	iv_ca_free (&ivs);
	return NULL;
      }

  return ivs;
}

/* Tries to improve the set of induction variables IVS.  TRY_REPLACE_P
   points to a bool variable; if it is true, this function also tries to
   break a local optimal fixed-point by replacing candidates in IVS.  */

static bool
try_improve_iv_set (struct ivopts_data *data,
		    struct iv_ca *ivs, bool *try_replace_p)
{
  unsigned i, n_ivs;
  comp_cost acost, best_cost = iv_ca_cost (ivs);
  struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
  struct iv_cand *cand;

  /* Try extending the set of induction variables by one.  */
  for (i = 0; i < data->vcands.length (); i++)
    {
      cand = data->vcands[i];

      if (iv_ca_cand_used_p (ivs, cand))
	continue;

      acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
      if (!act_delta)
	continue;

      /* If we successfully added the candidate and the set is small enough,
	 try optimizing it by removing other candidates.  */
      if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
	{
	  iv_ca_delta_commit (data, ivs, act_delta, true);
	  acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
	  iv_ca_delta_commit (data, ivs, act_delta, false);
	  act_delta = iv_ca_delta_join (act_delta, tmp_delta);
	}

      if (acost < best_cost)
	{
	  best_cost = acost;
	  iv_ca_delta_free (&best_delta);
	  best_delta = act_delta;
	}
      else
	iv_ca_delta_free (&act_delta);
    }

  if (!best_delta)
    {
      /* Try removing the candidates from the set instead.  */
      best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);

      if (!best_delta && *try_replace_p)
	{
	  *try_replace_p = false;
	  /* So far the candidate selecting algorithm tends to choose fewer
	     IVs so that it can handle cases in which loops have many
	     variables but the best choice is often to use only one general
	     biv.  One weakness is that it can't handle the opposite cases,
	     in which different candidates should be chosen with respect to
	     each use.  To solve the problem, we replace candidates in the
	     manner described in the comments of iv_ca_replace, thus giving
	     the general algorithm a chance to break the local optimal
	     fixed-point in these cases.  */
	  best_cost = iv_ca_replace (data, ivs, &best_delta);
	}

      if (!best_delta)
	return false;
    }

  iv_ca_delta_commit (data, ivs, best_delta, true);
  gcc_assert (best_cost == iv_ca_cost (ivs));
  iv_ca_delta_free (&best_delta);
  return true;
}

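/* Taken together, the three moves above -- extending by one candidate
   (with local pruning), pruning alone, and candidate replacement -- form
   one step of the greedy descent driven by find_optimal_iv_set_1 below;
   the function returns false exactly when no move improves the cost.  */
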
/* Attempts to find the optimal set of induction variables.  We do a
   simple greedy heuristic -- we try to replace at most one candidate in
   the selected solution and remove the unused ivs while this improves
   the cost.  */

static struct iv_ca *
find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
{
  struct iv_ca *set;
  bool try_replace_p = true;

  /* Get the initial solution.  */
  set = get_initial_solution (data, originalp);
  if (!set)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
      return NULL;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Initial set of candidates:\n");
      iv_ca_dump (data, dump_file, set);
    }

  while (try_improve_iv_set (data, set, &try_replace_p))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Improved to:\n");
	  iv_ca_dump (data, dump_file, set);
	}
    }

  return set;
}

static struct iv_ca *
find_optimal_iv_set (struct ivopts_data *data)
{
  unsigned i;
  comp_cost cost, origcost;
  struct iv_ca *set, *origset;

  /* Determine the cost based on a strategy that starts with original IVs,
     and try again using a strategy that prefers candidates not based
     on any IVs.  */
  origset = find_optimal_iv_set_1 (data, true);
  set = find_optimal_iv_set_1 (data, false);

  if (!origset && !set)
    return NULL;

  origcost = origset ? iv_ca_cost (origset) : infinite_cost;
  cost = set ? iv_ca_cost (set) : infinite_cost;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
	       origcost.cost, origcost.complexity);
      fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
	       cost.cost, cost.complexity);
    }

  /* Choose the one with the best cost.  */
  if (origcost <= cost)
    {
      if (set)
	iv_ca_free (&set);
      set = origset;
    }
  else if (origset)
    iv_ca_free (&origset);

  for (i = 0; i < data->vgroups.length (); i++)
    {
      struct iv_group *group = data->vgroups[i];
      group->selected = iv_ca_cand_for_group (set, group)->cand;
    }

  return set;
}

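/* Running the search twice -- once seeded towards the original IVs and
   once towards generic candidates -- guards against the greedy descent
   settling into different local minima for the two seedings; the cheaper
   of the two resulting sets is kept and the other is freed.  */
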
/* Creates a new induction variable corresponding to CAND.  */

static void
create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
{
  gimple_stmt_iterator incr_pos;
  tree base;
  struct iv_use *use;
  struct iv_group *group;
  bool after = false;

  gcc_assert (cand->iv != NULL);

  switch (cand->pos)
    {
    case IP_NORMAL:
      incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
      break;

    case IP_END:
      incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
      after = true;
      break;

    case IP_AFTER_USE:
      after = true;
      /* fall through */
    case IP_BEFORE_USE:
      incr_pos = gsi_for_stmt (cand->incremented_at);
      break;

    case IP_ORIGINAL:
      /* Mark that the iv is preserved.  */
      name_info (data, cand->var_before)->preserve_biv = true;
      name_info (data, cand->var_after)->preserve_biv = true;

      /* Rewrite the increment so that it uses var_before directly.  */
      use = find_interesting_uses_op (data, cand->var_after);
      group = data->vgroups[use->group_id];
      group->selected = cand;
      return;
    }

  gimple_add_tmp_var (cand->var_before);

  base = unshare_expr (cand->iv->base);

  create_iv (base, unshare_expr (cand->iv->step),
	     cand->var_before, data->current_loop,
	     &incr_pos, after, &cand->var_before, &cand->var_after);
}

/* Creates new induction variables described in SET.  */

static void
create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
{
  unsigned i;
  struct iv_cand *cand;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
    {
      cand = data->vcands[i];
      create_new_iv (data, cand);
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Selected IV set for loop %d",
	       data->current_loop->num);
      if (data->loop_loc != UNKNOWN_LOCATION)
	fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
		 LOCATION_LINE (data->loop_loc));
      fprintf (dump_file, ", " HOST_WIDE_INT_PRINT_DEC " avg niters",
	       avg_loop_niter (data->current_loop));
      fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
      EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
	{
	  cand = data->vcands[i];
	  dump_cand (dump_file, cand);
	}
      fprintf (dump_file, "\n");
    }
}

/* Rewrites USE (definition of iv used in a nonlinear expression)
   using candidate CAND.  */

static void
rewrite_use_nonlinear_expr (struct ivopts_data *data,
			    struct iv_use *use, struct iv_cand *cand)
{
  gassign *ass;
  gimple_stmt_iterator bsi;
  tree comp, type = get_use_type (use), tgt;

  /* An important special case -- if we are asked to express value of
     the original iv by itself, just exit; there is no need to
     introduce a new computation (that might also need casting the
     variable to unsigned and back).  */
  if (cand->pos == IP_ORIGINAL
      && cand->incremented_at == use->stmt)
    {
      tree op = NULL_TREE;
      enum tree_code stmt_code;

      gcc_assert (is_gimple_assign (use->stmt));
      gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);

      /* Check whether we may leave the computation unchanged.
	 This is the case only if it does not rely on other
	 computations in the loop -- otherwise, the computation
	 we rely upon may be removed in remove_unused_ivs,
	 thus leading to ICE.  */
      stmt_code = gimple_assign_rhs_code (use->stmt);
      if (stmt_code == PLUS_EXPR
	  || stmt_code == MINUS_EXPR
	  || stmt_code == POINTER_PLUS_EXPR)
	{
	  if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
	    op = gimple_assign_rhs2 (use->stmt);
	  else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
	    op = gimple_assign_rhs1 (use->stmt);
	}

      if (op != NULL_TREE)
	{
	  if (expr_invariant_in_loop_p (data->current_loop, op))
	    return;
	  if (TREE_CODE (op) == SSA_NAME)
	    {
	      struct iv *iv = get_iv (data, op);
	      if (iv != NULL && integer_zerop (iv->step))
		return;
	    }
	}
    }

  switch (gimple_code (use->stmt))
    {
    case GIMPLE_PHI:
      tgt = PHI_RESULT (use->stmt);

      /* If we should keep the biv, do not replace it.  */
      if (name_info (data, tgt)->preserve_biv)
	return;

      bsi = gsi_after_labels (gimple_bb (use->stmt));
      break;

    case GIMPLE_ASSIGN:
      tgt = gimple_assign_lhs (use->stmt);
      bsi = gsi_for_stmt (use->stmt);
      break;

    default:
      gcc_unreachable ();
    }

  aff_tree aff_inv, aff_var;
  if (!get_computation_aff_1 (data->current_loop, use->stmt,
			      use, cand, &aff_inv, &aff_var))
    gcc_unreachable ();

  unshare_aff_combination (&aff_inv);
  unshare_aff_combination (&aff_var);
  /* Prefer CSE opportunities over loop invariants by adding the offset
     last, so that iv_uses with different offsets can be CSEed.  */
  poly_widest_int offset = aff_inv.offset;
  aff_inv.offset = 0;

  gimple_seq stmt_list = NULL, seq = NULL;
  tree comp_op1 = aff_combination_to_tree (&aff_inv);
  tree comp_op2 = aff_combination_to_tree (&aff_var);
  gcc_assert (comp_op1 && comp_op2);

  comp_op1 = force_gimple_operand (comp_op1, &seq, true, NULL);
  gimple_seq_add_seq (&stmt_list, seq);
  comp_op2 = force_gimple_operand (comp_op2, &seq, true, NULL);
  gimple_seq_add_seq (&stmt_list, seq);

  if (POINTER_TYPE_P (TREE_TYPE (comp_op2)))
    std::swap (comp_op1, comp_op2);

  if (POINTER_TYPE_P (TREE_TYPE (comp_op1)))
    {
      comp = fold_build_pointer_plus (comp_op1,
				      fold_convert (sizetype, comp_op2));
      comp = fold_build_pointer_plus (comp,
				      wide_int_to_tree (sizetype, offset));
    }
  else
    {
      comp = fold_build2 (PLUS_EXPR, TREE_TYPE (comp_op1), comp_op1,
			  fold_convert (TREE_TYPE (comp_op1), comp_op2));
      comp = fold_build2 (PLUS_EXPR, TREE_TYPE (comp_op1), comp,
			  wide_int_to_tree (TREE_TYPE (comp_op1), offset));
    }

  comp = fold_convert (type, comp);
  if (!valid_gimple_rhs_p (comp)
      || (gimple_code (use->stmt) != GIMPLE_PHI
	  /* We can't allow re-allocating the stmt as it might be pointed
	     to still.  */
	  && (get_gimple_rhs_num_ops (TREE_CODE (comp))
	      >= gimple_num_ops (gsi_stmt (bsi)))))
    {
      comp = force_gimple_operand (comp, &seq, true, NULL);
      gimple_seq_add_seq (&stmt_list, seq);
      if (POINTER_TYPE_P (TREE_TYPE (tgt)))
	{
	  duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
	  /* As this isn't a plain copy we have to reset alignment
	     information.  */
	  if (SSA_NAME_PTR_INFO (comp))
	    mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
	}
    }

  gsi_insert_seq_before (&bsi, stmt_list, GSI_SAME_STMT);
  if (gimple_code (use->stmt) == GIMPLE_PHI)
    {
      ass = gimple_build_assign (tgt, comp);
      gsi_insert_before (&bsi, ass, GSI_SAME_STMT);

      bsi = gsi_for_stmt (use->stmt);
      remove_phi_node (&bsi, false);
    }
  else
    {
      gimple_assign_set_rhs_from_tree (&bsi, comp);
      use->stmt = gsi_stmt (bsi);
    }
}

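/* E.g. two uses a[i] and a[i + 1] rewritten against the same candidate
   both reduce to a common part "base + i * step" plus constant offsets 0
   and step; materializing the common part first and adding the offset
   last lets later passes CSE the shared computation, whereas folding the
   offset into the invariant part would yield two unrelated expressions.  */
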
/* Performs a peephole optimization to reorder the iv update statement with
   a mem ref to enable instruction combining in later phases.  The mem ref
   uses the iv value before the update, so the reordering transformation
   requires adjustment of the offset.  CAND is the selected IV_CAND.

   Example:

   t = MEM_REF (base, iv1, 8, 16);  // base, index, stride, offset
   iv2 = iv1 + 1;

   if (t < val)      (1)
     goto L;
   goto Head;


   Directly propagating t over to (1) would introduce an overlapping live
   range and thus increase register pressure.  This peephole transforms it
   into:


   iv2 = iv1 + 1;
   t = MEM_REF (base, iv2, 8, 8);
   if (t < val)
     goto L;
   goto Head;
*/

static void
adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
{
  tree var_after;
  gimple *iv_update, *stmt;
  basic_block bb;
  gimple_stmt_iterator gsi, gsi_iv;

  if (cand->pos != IP_NORMAL)
    return;

  var_after = cand->var_after;
  iv_update = SSA_NAME_DEF_STMT (var_after);

  bb = gimple_bb (iv_update);
  gsi = gsi_last_nondebug_bb (bb);
  stmt = gsi_stmt (gsi);

  /* Only handle conditional statement for now.  */
  if (gimple_code (stmt) != GIMPLE_COND)
    return;

  gsi_prev_nondebug (&gsi);
  stmt = gsi_stmt (gsi);
  if (stmt != iv_update)
    return;

  gsi_prev_nondebug (&gsi);
  if (gsi_end_p (gsi))
    return;

  stmt = gsi_stmt (gsi);
  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return;

  if (stmt != use->stmt)
    return;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Reordering \n");
      print_gimple_stmt (dump_file, iv_update, 0);
      print_gimple_stmt (dump_file, use->stmt, 0);
      fprintf (dump_file, "\n");
    }

  gsi = gsi_for_stmt (use->stmt);
  gsi_iv = gsi_for_stmt (iv_update);
  gsi_move_before (&gsi_iv, &gsi);

  cand->pos = IP_BEFORE_USE;
  cand->incremented_at = use->stmt;
}

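/* Note the offset adjustment shown in the comment above is not performed
   here directly: moving the increment and switching CAND to IP_BEFORE_USE
   makes var_after the value live at USE, so the subsequent rewrite of the
   use recomputes the reference with the offset reduced by one step.  */
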
/* Return the alias pointer type that should be used for a MEM_REF
   associated with USE, which has type USE_PTR_ADDRESS.  */

static tree
get_alias_ptr_type_for_ptr_address (iv_use *use)
{
  gcall *call = as_a <gcall *> (use->stmt);
  switch (gimple_call_internal_fn (call))
    {
    case IFN_MASK_LOAD:
    case IFN_MASK_STORE:
      /* The second argument contains the correct alias type.  */
      gcc_assert (use->op_p == gimple_call_arg_ptr (call, 0));
      return TREE_TYPE (gimple_call_arg (call, 1));

    default:
      gcc_unreachable ();
    }
}


/* Rewrites USE (address that is an iv) using candidate CAND.  */

static void
rewrite_use_address (struct ivopts_data *data,
		     struct iv_use *use, struct iv_cand *cand)
{
  aff_tree aff;
  bool ok;

  adjust_iv_update_pos (cand, use);
  ok = get_computation_aff (data->current_loop, use->stmt, use, cand, &aff);
  gcc_assert (ok);
  unshare_aff_combination (&aff);

  /* To avoid undefined overflow problems, all IV candidates use unsigned
     integer types.  The drawback is that this makes it impossible for
     create_mem_ref to distinguish an IV that is based on a memory object
     from one that represents simply an offset.

     To work around this problem, we pass a hint to create_mem_ref that
     indicates which variable (if any) in aff is an IV based on a memory
     object.  Note that we only consider the candidate.  If this is not
     based on an object, the base of the reference is in some subexpression
     of the use -- but these will use pointer types, so they are recognized
     by the create_mem_ref heuristics anyway.  */
  tree iv = var_at_stmt (data->current_loop, cand, use->stmt);
  tree base_hint = (cand->iv->base_object) ? iv : NULL_TREE;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  tree type = use->mem_type;
  tree alias_ptr_type;
  if (use->type == USE_PTR_ADDRESS)
    alias_ptr_type = get_alias_ptr_type_for_ptr_address (use);
  else
    {
      gcc_assert (type == TREE_TYPE (*use->op_p));
      unsigned int align = get_object_alignment (*use->op_p);
      if (align != TYPE_ALIGN (type))
	type = build_aligned_type (type, align);
      alias_ptr_type = reference_alias_ptr_type (*use->op_p);
    }
  tree ref = create_mem_ref (&bsi, type, &aff, alias_ptr_type,
			     iv, base_hint, data->speed);

  if (use->type == USE_PTR_ADDRESS)
    {
      ref = fold_build1 (ADDR_EXPR, build_pointer_type (use->mem_type), ref);
      ref = fold_convert (get_use_type (use), ref);
      ref = force_gimple_operand_gsi (&bsi, ref, true, NULL_TREE,
				      true, GSI_SAME_STMT);
    }
  else
    copy_ref_info (ref, *use->op_p);

  *use->op_p = ref;
}

7151*38fd1498Szrj /* Rewrites USE (the condition such that one of the arguments is an iv) using
7152*38fd1498Szrj    candidate CAND.  */
7153*38fd1498Szrj 
7154*38fd1498Szrj static void
rewrite_use_compare(struct ivopts_data * data,struct iv_use * use,struct iv_cand * cand)7155*38fd1498Szrj rewrite_use_compare (struct ivopts_data *data,
7156*38fd1498Szrj 		     struct iv_use *use, struct iv_cand *cand)
7157*38fd1498Szrj {
7158*38fd1498Szrj   tree comp, op, bound;
7159*38fd1498Szrj   gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7160*38fd1498Szrj   enum tree_code compare;
7161*38fd1498Szrj   struct iv_group *group = data->vgroups[use->group_id];
7162*38fd1498Szrj   struct cost_pair *cp = get_group_iv_cost (data, group, cand);
7163*38fd1498Szrj 
7164*38fd1498Szrj   bound = cp->value;
7165*38fd1498Szrj   if (bound)
7166*38fd1498Szrj     {
7167*38fd1498Szrj       tree var = var_at_stmt (data->current_loop, cand, use->stmt);
7168*38fd1498Szrj       tree var_type = TREE_TYPE (var);
7169*38fd1498Szrj       gimple_seq stmts;
7170*38fd1498Szrj 
7171*38fd1498Szrj       if (dump_file && (dump_flags & TDF_DETAILS))
7172*38fd1498Szrj 	{
7173*38fd1498Szrj 	  fprintf (dump_file, "Replacing exit test: ");
7174*38fd1498Szrj 	  print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
7175*38fd1498Szrj 	}
7176*38fd1498Szrj       compare = cp->comp;
7177*38fd1498Szrj       bound = unshare_expr (fold_convert (var_type, bound));
7178*38fd1498Szrj       op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
7179*38fd1498Szrj       if (stmts)
7180*38fd1498Szrj 	gsi_insert_seq_on_edge_immediate (
7181*38fd1498Szrj 		loop_preheader_edge (data->current_loop),
7182*38fd1498Szrj 		stmts);
7183*38fd1498Szrj 
7184*38fd1498Szrj       gcond *cond_stmt = as_a <gcond *> (use->stmt);
7185*38fd1498Szrj       gimple_cond_set_lhs (cond_stmt, var);
7186*38fd1498Szrj       gimple_cond_set_code (cond_stmt, compare);
7187*38fd1498Szrj       gimple_cond_set_rhs (cond_stmt, op);
7188*38fd1498Szrj       return;
7189*38fd1498Szrj     }
7190*38fd1498Szrj 
7191*38fd1498Szrj   /* The induction variable elimination failed; just express the original
7192*38fd1498Szrj      giv.  */
7193*38fd1498Szrj   comp = get_computation_at (data->current_loop, use->stmt, use, cand);
7194*38fd1498Szrj   gcc_assert (comp != NULL_TREE);
7195*38fd1498Szrj   gcc_assert (use->op_p != NULL);
7196*38fd1498Szrj   *use->op_p = force_gimple_operand_gsi (&bsi, comp, true,
7197*38fd1498Szrj 					 SSA_NAME_VAR (*use->op_p),
7198*38fd1498Szrj 					 true, GSI_SAME_STMT);
7199*38fd1498Szrj }
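
/* A hedged sketch of the exit-test replacement (SSA names invented):
   given the original condition

     if (i_1 < n_3) goto <continue>; else goto <exit>;

   and a selected pointer candidate p, the precomputed bound from
   cp->value is materialized once on the preheader edge and the
   condition is retargeted to the candidate, roughly

     _9 = a_5 + _bound;        (inserted on the preheader edge)
     ...
     if (p_2 != _9) goto <continue>; else goto <exit>;

   When no usable bound exists (cp->value is NULL), elimination is not
   possible and the use is merely re-expressed in terms of CAND.  */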

/* Rewrite the groups using the selected induction variables.  */

static void
rewrite_groups (struct ivopts_data *data)
{
  unsigned i, j;

  for (i = 0; i < data->vgroups.length (); i++)
    {
      struct iv_group *group = data->vgroups[i];
      struct iv_cand *cand = group->selected;

      gcc_assert (cand);

      if (group->type == USE_NONLINEAR_EXPR)
	{
	  for (j = 0; j < group->vuses.length (); j++)
	    {
	      rewrite_use_nonlinear_expr (data, group->vuses[j], cand);
	      update_stmt (group->vuses[j]->stmt);
	    }
	}
      else if (address_p (group->type))
	{
	  for (j = 0; j < group->vuses.length (); j++)
	    {
	      rewrite_use_address (data, group->vuses[j], cand);
	      update_stmt (group->vuses[j]->stmt);
	    }
	}
      else
	{
	  gcc_assert (group->type == USE_COMPARE);

	  for (j = 0; j < group->vuses.length (); j++)
	    {
	      rewrite_use_compare (data, group->vuses[j], cand);
	      update_stmt (group->vuses[j]->stmt);
	    }
	}
    }
}

/* Removes the ivs that are not used after rewriting.  */

static void
remove_unused_ivs (struct ivopts_data *data)
{
  unsigned j;
  bitmap_iterator bi;
  bitmap toremove = BITMAP_ALLOC (NULL);

  /* Figure out an order in which to release SSA DEFs so that we don't
     release something that we'd have to propagate into a debug stmt
     afterwards.  */
  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info;

      info = ver_info (data, j);
      if (info->iv
	  && !integer_zerop (info->iv->step)
	  && !info->inv_id
	  && !info->iv->nonlin_use
	  && !info->preserve_biv)
	{
	  bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));

	  tree def = info->iv->ssa_name;

	  if (MAY_HAVE_DEBUG_BIND_STMTS && SSA_NAME_DEF_STMT (def))
	    {
	      imm_use_iterator imm_iter;
	      use_operand_p use_p;
	      gimple *stmt;
	      int count = 0;

	      FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
		{
		  if (!gimple_debug_bind_p (stmt))
		    continue;

		  /* We just want to determine whether to do nothing
		     (count == 0), to substitute the computed
		     expression into a single use of the SSA DEF by
		     itself (count == 1), or to use a debug temp
		     because the SSA DEF is used multiple times or as
		     part of a larger expression (count > 1).  */
		  count++;
		  if (gimple_debug_bind_get_value (stmt) != def)
		    count++;

		  if (count > 1)
		    BREAK_FROM_IMM_USE_STMT (imm_iter);
		}

	      if (!count)
		continue;

	      struct iv_use dummy_use;
	      struct iv_cand *best_cand = NULL, *cand;
	      unsigned i, best_pref = 0, cand_pref;

	      memset (&dummy_use, 0, sizeof (dummy_use));
	      dummy_use.iv = info->iv;
	      for (i = 0; i < data->vgroups.length () && i < 64; i++)
		{
		  cand = data->vgroups[i]->selected;
		  if (cand == best_cand)
		    continue;
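		  /* Score how well CAND can stand in for the removed IV in
		     debug info: a matching step is worth 4, a base of the
		     same machine mode 2, and a constant base 1, so a
		     same-step candidate always beats any combination of
		     the weaker criteria.  (Only the first 64 selected
		     candidates are scanned above, presumably to bound the
		     amount of work.)  */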
		  cand_pref = operand_equal_p (cand->iv->step,
					       info->iv->step, 0)
		    ? 4 : 0;
		  cand_pref
		    += TYPE_MODE (TREE_TYPE (cand->iv->base))
		    == TYPE_MODE (TREE_TYPE (info->iv->base))
		    ? 2 : 0;
		  cand_pref
		    += TREE_CODE (cand->iv->base) == INTEGER_CST
		    ? 1 : 0;
		  if (best_cand == NULL || best_pref < cand_pref)
		    {
		      best_cand = cand;
		      best_pref = cand_pref;
		    }
		}

	      if (!best_cand)
		continue;

	      tree comp = get_computation_at (data->current_loop,
					      SSA_NAME_DEF_STMT (def),
					      &dummy_use, best_cand);
	      if (!comp)
		continue;

	      if (count > 1)
		{
		  tree vexpr = make_node (DEBUG_EXPR_DECL);
		  DECL_ARTIFICIAL (vexpr) = 1;
		  TREE_TYPE (vexpr) = TREE_TYPE (comp);
		  if (SSA_NAME_VAR (def))
		    SET_DECL_MODE (vexpr, DECL_MODE (SSA_NAME_VAR (def)));
		  else
		    SET_DECL_MODE (vexpr, TYPE_MODE (TREE_TYPE (vexpr)));
		  gdebug *def_temp
		    = gimple_build_debug_bind (vexpr, comp, NULL);
		  gimple_stmt_iterator gsi;

		  if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
		    gsi = gsi_after_labels (gimple_bb
					    (SSA_NAME_DEF_STMT (def)));
		  else
		    gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));

		  gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
		  comp = vexpr;
		}

	      FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
		{
		  if (!gimple_debug_bind_p (stmt))
		    continue;

		  FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
		    SET_USE (use_p, comp);

		  update_stmt (stmt);
		}
	    }
	}
    }

  release_defs_bitset (toremove);

  BITMAP_FREE (toremove);
}
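
/* A hedged example of the debug-info salvaging above (names invented):
   if the removed IV i_1 appears in several debug binds, a debug temp is
   introduced right after i_1's definition,

     # DEBUG D#1 => ivtmp.5_8 * 4 + base

   and every "# DEBUG i => ..." bind that mentioned i_1 is retargeted to
   D#1, so the user variable stays inspectable in the debugger even
   though the SSA name itself is released.  */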

/* Frees memory occupied by struct tree_niter_desc in *VALUE.  Callback
   for hash_map::traverse.  */

bool
free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
{
  free (value);
  return true;
}

/* Frees data allocated by the optimization of a single loop.  */

static void
free_loop_data (struct ivopts_data *data)
{
  unsigned i, j;
  bitmap_iterator bi;
  tree obj;

  if (data->niters)
    {
      data->niters->traverse<void *, free_tree_niter_desc> (NULL);
      delete data->niters;
      data->niters = NULL;
    }

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      struct version_info *info;

      info = ver_info (data, i);
      info->iv = NULL;
      info->has_nonlin_use = false;
      info->preserve_biv = false;
      info->inv_id = 0;
    }
  bitmap_clear (data->relevant);
  bitmap_clear (data->important_candidates);

  for (i = 0; i < data->vgroups.length (); i++)
    {
      struct iv_group *group = data->vgroups[i];

      for (j = 0; j < group->vuses.length (); j++)
	free (group->vuses[j]);
      group->vuses.release ();

      BITMAP_FREE (group->related_cands);
      for (j = 0; j < group->n_map_members; j++)
	{
	  if (group->cost_map[j].inv_vars)
	    BITMAP_FREE (group->cost_map[j].inv_vars);
	  if (group->cost_map[j].inv_exprs)
	    BITMAP_FREE (group->cost_map[j].inv_exprs);
	}

      free (group->cost_map);
      free (group);
    }
  data->vgroups.truncate (0);

  for (i = 0; i < data->vcands.length (); i++)
    {
      struct iv_cand *cand = data->vcands[i];

      if (cand->inv_vars)
	BITMAP_FREE (cand->inv_vars);
      if (cand->inv_exprs)
	BITMAP_FREE (cand->inv_exprs);
      free (cand);
    }
  data->vcands.truncate (0);

  if (data->version_info_size < num_ssa_names)
    {
      data->version_info_size = 2 * num_ssa_names;
      free (data->version_info);
      data->version_info = XCNEWVEC (struct version_info,
				     data->version_info_size);
    }

  data->max_inv_var_id = 0;
  data->max_inv_expr_id = 0;

  FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
    SET_DECL_RTL (obj, NULL_RTX);

  decl_rtl_to_reset.truncate (0);

  data->inv_expr_tab->empty ();

  data->iv_common_cand_tab->empty ();
  data->iv_common_cands.truncate (0);
}

/* Finalizes data structures used by the iv optimization pass.  */

static void
tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
{
  free_loop_data (data);
  free (data->version_info);
  BITMAP_FREE (data->relevant);
  BITMAP_FREE (data->important_candidates);

  decl_rtl_to_reset.release ();
  data->vgroups.release ();
  data->vcands.release ();
  delete data->inv_expr_tab;
  data->inv_expr_tab = NULL;
  free_affine_expand_cache (&data->name_expansion_cache);
  delete data->iv_common_cand_tab;
  data->iv_common_cand_tab = NULL;
  data->iv_common_cands.release ();
  obstack_free (&data->iv_obstack, NULL);
}

/* Returns true if the loop body BODY, consisting of NUM_NODES blocks,
   includes any function calls other than internal calls and inexpensive
   builtins.  */

static bool
loop_body_includes_call (basic_block *body, unsigned num_nodes)
{
  gimple_stmt_iterator gsi;
  unsigned i;

  for (i = 0; i < num_nodes; i++)
    for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
      {
	gimple *stmt = gsi_stmt (gsi);
	if (is_gimple_call (stmt)
	    && !gimple_call_internal_p (stmt)
	    && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
	  return true;
      }
  return false;
}
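
/* Note (an assumption about the surrounding cost model, not something
   this predicate enforces itself): the result is cached in
   data->body_includes_call and presumably biases the register-pressure
   estimate, since induction variables live across a call are more
   expensive to keep in call-clobbered registers.  */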

/* Optimizes the LOOP.  Returns true if anything changed.  */

static bool
tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
{
  bool changed = false;
  struct iv_ca *iv_ca;
  edge exit = single_dom_exit (loop);
  basic_block *body;

  gcc_assert (!data->niters);
  data->current_loop = loop;
  data->loop_loc = find_loop_location (loop);
  data->speed = optimize_loop_for_speed_p (loop);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Processing loop %d", loop->num);
      if (data->loop_loc != UNKNOWN_LOCATION)
	fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
		 LOCATION_LINE (data->loop_loc));
      fprintf (dump_file, "\n");

      if (exit)
	{
	  fprintf (dump_file, "  single exit %d -> %d, exit condition ",
		   exit->src->index, exit->dest->index);
	  print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
	  fprintf (dump_file, "\n");
	}

      fprintf (dump_file, "\n");
    }

  body = get_loop_body (loop);
  data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
  renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
  free (body);

  data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);

  /* For each ssa name, determine whether it behaves as an induction
     variable in some loop.  */
  if (!find_induction_variables (data))
    goto finish;

  /* Finds interesting uses (item 1).  */
  find_interesting_uses (data);
  if (data->vgroups.length () > MAX_CONSIDERED_GROUPS)
    goto finish;

  /* Finds candidates for the induction variables (item 2).  */
  find_iv_candidates (data);

  /* Calculates the costs (item 3, part 1).  */
  determine_iv_costs (data);
  determine_group_iv_costs (data);
  determine_set_costs (data);

  /* Find the optimal set of induction variables (item 3, part 2).  */
  iv_ca = find_optimal_iv_set (data);
  if (!iv_ca)
    goto finish;
  changed = true;

  /* Create the new induction variables (item 4, part 1).  */
  create_new_ivs (data, iv_ca);
  iv_ca_free (&iv_ca);

  /* Rewrite the uses (item 4, part 2).  */
  rewrite_groups (data);

  /* Remove the ivs that are unused after rewriting.  */
  remove_unused_ivs (data);

  /* We have changed the structure of induction variables; it might happen
     that definitions in the scev database refer to some of them that were
     eliminated.  */
  scev_reset ();

finish:
  free_loop_data (data);

  return changed;
}
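
/* A hedged end-to-end illustration in source terms (the pass works on
   GIMPLE, so this only shows the net effect on a simple loop):

     before:                        after (one pointer IV):
       for (i = 0; i < n; i++)        for (p = a, e = a + n; p != e; p++)
	 a[i] += c;                      *p += c;

   The index IV is strength-reduced into a pointer, the address
   computation folds into the memory reference (item 4, part 2 above),
   and the exit test is rewritten against the pointer's final value.  */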

/* Main entry point.  Optimizes induction variables in loops.  */

void
tree_ssa_iv_optimize (void)
{
  struct loop *loop;
  struct ivopts_data data;

  tree_ssa_iv_optimize_init (&data);

  /* Optimize the loops starting with the innermost ones.  */
  FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	flow_loop_dump (loop, dump_file, NULL, 1);

      tree_ssa_iv_optimize_loop (&data, loop);
    }

  tree_ssa_iv_optimize_finalize (&data);
}

#include "gt-tree-ssa-loop-ivopts.h"