1 /* Induction variable optimizations.
2    Copyright (C) 2003-2022 Free Software Foundation, Inc.
3 
4 This file is part of GCC.
5 
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
10 
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3.  If not see
18 <http://www.gnu.org/licenses/>.  */
19 
20 /* This pass tries to find the optimal set of induction variables for the loop.
21    It optimizes just the basic linear induction variables (although adding
22    support for other types should not be too hard).  It includes the
23    optimizations commonly known as strength reduction, induction variable
24    coalescing and induction variable elimination.  It does so in the
25    following steps:
26 
27    1) The interesting uses of induction variables are found.  This includes
28 
29       -- uses of induction variables in non-linear expressions
30       -- addresses of arrays
31       -- comparisons of induction variables
32 
33       Note the interesting uses are categorized and handled in groups.
34       Generally, address type uses are grouped together if their iv bases
35       differ only by a constant offset (e.g. &a[i] and &a[i + 1]).
36 
37    2) Candidates for the induction variables are found.  This includes
38 
39       -- old induction variables
40       -- the variables defined by expressions derived from the "interesting
41 	 groups/uses" above
42 
43    3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
44       cost function assigns a cost to sets of induction variables and consists
45       of three parts:
46 
47       -- The group/use costs.  Each of the interesting groups/uses chooses
48 	 the best induction variable in the set and adds its cost to the sum.
49 	 The cost reflects the time spent on modifying the induction variable's
50 	 value to be usable for the given purpose (adding base and offset for
51 	 arrays, etc.).
52       -- The variable costs.  Each of the variables has a cost assigned that
53 	 reflects the costs associated with incrementing the value of the
54 	 variable.  The original variables are somewhat preferred.
55       -- The set cost.  Depending on the size of the set, extra cost may be
56 	 added to reflect register pressure.
57 
58       All the costs are defined in a machine-specific way, using the target
59       hooks and machine descriptions to determine them.
60 
61    4) The trees are transformed to use the new variables, the dead code is
62       removed.
63 
64    All of this is done loop by loop.  Doing it globally is theoretically
65    possible; it might give better performance and enable us to decide
66    costs more precisely, but getting all the interactions right would be
67    complicated.
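
   As a rough illustration (not drawn from any particular testcase), a loop
   such as

     for (i = 0; i < n; i++)
       a[i] = b[i];

   may end up with a single pointer-like induction variable serving both the
   memory accesses and the exit test, conceptually

     for (p = a, q = b; p < a + n; p++, q++)
       *p = *q;

   with the original counter i removed as dead code if it has no other uses.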
68 
69    For the targets supporting low-overhead loops, IVOPTs has to take care of
70    the loops which will probably be transformed by the RTL doloop optimization,
71    trying to make the selected IV candidate set optimal.  The process of doloop
72    support includes:
73 
74    1) Analyze whether the current loop will be transformed to doloop, find and
75       mark its compare type IV use as doloop use (iv_group field doloop_p), and
76       set flag doloop_use_p of ivopts_data to notify subsequent processing on
77       doloop.  See analyze_and_mark_doloop_use and its callees for the details.
78       The target hook predict_doloop_p can be used for target specific checks.
79 
80    2) Add one doloop dedicated IV cand {(may_be_zero ? 1 : (niter + 1)), +, -1},
81       set flag doloop_p of the iv_cand; its step cost is set to zero and no extra
82       cost is added, like a biv.  For cost determination between doloop IV cand and
83       IV use, the target hooks doloop_cost_for_generic and doloop_cost_for_address
84       are provided to add extra costs for generic type and address type IV use.
85       Zero cost is assigned to the pair between doloop IV cand and doloop IV
86       use, and bound zero is set for IV elimination.
87 
88    3) With the cost setting in step 2), the current cost model based IV
89       selection algorithm proceeds as usual, picking the doloop dedicated IV
90       if profitable.  */
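
/* As an illustrative sketch (not tied to any particular target): if niter
   analysis yields NITER for a counted loop, the dedicated doloop candidate
   described above is {NITER + 1, +, -1} (or {1, +, -1} when the may_be_zero
   condition holds), its exit test is eliminated against the bound zero, and
   the later RTL doloop pass can then map the decrement and test to a single
   count-register style decrement-and-branch instruction.  */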
91 
92 #include "config.h"
93 #include "system.h"
94 #include "coretypes.h"
95 #include "backend.h"
96 #include "rtl.h"
97 #include "tree.h"
98 #include "gimple.h"
99 #include "cfghooks.h"
100 #include "tree-pass.h"
101 #include "memmodel.h"
102 #include "tm_p.h"
103 #include "ssa.h"
104 #include "expmed.h"
105 #include "insn-config.h"
106 #include "emit-rtl.h"
107 #include "recog.h"
108 #include "cgraph.h"
109 #include "gimple-pretty-print.h"
110 #include "alias.h"
111 #include "fold-const.h"
112 #include "stor-layout.h"
113 #include "tree-eh.h"
114 #include "gimplify.h"
115 #include "gimple-iterator.h"
116 #include "gimplify-me.h"
117 #include "tree-cfg.h"
118 #include "tree-ssa-loop-ivopts.h"
119 #include "tree-ssa-loop-manip.h"
120 #include "tree-ssa-loop-niter.h"
121 #include "tree-ssa-loop.h"
122 #include "explow.h"
123 #include "expr.h"
124 #include "tree-dfa.h"
125 #include "tree-ssa.h"
126 #include "cfgloop.h"
127 #include "tree-scalar-evolution.h"
128 #include "tree-affine.h"
129 #include "tree-ssa-propagate.h"
130 #include "tree-ssa-address.h"
131 #include "builtins.h"
132 #include "tree-vectorizer.h"
133 #include "dbgcnt.h"
134 #include "cfganal.h"
135 
136 /* For lang_hooks.types.type_for_mode.  */
137 #include "langhooks.h"
138 
139 /* FIXME: Expressions are expanded to RTL in this pass to determine the
140    cost of different addressing modes.  This should be moved to a TBD
141    interface between the GIMPLE and RTL worlds.  */
142 
143 /* The infinite cost.  */
144 #define INFTY 1000000000
145 
146 /* Returns the expected number of loop iterations for LOOP.
147    The average trip count is computed from profile data if it
148    exists. */
149 
150 static inline HOST_WIDE_INT
151 avg_loop_niter (class loop *loop)
152 {
153   HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
154   if (niter == -1)
155     {
156       niter = likely_max_stmt_executions_int (loop);
157 
158       if (niter == -1 || niter > param_avg_loop_niter)
159 	return param_avg_loop_niter;
160     }
161 
162   return niter;
163 }
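
/* For instance (illustrative only): with profile data estimating roughly 1000
   iterations, avg_loop_niter returns 1000; without such an estimate it falls
   back to likely_max_stmt_executions_int, capped by (and defaulting to) the
   avg-loop-niter parameter.  */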
164 
165 struct iv_use;
166 
167 /* Representation of the induction variable.  */
168 struct iv
169 {
170   tree base;		/* Initial value of the iv.  */
171   tree base_object;	/* A memory object to which the induction variable points.  */
172   tree step;		/* Step of the iv (constant only).  */
173   tree ssa_name;	/* The ssa name with the value.  */
174   struct iv_use *nonlin_use;	/* The nonlinear use of this iv, if there is one.  */
175   bool biv_p;		/* Is it a biv?  */
176   bool no_overflow;	/* True if the iv doesn't overflow.  */
177   bool have_address_use;/* For biv, indicate if it's used in any address
178 			   type use.  */
179 };
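
/* Illustrative example (assuming a simple counted loop "for (i = 0; i < n;
   i++)"): the iv describing i would have base 0, step 1, biv_p set, and a
   NULL base_object, since i does not point to a memory object.  */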
180 
181 /* Per-ssa version information (induction variable descriptions, etc.).  */
182 struct version_info
183 {
184   tree name;		/* The ssa name.  */
185   struct iv *iv;	/* Induction variable description.  */
186   bool has_nonlin_use;	/* For a loop-level invariant, whether it is used in
187 			   an expression that is not an induction variable.  */
188   bool preserve_biv;	/* For the original biv, whether to preserve it.  */
189   unsigned inv_id;	/* Id of an invariant.  */
190 };
191 
192 /* Types of uses.  */
193 enum use_type
194 {
195   USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
196   USE_REF_ADDRESS,	/* Use is an address for an explicit memory
197 			   reference.  */
198   USE_PTR_ADDRESS,	/* Use is a pointer argument to a function in
199 			   cases where the expansion of the function
200 			   will turn the argument into a normal address.  */
201   USE_COMPARE		/* Use is a compare.  */
202 };
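
/* For instance (illustrative only): the address computed for a memory access
   like a[i] inside the loop is a USE_REF_ADDRESS use, the exit test "i < n"
   is a USE_COMPARE use, and other uses of the value of i (say, storing it or
   using it in a non-linear expression) are recorded as USE_NONLINEAR_EXPR.  */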
203 
204 /* Cost of a computation.  */
205 class comp_cost
206 {
207 public:
208   comp_cost (): cost (0), complexity (0), scratch (0)
209   {}
210 
211   comp_cost (int64_t cost, unsigned complexity, int64_t scratch = 0)
212     : cost (cost), complexity (complexity), scratch (scratch)
213   {}
214 
215   /* Returns true if COST is infinite.  */
216   bool infinite_cost_p ();
217 
218   /* Adds costs COST1 and COST2.  */
219   friend comp_cost operator+ (comp_cost cost1, comp_cost cost2);
220 
221   /* Adds COST to the comp_cost.  */
222   comp_cost operator+= (comp_cost cost);
223 
224   /* Adds constant C to this comp_cost.  */
225   comp_cost operator+= (HOST_WIDE_INT c);
226 
227   /* Subtracts constant C from this comp_cost.  */
228   comp_cost operator-= (HOST_WIDE_INT c);
229 
230   /* Divide the comp_cost by constant C.  */
231   comp_cost operator/= (HOST_WIDE_INT c);
232 
233   /* Multiply the comp_cost by constant C.  */
234   comp_cost operator*= (HOST_WIDE_INT c);
235 
236   /* Subtracts costs COST1 and COST2.  */
237   friend comp_cost operator- (comp_cost cost1, comp_cost cost2);
238 
239   /* Subtracts COST from this comp_cost.  */
240   comp_cost operator-= (comp_cost cost);
241 
242   /* Returns true if COST1 is smaller than COST2.  */
243   friend bool operator< (comp_cost cost1, comp_cost cost2);
244 
245   /* Returns true if COST1 and COST2 are equal.  */
246   friend bool operator== (comp_cost cost1, comp_cost cost2);
247 
248   /* Returns true if COST1 is smaller or equal than COST2.  */
249   friend bool operator<= (comp_cost cost1, comp_cost cost2);
250 
251   int64_t cost;		/* The runtime cost.  */
252   unsigned complexity;  /* The estimate of the complexity of the code for
253 			   the computation (in no concrete units --
254 			   complexity field should be larger for more
255 			   complex expressions and addressing modes).  */
256   int64_t scratch;	/* Scratch used during cost computation.  */
257 };
258 
259 static const comp_cost no_cost;
260 static const comp_cost infinite_cost (INFTY, 0, INFTY);
261 
262 bool
263 comp_cost::infinite_cost_p ()
264 {
265   return cost == INFTY;
266 }
267 
268 comp_cost
269 operator+ (comp_cost cost1, comp_cost cost2)
270 {
271   if (cost1.infinite_cost_p () || cost2.infinite_cost_p ())
272     return infinite_cost;
273 
274   gcc_assert (cost1.cost + cost2.cost < infinite_cost.cost);
275   cost1.cost += cost2.cost;
276   cost1.complexity += cost2.complexity;
277 
278   return cost1;
279 }
280 
281 comp_cost
282 operator- (comp_cost cost1, comp_cost cost2)
283 {
284   if (cost1.infinite_cost_p ())
285     return infinite_cost;
286 
287   gcc_assert (!cost2.infinite_cost_p ());
288   gcc_assert (cost1.cost - cost2.cost < infinite_cost.cost);
289 
290   cost1.cost -= cost2.cost;
291   cost1.complexity -= cost2.complexity;
292 
293   return cost1;
294 }
295 
296 comp_cost
297 comp_cost::operator+= (comp_cost cost)
298 {
299   *this = *this + cost;
300   return *this;
301 }
302 
303 comp_cost
304 comp_cost::operator+= (HOST_WIDE_INT c)
305 {
306   if (c >= INFTY)
307     this->cost = INFTY;
308 
309   if (infinite_cost_p ())
310     return *this;
311 
312   gcc_assert (this->cost + c < infinite_cost.cost);
313   this->cost += c;
314 
315   return *this;
316 }
317 
318 comp_cost
319 comp_cost::operator-= (HOST_WIDE_INT c)
320 {
321   if (infinite_cost_p ())
322     return *this;
323 
324   gcc_assert (this->cost - c < infinite_cost.cost);
325   this->cost -= c;
326 
327   return *this;
328 }
329 
330 comp_cost
331 comp_cost::operator/= (HOST_WIDE_INT c)
332 {
333   gcc_assert (c != 0);
334   if (infinite_cost_p ())
335     return *this;
336 
337   this->cost /= c;
338 
339   return *this;
340 }
341 
342 comp_cost
343 comp_cost::operator*= (HOST_WIDE_INT c)
344 {
345   if (infinite_cost_p ())
346     return *this;
347 
348   gcc_assert (this->cost * c < infinite_cost.cost);
349   this->cost *= c;
350 
351   return *this;
352 }
353 
354 comp_cost
355 comp_cost::operator-= (comp_cost cost)
356 {
357   *this = *this - cost;
358   return *this;
359 }
360 
361 bool
362 operator< (comp_cost cost1, comp_cost cost2)
363 {
364   if (cost1.cost == cost2.cost)
365     return cost1.complexity < cost2.complexity;
366 
367   return cost1.cost < cost2.cost;
368 }
369 
370 bool
371 operator== (comp_cost cost1, comp_cost cost2)
372 {
373   return cost1.cost == cost2.cost
374     && cost1.complexity == cost2.complexity;
375 }
376 
377 bool
378 operator<= (comp_cost cost1, comp_cost cost2)
379 {
380   return cost1 < cost2 || cost1 == cost2;
381 }
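
/* A small worked example of the ordering above (illustrative only):
   comp_cost (3, 5) < comp_cost (4, 0) because the runtime cost is compared
   first, while comp_cost (4, 1) < comp_cost (4, 2) because complexity only
   breaks ties between equal costs.  */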
382 
383 struct iv_inv_expr_ent;
384 
385 /* The candidate - cost pair.  */
386 class cost_pair
387 {
388 public:
389   struct iv_cand *cand;	/* The candidate.  */
390   comp_cost cost;	/* The cost.  */
391   enum tree_code comp;	/* For iv elimination, the comparison.  */
392   bitmap inv_vars;	/* The list of invariant ssa_vars that have to be
393 			   preserved when representing iv_use with iv_cand.  */
394   bitmap inv_exprs;	/* The list of newly created invariant expressions
395 			   when representing iv_use with iv_cand.  */
396   tree value;		/* For final value elimination, the expression for
397 			   the final value of the iv.  For iv elimination,
398 			   the new bound to compare with.  */
399 };
400 
401 /* Use.  */
402 struct iv_use
403 {
404   unsigned id;		/* The id of the use.  */
405   unsigned group_id;	/* The group id the use belongs to.  */
406   enum use_type type;	/* Type of the use.  */
407   tree mem_type;	/* The memory type to use when testing whether an
408 			   address is legitimate, and what the address's
409 			   cost is.  */
410   struct iv *iv;	/* The induction variable it is based on.  */
411   gimple *stmt;		/* Statement in that it occurs.  */
412   tree *op_p;		/* The place where it occurs.  */
413 
414   tree addr_base;	/* Base address with const offset stripped.  */
415   poly_uint64_pod addr_offset;
416 			/* Const offset stripped from base address.  */
417 };
418 
419 /* Group of uses.  */
420 struct iv_group
421 {
422   /* The id of the group.  */
423   unsigned id;
424   /* Uses of the group are of the same type.  */
425   enum use_type type;
426   /* The set of "related" IV candidates, plus the important ones.  */
427   bitmap related_cands;
428   /* Number of IV candidates in the cost_map.  */
429   unsigned n_map_members;
430   /* The costs wrto the iv candidates.  */
431   class cost_pair *cost_map;
432   /* The selected candidate for the group.  */
433   struct iv_cand *selected;
434   /* To indicate this is a doloop use group.  */
435   bool doloop_p;
436   /* Uses in the group.  */
437   vec<struct iv_use *> vuses;
438 };
439 
440 /* The position where the iv is computed.  */
441 enum iv_position
442 {
443   IP_NORMAL,		/* At the end, just before the exit condition.  */
444   IP_END,		/* At the end of the latch block.  */
445   IP_BEFORE_USE,	/* Immediately before a specific use.  */
446   IP_AFTER_USE,		/* Immediately after a specific use.  */
447   IP_ORIGINAL		/* The original biv.  */
448 };
449 
450 /* The induction variable candidate.  */
451 struct iv_cand
452 {
453   unsigned id;		/* The number of the candidate.  */
454   bool important;	/* Whether this is an "important" candidate, i.e. such
455 			   that it should be considered by all uses.  */
456   bool involves_undefs; /* Whether the IV involves undefined values.  */
457   ENUM_BITFIELD(iv_position) pos : 8;	/* Where it is computed.  */
458   gimple *incremented_at;/* For original biv, the statement where it is
459 			   incremented.  */
460   tree var_before;	/* The variable used for it before increment.  */
461   tree var_after;	/* The variable used for it after increment.  */
462   struct iv *iv;	/* The value of the candidate.  NULL for
463 			   "pseudocandidate" used to indicate the possibility
464 			   to replace the final value of an iv by direct
465 			   computation of the value.  */
466   unsigned cost;	/* Cost of the candidate.  */
467   unsigned cost_step;	/* Cost of the candidate's increment operation.  */
468   struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
469 			      where it is incremented.  */
470   bitmap inv_vars;	/* The list of invariant ssa_vars used in step of the
471 			   iv_cand.  */
472   bitmap inv_exprs;	/* If step is more complicated than a single ssa_var,
473 			   handle it as a new invariant expression which will
474 			   be hoisted out of loop.  */
475   struct iv *orig_iv;	/* The original iv if this cand is added from biv with
476 			   smaller type.  */
477   bool doloop_p;	/* Whether this is a doloop candidate.  */
478 };
479 
480 /* Hashtable entry for common candidate derived from iv uses.  */
481 class iv_common_cand
482 {
483 public:
484   tree base;
485   tree step;
486   /* IV uses from which this common candidate is derived.  */
487   auto_vec<struct iv_use *> uses;
488   hashval_t hash;
489 };
490 
491 /* Hashtable helpers.  */
492 
493 struct iv_common_cand_hasher : delete_ptr_hash <iv_common_cand>
494 {
495   static inline hashval_t hash (const iv_common_cand *);
496   static inline bool equal (const iv_common_cand *, const iv_common_cand *);
497 };
498 
499 /* Hash function for possible common candidates.  */
500 
501 inline hashval_t
502 iv_common_cand_hasher::hash (const iv_common_cand *ccand)
503 {
504   return ccand->hash;
505 }
506 
507 /* Hash table equality function for common candidates.  */
508 
509 inline bool
510 iv_common_cand_hasher::equal (const iv_common_cand *ccand1,
511 			      const iv_common_cand *ccand2)
512 {
513   return (ccand1->hash == ccand2->hash
514 	  && operand_equal_p (ccand1->base, ccand2->base, 0)
515 	  && operand_equal_p (ccand1->step, ccand2->step, 0)
516 	  && (TYPE_PRECISION (TREE_TYPE (ccand1->base))
517 	      == TYPE_PRECISION (TREE_TYPE (ccand2->base))));
518 }
519 
520 /* Loop invariant expression hashtable entry.  */
521 
522 struct iv_inv_expr_ent
523 {
524   /* Tree expression of the entry.  */
525   tree expr;
526   /* Unique identifier.  */
527   int id;
528   /* Hash value.  */
529   hashval_t hash;
530 };
531 
532 /* Sort iv_inv_expr_ent pair A and B by id field.  */
533 
534 static int
535 sort_iv_inv_expr_ent (const void *a, const void *b)
536 {
537   const iv_inv_expr_ent * const *e1 = (const iv_inv_expr_ent * const *) (a);
538   const iv_inv_expr_ent * const *e2 = (const iv_inv_expr_ent * const *) (b);
539 
540   unsigned id1 = (*e1)->id;
541   unsigned id2 = (*e2)->id;
542 
543   if (id1 < id2)
544     return -1;
545   else if (id1 > id2)
546     return 1;
547   else
548     return 0;
549 }
550 
551 /* Hashtable helpers.  */
552 
553 struct iv_inv_expr_hasher : free_ptr_hash <iv_inv_expr_ent>
554 {
555   static inline hashval_t hash (const iv_inv_expr_ent *);
556   static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
557 };
558 
559 /* Return true if uses of type TYPE represent some form of address.  */
560 
561 inline bool
562 address_p (use_type type)
563 {
564   return type == USE_REF_ADDRESS || type == USE_PTR_ADDRESS;
565 }
566 
567 /* Hash function for loop invariant expressions.  */
568 
569 inline hashval_t
570 iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
571 {
572   return expr->hash;
573 }
574 
575 /* Hash table equality function for expressions.  */
576 
577 inline bool
578 iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
579 			   const iv_inv_expr_ent *expr2)
580 {
581   return expr1->hash == expr2->hash
582 	 && operand_equal_p (expr1->expr, expr2->expr, 0);
583 }
584 
585 struct ivopts_data
586 {
587   /* The currently optimized loop.  */
588   class loop *current_loop;
589   location_t loop_loc;
590 
591   /* Numbers of iterations for all exits of the current loop.  */
592   hash_map<edge, tree_niter_desc *> *niters;
593 
594   /* Number of registers used in it.  */
595   unsigned regs_used;
596 
597   /* The size of version_info array allocated.  */
598   unsigned version_info_size;
599 
600   /* The array of information for the ssa names.  */
601   struct version_info *version_info;
602 
603   /* The hashtable of loop invariant expressions created
604      by ivopt.  */
605   hash_table<iv_inv_expr_hasher> *inv_expr_tab;
606 
607   /* The bitmap of indices in version_info whose value was changed.  */
608   bitmap relevant;
609 
610   /* The uses of induction variables.  */
611   vec<iv_group *> vgroups;
612 
613   /* The candidates.  */
614   vec<iv_cand *> vcands;
615 
616   /* A bitmap of important candidates.  */
617   bitmap important_candidates;
618 
619   /* Cache used by tree_to_aff_combination_expand.  */
620   hash_map<tree, name_expansion *> *name_expansion_cache;
621 
622   /* The hashtable of common candidates derived from iv uses.  */
623   hash_table<iv_common_cand_hasher> *iv_common_cand_tab;
624 
625   /* The common candidates.  */
626   vec<iv_common_cand *> iv_common_cands;
627 
628   /* Hash map recording base object information of tree exp.  */
629   hash_map<tree, tree> *base_object_map;
630 
631   /* The maximum invariant variable id.  */
632   unsigned max_inv_var_id;
633 
634   /* The maximum invariant expression id.  */
635   unsigned max_inv_expr_id;
636 
637   /* Number of no_overflow BIVs which are not used in memory address.  */
638   unsigned bivs_not_used_in_addr;
639 
640   /* Obstack for iv structure.  */
641   struct obstack iv_obstack;
642 
643   /* Whether to consider just related and important candidates when replacing a
644      use.  */
645   bool consider_all_candidates;
646 
647   /* Are we optimizing for speed?  */
648   bool speed;
649 
650   /* Whether the loop body includes any function calls.  */
651   bool body_includes_call;
652 
653   /* Whether the loop body can only be exited via a single exit.  */
654   bool loop_single_exit_p;
655 
656   /* Whether the loop has doloop comparison use.  */
657   bool doloop_use_p;
658 };
659 
660 /* An assignment of iv candidates to uses.  */
661 
662 class iv_ca
663 {
664 public:
665   /* The number of uses covered by the assignment.  */
666   unsigned upto;
667 
668   /* Number of uses that cannot be expressed by the candidates in the set.  */
669   unsigned bad_groups;
670 
671   /* Candidate assigned to a use, together with the related costs.  */
672   class cost_pair **cand_for_group;
673 
674   /* Number of times each candidate is used.  */
675   unsigned *n_cand_uses;
676 
677   /* The candidates used.  */
678   bitmap cands;
679 
680   /* The number of candidates in the set.  */
681   unsigned n_cands;
682 
683   /* The number of invariants needed, including both invariant variables and
684      invariant expressions.  */
685   unsigned n_invs;
686 
687   /* Total cost of expressing uses.  */
688   comp_cost cand_use_cost;
689 
690   /* Total cost of candidates.  */
691   int64_t cand_cost;
692 
693   /* Number of times each invariant variable is used.  */
694   unsigned *n_inv_var_uses;
695 
696   /* Number of times each invariant expression is used.  */
697   unsigned *n_inv_expr_uses;
698 
699   /* Total cost of the assignment.  */
700   comp_cost cost;
701 };
702 
703 /* Difference of two iv candidate assignments.  */
704 
705 struct iv_ca_delta
706 {
707   /* Changed group.  */
708   struct iv_group *group;
709 
710   /* An old assignment (for rollback purposes).  */
711   class cost_pair *old_cp;
712 
713   /* A new assignment.  */
714   class cost_pair *new_cp;
715 
716   /* Next change in the list.  */
717   struct iv_ca_delta *next;
718 };
719 
720 /* Bound on number of candidates below that all candidates are considered.  */
721 
722 #define CONSIDER_ALL_CANDIDATES_BOUND \
723   ((unsigned) param_iv_consider_all_candidates_bound)
724 
725 /* If there are more iv occurrences, we just give up (it is quite unlikely that
726    optimizing such a loop would help, and it would take ages).  */
727 
728 #define MAX_CONSIDERED_GROUPS \
729   ((unsigned) param_iv_max_considered_uses)
730 
731 /* If there are at most this number of ivs in the set, always try removing
732    unnecessary ivs from the set.  */
733 
734 #define ALWAYS_PRUNE_CAND_SET_BOUND \
735   ((unsigned) param_iv_always_prune_cand_set_bound)
736 
737 /* The list of trees for which the decl_rtl field must be reset is stored
738    here.  */
739 
740 static vec<tree> decl_rtl_to_reset;
741 
742 static comp_cost force_expr_to_var_cost (tree, bool);
743 
744 /* The single loop exit if it dominates the latch, NULL otherwise.  */
745 
746 edge
747 single_dom_exit (class loop *loop)
748 {
749   edge exit = single_exit (loop);
750 
751   if (!exit)
752     return NULL;
753 
754   if (!just_once_each_iteration_p (loop, exit->src))
755     return NULL;
756 
757   return exit;
758 }
759 
760 /* Dumps information about the induction variable IV to FILE.  Don't dump
761    variable's name if DUMP_NAME is FALSE.  The information is dumped with
762    preceding spaces indicated by INDENT_LEVEL.  */
763 
764 void
765 dump_iv (FILE *file, struct iv *iv, bool dump_name, unsigned indent_level)
766 {
767   const char *p;
768   const char spaces[9] = {' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '\0'};
769 
770   if (indent_level > 4)
771     indent_level = 4;
772   p = spaces + 8 - (indent_level << 1);
773 
774   fprintf (file, "%sIV struct:\n", p);
775   if (iv->ssa_name && dump_name)
776     {
777       fprintf (file, "%s  SSA_NAME:\t", p);
778       print_generic_expr (file, iv->ssa_name, TDF_SLIM);
779       fprintf (file, "\n");
780     }
781 
782   fprintf (file, "%s  Type:\t", p);
783   print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
784   fprintf (file, "\n");
785 
786   fprintf (file, "%s  Base:\t", p);
787   print_generic_expr (file, iv->base, TDF_SLIM);
788   fprintf (file, "\n");
789 
790   fprintf (file, "%s  Step:\t", p);
791   print_generic_expr (file, iv->step, TDF_SLIM);
792   fprintf (file, "\n");
793 
794   if (iv->base_object)
795     {
796       fprintf (file, "%s  Object:\t", p);
797       print_generic_expr (file, iv->base_object, TDF_SLIM);
798       fprintf (file, "\n");
799     }
800 
801   fprintf (file, "%s  Biv:\t%c\n", p, iv->biv_p ? 'Y' : 'N');
802 
803   fprintf (file, "%s  Overflowness wrto loop niter:\t%s\n",
804 	   p, iv->no_overflow ? "No-overflow" : "Overflow");
805 }
806 
807 /* Dumps information about the USE to FILE.  */
808 
809 void
810 dump_use (FILE *file, struct iv_use *use)
811 {
812   fprintf (file, "  Use %d.%d:\n", use->group_id, use->id);
813   fprintf (file, "    At stmt:\t");
814   print_gimple_stmt (file, use->stmt, 0);
815   fprintf (file, "    At pos:\t");
816   if (use->op_p)
817     print_generic_expr (file, *use->op_p, TDF_SLIM);
818   fprintf (file, "\n");
819   dump_iv (file, use->iv, false, 2);
820 }
821 
822 /* Dumps information about the uses to FILE.  */
823 
824 void
825 dump_groups (FILE *file, struct ivopts_data *data)
826 {
827   unsigned i, j;
828   struct iv_group *group;
829 
830   for (i = 0; i < data->vgroups.length (); i++)
831     {
832       group = data->vgroups[i];
833       fprintf (file, "Group %d:\n", group->id);
834       if (group->type == USE_NONLINEAR_EXPR)
835 	fprintf (file, "  Type:\tGENERIC\n");
836       else if (group->type == USE_REF_ADDRESS)
837 	fprintf (file, "  Type:\tREFERENCE ADDRESS\n");
838       else if (group->type == USE_PTR_ADDRESS)
839 	fprintf (file, "  Type:\tPOINTER ARGUMENT ADDRESS\n");
840       else
841 	{
842 	  gcc_assert (group->type == USE_COMPARE);
843 	  fprintf (file, "  Type:\tCOMPARE\n");
844 	}
845       for (j = 0; j < group->vuses.length (); j++)
846 	dump_use (file, group->vuses[j]);
847     }
848 }
849 
850 /* Dumps information about induction variable candidate CAND to FILE.  */
851 
852 void
853 dump_cand (FILE *file, struct iv_cand *cand)
854 {
855   struct iv *iv = cand->iv;
856 
857   fprintf (file, "Candidate %d:\n", cand->id);
858   if (cand->inv_vars)
859     {
860       fprintf (file, "  Depend on inv.vars: ");
861       dump_bitmap (file, cand->inv_vars);
862     }
863   if (cand->inv_exprs)
864     {
865       fprintf (file, "  Depend on inv.exprs: ");
866       dump_bitmap (file, cand->inv_exprs);
867     }
868 
869   if (cand->var_before)
870     {
871       fprintf (file, "  Var befor: ");
872       print_generic_expr (file, cand->var_before, TDF_SLIM);
873       fprintf (file, "\n");
874     }
875   if (cand->var_after)
876     {
877       fprintf (file, "  Var after: ");
878       print_generic_expr (file, cand->var_after, TDF_SLIM);
879       fprintf (file, "\n");
880     }
881 
882   switch (cand->pos)
883     {
884     case IP_NORMAL:
885       fprintf (file, "  Incr POS: before exit test\n");
886       break;
887 
888     case IP_BEFORE_USE:
889       fprintf (file, "  Incr POS: before use %d\n", cand->ainc_use->id);
890       break;
891 
892     case IP_AFTER_USE:
893       fprintf (file, "  Incr POS: after use %d\n", cand->ainc_use->id);
894       break;
895 
896     case IP_END:
897       fprintf (file, "  Incr POS: at end\n");
898       break;
899 
900     case IP_ORIGINAL:
901       fprintf (file, "  Incr POS: orig biv\n");
902       break;
903     }
904 
905   dump_iv (file, iv, false, 1);
906 }
907 
908 /* Returns the info for ssa version VER.  */
909 
910 static inline struct version_info *
911 ver_info (struct ivopts_data *data, unsigned ver)
912 {
913   return data->version_info + ver;
914 }
915 
916 /* Returns the info for ssa name NAME.  */
917 
918 static inline struct version_info *
919 name_info (struct ivopts_data *data, tree name)
920 {
921   return ver_info (data, SSA_NAME_VERSION (name));
922 }
923 
924 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
925    emitted in LOOP.  */
926 
927 static bool
928 stmt_after_ip_normal_pos (class loop *loop, gimple *stmt)
929 {
930   basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
931 
932   gcc_assert (bb);
933 
934   if (sbb == loop->latch)
935     return true;
936 
937   if (sbb != bb)
938     return false;
939 
940   return stmt == last_stmt (bb);
941 }
942 
943 /* Returns true if STMT is after the place where the original induction
944    variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
945    if the positions are identical.  */
946 
947 static bool
948 stmt_after_inc_pos (struct iv_cand *cand, gimple *stmt, bool true_if_equal)
949 {
950   basic_block cand_bb = gimple_bb (cand->incremented_at);
951   basic_block stmt_bb = gimple_bb (stmt);
952 
953   if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
954     return false;
955 
956   if (stmt_bb != cand_bb)
957     return true;
958 
959   if (true_if_equal
960       && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
961     return true;
962   return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
963 }
964 
965 /* Returns true if STMT is after the place where the induction variable
966    CAND is incremented in LOOP.  */
967 
968 static bool
969 stmt_after_increment (class loop *loop, struct iv_cand *cand, gimple *stmt)
970 {
971   switch (cand->pos)
972     {
973     case IP_END:
974       return false;
975 
976     case IP_NORMAL:
977       return stmt_after_ip_normal_pos (loop, stmt);
978 
979     case IP_ORIGINAL:
980     case IP_AFTER_USE:
981       return stmt_after_inc_pos (cand, stmt, false);
982 
983     case IP_BEFORE_USE:
984       return stmt_after_inc_pos (cand, stmt, true);
985 
986     default:
987       gcc_unreachable ();
988     }
989 }
990 
991 /* walk_tree callback for contains_abnormal_ssa_name_p.  */
992 
993 static tree
994 contains_abnormal_ssa_name_p_1 (tree *tp, int *walk_subtrees, void *)
995 {
996   if (TREE_CODE (*tp) == SSA_NAME
997       && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (*tp))
998     return *tp;
999 
1000   if (!EXPR_P (*tp))
1001     *walk_subtrees = 0;
1002 
1003   return NULL_TREE;
1004 }
1005 
1006 /* Returns true if EXPR contains a ssa name that occurs in an
1007    abnormal phi node.  */
1008 
1009 bool
1010 contains_abnormal_ssa_name_p (tree expr)
1011 {
1012   return walk_tree_without_duplicates
1013 	   (&expr, contains_abnormal_ssa_name_p_1, NULL) != NULL_TREE;
1014 }
1015 
1016 /*  Returns the structure describing number of iterations determined from
1017     EXIT of DATA->current_loop, or NULL if something goes wrong.  */
1018 
1019 static class tree_niter_desc *
1020 niter_for_exit (struct ivopts_data *data, edge exit)
1021 {
1022   class tree_niter_desc *desc;
1023   tree_niter_desc **slot;
1024 
1025   if (!data->niters)
1026     {
1027       data->niters = new hash_map<edge, tree_niter_desc *>;
1028       slot = NULL;
1029     }
1030   else
1031     slot = data->niters->get (exit);
1032 
1033   if (!slot)
1034     {
1035       /* Try to determine number of iterations.  We cannot safely work with ssa
1036 	 names that appear in phi nodes on abnormal edges, so that we do not
1037 	 create overlapping life ranges for them (PR 27283).  */
1038       desc = XNEW (class tree_niter_desc);
1039       if (!number_of_iterations_exit (data->current_loop,
1040 				      exit, desc, true)
1041      	  || contains_abnormal_ssa_name_p (desc->niter))
1042 	{
1043 	  XDELETE (desc);
1044 	  desc = NULL;
1045 	}
1046       data->niters->put (exit, desc);
1047     }
1048   else
1049     desc = *slot;
1050 
1051   return desc;
1052 }
1053 
1054 /* Returns the structure describing number of iterations determined from
1055    single dominating exit of DATA->current_loop, or NULL if something
1056    goes wrong.  */
1057 
1058 static class tree_niter_desc *
1059 niter_for_single_dom_exit (struct ivopts_data *data)
1060 {
1061   edge exit = single_dom_exit (data->current_loop);
1062 
1063   if (!exit)
1064     return NULL;
1065 
1066   return niter_for_exit (data, exit);
1067 }
1068 
1069 /* Initializes data structures used by the iv optimization pass, stored
1070    in DATA.  */
1071 
1072 static void
1073 tree_ssa_iv_optimize_init (struct ivopts_data *data)
1074 {
1075   data->version_info_size = 2 * num_ssa_names;
1076   data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
1077   data->relevant = BITMAP_ALLOC (NULL);
1078   data->important_candidates = BITMAP_ALLOC (NULL);
1079   data->max_inv_var_id = 0;
1080   data->max_inv_expr_id = 0;
1081   data->niters = NULL;
1082   data->vgroups.create (20);
1083   data->vcands.create (20);
1084   data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
1085   data->name_expansion_cache = NULL;
1086   data->base_object_map = NULL;
1087   data->iv_common_cand_tab = new hash_table<iv_common_cand_hasher> (10);
1088   data->iv_common_cands.create (20);
1089   decl_rtl_to_reset.create (20);
1090   gcc_obstack_init (&data->iv_obstack);
1091 }
1092 
1093 /* walk_tree callback for determine_base_object.  */
1094 
1095 static tree
1096 determine_base_object_1 (tree *tp, int *walk_subtrees, void *wdata)
1097 {
1098   tree_code code = TREE_CODE (*tp);
1099   tree obj = NULL_TREE;
1100   if (code == ADDR_EXPR)
1101     {
1102       tree base = get_base_address (TREE_OPERAND (*tp, 0));
1103       if (!base)
1104 	obj = *tp;
1105       else if (TREE_CODE (base) != MEM_REF)
1106 	obj = fold_convert (ptr_type_node, build_fold_addr_expr (base));
1107     }
1108   else if (code == SSA_NAME && POINTER_TYPE_P (TREE_TYPE (*tp)))
1109 	obj = fold_convert (ptr_type_node, *tp);
1110 
1111   if (!obj)
1112     {
1113       if (!EXPR_P (*tp))
1114 	*walk_subtrees = 0;
1115 
1116       return NULL_TREE;
1117     }
1118   /* Record special node for multiple base objects and stop.  */
1119   if (*static_cast<tree *> (wdata))
1120     {
1121       *static_cast<tree *> (wdata) = integer_zero_node;
1122       return integer_zero_node;
1123     }
1124   /* Record the base object and continue looking.  */
1125   *static_cast<tree *> (wdata) = obj;
1126   return NULL_TREE;
1127 }
1128 
1129 /* Returns a memory object to which EXPR points, with caching.  Return NULL if we
1130    are able to determine that it does not point to any such object; specially
1131    return integer_zero_node if EXPR contains multiple base objects.  */
1132 
1133 static tree
1134 determine_base_object (struct ivopts_data *data, tree expr)
1135 {
1136   tree *slot, obj = NULL_TREE;
1137   if (data->base_object_map)
1138     {
1139       if ((slot = data->base_object_map->get(expr)) != NULL)
1140 	return *slot;
1141     }
1142   else
1143     data->base_object_map = new hash_map<tree, tree>;
1144 
1145   (void) walk_tree_without_duplicates (&expr, determine_base_object_1, &obj);
1146   data->base_object_map->put (expr, obj);
1147   return obj;
1148 }
1149 
1150 /* Return true if address expression with non-DECL_P operand appears
1151    in EXPR.  */
1152 
1153 static bool
1154 contain_complex_addr_expr (tree expr)
1155 {
1156   bool res = false;
1157 
1158   STRIP_NOPS (expr);
1159   switch (TREE_CODE (expr))
1160     {
1161     case POINTER_PLUS_EXPR:
1162     case PLUS_EXPR:
1163     case MINUS_EXPR:
1164       res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
1165       res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
1166       break;
1167 
1168     case ADDR_EXPR:
1169       return (!DECL_P (TREE_OPERAND (expr, 0)));
1170 
1171     default:
1172       return false;
1173     }
1174 
1175   return res;
1176 }
1177 
1178 /* Allocates an induction variable with given initial value BASE and step STEP
1179    for loop LOOP.  NO_OVERFLOW implies the iv doesn't overflow.  */
1180 
1181 static struct iv *
1182 alloc_iv (struct ivopts_data *data, tree base, tree step,
1183 	  bool no_overflow = false)
1184 {
1185   tree expr = base;
1186   struct iv *iv = (struct iv*) obstack_alloc (&data->iv_obstack,
1187 					      sizeof (struct iv));
1188   gcc_assert (step != NULL_TREE);
1189 
1190   /* Lower address expression in base except ones with DECL_P as operand.
1191      By doing this:
1192        1) More accurate cost can be computed for address expressions;
1193        2) Duplicate candidates won't be created for bases in different
1194 	  forms, like &a[0] and &a.  */
1195   STRIP_NOPS (expr);
1196   if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
1197       || contain_complex_addr_expr (expr))
1198     {
1199       aff_tree comb;
1200       tree_to_aff_combination (expr, TREE_TYPE (expr), &comb);
1201       base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
1202     }
1203 
1204   iv->base = base;
1205   iv->base_object = determine_base_object (data, base);
1206   iv->step = step;
1207   iv->biv_p = false;
1208   iv->nonlin_use = NULL;
1209   iv->ssa_name = NULL_TREE;
1210   if (!no_overflow
1211        && !iv_can_overflow_p (data->current_loop, TREE_TYPE (base),
1212 			      base, step))
1213     no_overflow = true;
1214   iv->no_overflow = no_overflow;
1215   iv->have_address_use = false;
1216 
1217   return iv;
1218 }
1219 
1220 /* Sets STEP and BASE for induction variable IV.  NO_OVERFLOW implies the IV
1221    doesn't overflow.  */
1222 
1223 static void
1224 set_iv (struct ivopts_data *data, tree iv, tree base, tree step,
1225 	bool no_overflow)
1226 {
1227   struct version_info *info = name_info (data, iv);
1228 
1229   gcc_assert (!info->iv);
1230 
1231   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
1232   info->iv = alloc_iv (data, base, step, no_overflow);
1233   info->iv->ssa_name = iv;
1234 }
1235 
1236 /* Finds induction variable declaration for VAR.  */
1237 
1238 static struct iv *
1239 get_iv (struct ivopts_data *data, tree var)
1240 {
1241   basic_block bb;
1242   tree type = TREE_TYPE (var);
1243 
1244   if (!POINTER_TYPE_P (type)
1245       && !INTEGRAL_TYPE_P (type))
1246     return NULL;
1247 
1248   if (!name_info (data, var)->iv)
1249     {
1250       bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1251 
1252       if (!bb
1253 	  || !flow_bb_inside_loop_p (data->current_loop, bb))
1254 	{
1255 	  if (POINTER_TYPE_P (type))
1256 	    type = sizetype;
1257 	  set_iv (data, var, var, build_int_cst (type, 0), true);
1258 	}
1259     }
1260 
1261   return name_info (data, var)->iv;
1262 }
1263 
1264 /* Return the first non-invariant ssa var found in EXPR.  */
1265 
1266 static tree
1267 extract_single_var_from_expr (tree expr)
1268 {
1269   int i, n;
1270   tree tmp;
1271   enum tree_code code;
1272 
1273   if (!expr || is_gimple_min_invariant (expr))
1274     return NULL;
1275 
1276   code = TREE_CODE (expr);
1277   if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1278     {
1279       n = TREE_OPERAND_LENGTH (expr);
1280       for (i = 0; i < n; i++)
1281 	{
1282 	  tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));
1283 
1284 	  if (tmp)
1285 	    return tmp;
1286 	}
1287     }
1288   return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
1289 }
1290 
1291 /* Finds basic ivs.  */
1292 
1293 static bool
1294 find_bivs (struct ivopts_data *data)
1295 {
1296   gphi *phi;
1297   affine_iv iv;
1298   tree step, type, base, stop;
1299   bool found = false;
1300   class loop *loop = data->current_loop;
1301   gphi_iterator psi;
1302 
1303   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1304     {
1305       phi = psi.phi ();
1306 
1307       if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
1308 	continue;
1309 
1310       if (virtual_operand_p (PHI_RESULT (phi)))
1311 	continue;
1312 
1313       if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true))
1314 	continue;
1315 
1316       if (integer_zerop (iv.step))
1317 	continue;
1318 
1319       step = iv.step;
1320       base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1321       /* Stop expanding iv base at the first ssa var referred by iv step.
1322 	 Ideally we should stop at any ssa var, but since that is expensive
1323 	 and such cases rarely happen, we just do it on the first one.
1324 
1325 	 See PR64705 for the rationale.  */
1326       stop = extract_single_var_from_expr (step);
1327       base = expand_simple_operations (base, stop);
1328       if (contains_abnormal_ssa_name_p (base)
1329 	  || contains_abnormal_ssa_name_p (step))
1330 	continue;
1331 
1332       type = TREE_TYPE (PHI_RESULT (phi));
1333       base = fold_convert (type, base);
1334       if (step)
1335 	{
1336 	  if (POINTER_TYPE_P (type))
1337 	    step = convert_to_ptrofftype (step);
1338 	  else
1339 	    step = fold_convert (type, step);
1340 	}
1341 
1342       set_iv (data, PHI_RESULT (phi), base, step, iv.no_overflow);
1343       found = true;
1344     }
1345 
1346   return found;
1347 }
1348 
1349 /* Marks basic ivs.  */
1350 
1351 static void
1352 mark_bivs (struct ivopts_data *data)
1353 {
1354   gphi *phi;
1355   gimple *def;
1356   tree var;
1357   struct iv *iv, *incr_iv;
1358   class loop *loop = data->current_loop;
1359   basic_block incr_bb;
1360   gphi_iterator psi;
1361 
1362   data->bivs_not_used_in_addr = 0;
1363   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1364     {
1365       phi = psi.phi ();
1366 
1367       iv = get_iv (data, PHI_RESULT (phi));
1368       if (!iv)
1369 	continue;
1370 
1371       var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1372       def = SSA_NAME_DEF_STMT (var);
1373       /* Don't mark iv peeled from other one as biv.  */
1374       if (def
1375 	  && gimple_code (def) == GIMPLE_PHI
1376 	  && gimple_bb (def) == loop->header)
1377 	continue;
1378 
1379       incr_iv = get_iv (data, var);
1380       if (!incr_iv)
1381 	continue;
1382 
1383       /* If the increment is in the subloop, ignore it.  */
1384       incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1385       if (incr_bb->loop_father != data->current_loop
1386 	  || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1387 	continue;
1388 
1389       iv->biv_p = true;
1390       incr_iv->biv_p = true;
1391       if (iv->no_overflow)
1392 	data->bivs_not_used_in_addr++;
1393       if (incr_iv->no_overflow)
1394 	data->bivs_not_used_in_addr++;
1395     }
1396 }
1397 
1398 /* Checks whether STMT defines a linear induction variable and stores its
1399    parameters to IV.  */
1400 
1401 static bool
1402 find_givs_in_stmt_scev (struct ivopts_data *data, gimple *stmt, affine_iv *iv)
1403 {
1404   tree lhs, stop;
1405   class loop *loop = data->current_loop;
1406 
1407   iv->base = NULL_TREE;
1408   iv->step = NULL_TREE;
1409 
1410   if (gimple_code (stmt) != GIMPLE_ASSIGN)
1411     return false;
1412 
1413   lhs = gimple_assign_lhs (stmt);
1414   if (TREE_CODE (lhs) != SSA_NAME)
1415     return false;
1416 
1417   if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1418     return false;
1419 
1420   /* Stop expanding iv base at the first ssa var referred by iv step.
1421      Ideally we should stop at any ssa var, but since that is expensive
1422      and such cases rarely happen, we just do it on the first one.
1423 
1424      See PR64705 for the rationale.  */
1425   stop = extract_single_var_from_expr (iv->step);
1426   iv->base = expand_simple_operations (iv->base, stop);
1427   if (contains_abnormal_ssa_name_p (iv->base)
1428       || contains_abnormal_ssa_name_p (iv->step))
1429     return false;
1430 
1431   /* If STMT could throw, then do not consider STMT as defining a GIV.
1432      While this will suppress optimizations, we cannot safely delete this
1433      GIV and associated statements, even if it appears it is not used.  */
1434   if (stmt_could_throw_p (cfun, stmt))
1435     return false;
1436 
1437   return true;
1438 }
1439 
1440 /* Finds general ivs in statement STMT.  */
1441 
1442 static void
1443 find_givs_in_stmt (struct ivopts_data *data, gimple *stmt)
1444 {
1445   affine_iv iv;
1446 
1447   if (!find_givs_in_stmt_scev (data, stmt, &iv))
1448     return;
1449 
1450   set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step, iv.no_overflow);
1451 }
1452 
1453 /* Finds general ivs in basic block BB.  */
1454 
1455 static void
1456 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1457 {
1458   gimple_stmt_iterator bsi;
1459 
1460   for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1461     find_givs_in_stmt (data, gsi_stmt (bsi));
1462 }
1463 
1464 /* Finds general ivs.  */
1465 
1466 static void
1467 find_givs (struct ivopts_data *data, basic_block *body)
1468 {
1469   class loop *loop = data->current_loop;
1470   unsigned i;
1471 
1472   for (i = 0; i < loop->num_nodes; i++)
1473     find_givs_in_bb (data, body[i]);
1474 }
1475 
1476 /* For each ssa name defined in LOOP determines whether it is an induction
1477    variable and if so, its initial value and step.  */
1478 
1479 static bool
1480 find_induction_variables (struct ivopts_data *data, basic_block *body)
1481 {
1482   unsigned i;
1483   bitmap_iterator bi;
1484 
1485   if (!find_bivs (data))
1486     return false;
1487 
1488   find_givs (data, body);
1489   mark_bivs (data);
1490 
1491   if (dump_file && (dump_flags & TDF_DETAILS))
1492     {
1493       class tree_niter_desc *niter = niter_for_single_dom_exit (data);
1494 
1495       if (niter)
1496 	{
1497 	  fprintf (dump_file, "  number of iterations ");
1498 	  print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1499 	  if (!integer_zerop (niter->may_be_zero))
1500 	    {
1501 	      fprintf (dump_file, "; zero if ");
1502 	      print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1503 	    }
1504 	  fprintf (dump_file, "\n");
1505 	};
1506 
1507       fprintf (dump_file, "\n<Induction Vars>:\n");
1508       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1509 	{
1510 	  struct version_info *info = ver_info (data, i);
1511 	  if (info->iv && info->iv->step && !integer_zerop (info->iv->step))
1512 	    dump_iv (dump_file, ver_info (data, i)->iv, true, 0);
1513 	}
1514     }
1515 
1516   return true;
1517 }
1518 
1519 /* Records a use of TYPE at *USE_P in STMT whose value is IV in GROUP.
1520    For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
1521    is the const offset stripped from IV base and MEM_TYPE is the type
1522    of the memory being addressed.  For uses of other types, ADDR_BASE
1523    and ADDR_OFFSET are zero by default and MEM_TYPE is NULL_TREE.  */
1524 
1525 static struct iv_use *
1526 record_use (struct iv_group *group, tree *use_p, struct iv *iv,
1527 	    gimple *stmt, enum use_type type, tree mem_type,
1528 	    tree addr_base, poly_uint64 addr_offset)
1529 {
1530   struct iv_use *use = XCNEW (struct iv_use);
1531 
1532   use->id = group->vuses.length ();
1533   use->group_id = group->id;
1534   use->type = type;
1535   use->mem_type = mem_type;
1536   use->iv = iv;
1537   use->stmt = stmt;
1538   use->op_p = use_p;
1539   use->addr_base = addr_base;
1540   use->addr_offset = addr_offset;
1541 
1542   group->vuses.safe_push (use);
1543   return use;
1544 }
1545 
1546 /* Checks whether OP is a loop-level invariant and if so, records it.
1547    NONLINEAR_USE is true if the invariant is used in a way we do not
1548    handle specially.  */
1549 
1550 static void
1551 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1552 {
1553   basic_block bb;
1554   struct version_info *info;
1555 
1556   if (TREE_CODE (op) != SSA_NAME
1557       || virtual_operand_p (op))
1558     return;
1559 
1560   bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1561   if (bb
1562       && flow_bb_inside_loop_p (data->current_loop, bb))
1563     return;
1564 
1565   info = name_info (data, op);
1566   info->name = op;
1567   info->has_nonlin_use |= nonlinear_use;
1568   if (!info->inv_id)
1569     info->inv_id = ++data->max_inv_var_id;
1570   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1571 }
1572 
1573 /* Record a group of TYPE.  */
1574 
1575 static struct iv_group *
1576 record_group (struct ivopts_data *data, enum use_type type)
1577 {
1578   struct iv_group *group = XCNEW (struct iv_group);
1579 
1580   group->id = data->vgroups.length ();
1581   group->type = type;
1582   group->related_cands = BITMAP_ALLOC (NULL);
1583   group->vuses.create (1);
1584   group->doloop_p = false;
1585 
1586   data->vgroups.safe_push (group);
1587   return group;
1588 }
1589 
1590 /* Record a use of TYPE at *USE_P in STMT whose value is IV in a group.
1591    New group will be created if there is no existing group for the use.
1592    MEM_TYPE is the type of memory being addressed, or NULL if this
1593    isn't an address reference.  */
1594 
1595 static struct iv_use *
1596 record_group_use (struct ivopts_data *data, tree *use_p,
1597 		  struct iv *iv, gimple *stmt, enum use_type type,
1598 		  tree mem_type)
1599 {
1600   tree addr_base = NULL;
1601   struct iv_group *group = NULL;
1602   poly_uint64 addr_offset = 0;
1603 
1604   /* Record non address type use in a new group.  */
1605   if (address_p (type))
1606     {
1607       unsigned int i;
1608 
1609       addr_base = strip_offset (iv->base, &addr_offset);
1610       for (i = 0; i < data->vgroups.length (); i++)
1611 	{
1612 	  struct iv_use *use;
1613 
1614 	  group = data->vgroups[i];
1615 	  use = group->vuses[0];
1616 	  if (!address_p (use->type))
1617 	    continue;
1618 
1619 	  /* Check if it has the same stripped base and step.  */
1620 	  if (operand_equal_p (iv->base_object, use->iv->base_object, 0)
1621 	      && operand_equal_p (iv->step, use->iv->step, 0)
1622 	      && operand_equal_p (addr_base, use->addr_base, 0))
1623 	    break;
1624 	}
1625       if (i == data->vgroups.length ())
1626 	group = NULL;
1627     }
1628 
1629   if (!group)
1630     group = record_group (data, type);
1631 
1632   return record_use (group, use_p, iv, stmt, type, mem_type,
1633 		     addr_base, addr_offset);
1634 }
1635 
1636 /* Checks whether the use OP is interesting and if so, records it.  */
1637 
1638 static struct iv_use *
1639 find_interesting_uses_op (struct ivopts_data *data, tree op)
1640 {
1641   struct iv *iv;
1642   gimple *stmt;
1643   struct iv_use *use;
1644 
1645   if (TREE_CODE (op) != SSA_NAME)
1646     return NULL;
1647 
1648   iv = get_iv (data, op);
1649   if (!iv)
1650     return NULL;
1651 
1652   if (iv->nonlin_use)
1653     {
1654       gcc_assert (iv->nonlin_use->type == USE_NONLINEAR_EXPR);
1655       return iv->nonlin_use;
1656     }
1657 
1658   if (integer_zerop (iv->step))
1659     {
1660       record_invariant (data, op, true);
1661       return NULL;
1662     }
1663 
1664   stmt = SSA_NAME_DEF_STMT (op);
1665   gcc_assert (gimple_code (stmt) == GIMPLE_PHI || is_gimple_assign (stmt));
1666 
1667   use = record_group_use (data, NULL, iv, stmt, USE_NONLINEAR_EXPR, NULL_TREE);
1668   iv->nonlin_use = use;
1669   return use;
1670 }
1671 
1672 /* Indicate how compare type iv_use can be handled.  */
1673 enum comp_iv_rewrite
1674 {
1675   COMP_IV_NA,
1676   /* We may rewrite compare type iv_use by expressing value of the iv_use.  */
1677   COMP_IV_EXPR,
1678   /* We may rewrite compare type iv_uses on both sides of comparison by
1679      expressing value of each iv_use.  */
1680   COMP_IV_EXPR_2,
1681   /* We may rewrite compare type iv_use by expressing value of the iv_use
1682      or by eliminating it with other iv_cand.  */
1683   COMP_IV_ELIM
1684 };
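
/* For instance (illustrative only): for "i < n" where i is an IV and n is
   loop invariant, the use can be rewritten as COMP_IV_ELIM; for "p < q"
   where both p and q are IVs, it is COMP_IV_EXPR_2.  */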
1685 
1686 /* Given a condition in statement STMT, checks whether it is a compare
1687    of an induction variable and an invariant.  If this is the case,
1688    CONTROL_VAR is set to location of the iv, BOUND to the location of
1689    the invariant, IV_VAR and IV_BOUND are set to the corresponding
1690    induction variable descriptions, and the kind of rewrite is returned.
1691    If this is not the case, CONTROL_VAR and BOUND are set to the arguments
1692    of the condition and COMP_IV_NA is returned.  */
1693 
1694 static enum comp_iv_rewrite
1695 extract_cond_operands (struct ivopts_data *data, gimple *stmt,
1696 		       tree **control_var, tree **bound,
1697 		       struct iv **iv_var, struct iv **iv_bound)
1698 {
1699   /* The objects returned when COND has constant operands.  */
1700   static struct iv const_iv;
1701   static tree zero;
1702   tree *op0 = &zero, *op1 = &zero;
1703   struct iv *iv0 = &const_iv, *iv1 = &const_iv;
1704   enum comp_iv_rewrite rewrite_type = COMP_IV_NA;
1705 
1706   if (gimple_code (stmt) == GIMPLE_COND)
1707     {
1708       gcond *cond_stmt = as_a <gcond *> (stmt);
1709       op0 = gimple_cond_lhs_ptr (cond_stmt);
1710       op1 = gimple_cond_rhs_ptr (cond_stmt);
1711     }
1712   else
1713     {
1714       op0 = gimple_assign_rhs1_ptr (stmt);
1715       op1 = gimple_assign_rhs2_ptr (stmt);
1716     }
1717 
1718   zero = integer_zero_node;
1719   const_iv.step = integer_zero_node;
1720 
1721   if (TREE_CODE (*op0) == SSA_NAME)
1722     iv0 = get_iv (data, *op0);
1723   if (TREE_CODE (*op1) == SSA_NAME)
1724     iv1 = get_iv (data, *op1);
1725 
1726   /* If both sides of the comparison are IVs, we can express ivs on both ends.  */
1727   if (iv0 && iv1 && !integer_zerop (iv0->step) && !integer_zerop (iv1->step))
1728     {
1729       rewrite_type = COMP_IV_EXPR_2;
1730       goto end;
1731     }
1732 
1733   /* If neither side of the comparison is an IV.  */
1734   if ((!iv0 || integer_zerop (iv0->step))
1735       && (!iv1 || integer_zerop (iv1->step)))
1736     goto end;
1737 
1738   /* Control variable may be on the other side.  */
1739   if (!iv0 || integer_zerop (iv0->step))
1740     {
1741       std::swap (op0, op1);
1742       std::swap (iv0, iv1);
1743     }
1744   /* If one side is IV and the other side isn't loop invariant.  */
1745   if (!iv1)
1746     rewrite_type = COMP_IV_EXPR;
1747   /* If one side is IV and the other side is loop invariant.  */
1748   else if (!integer_zerop (iv0->step) && integer_zerop (iv1->step))
1749     rewrite_type = COMP_IV_ELIM;
1750 
1751 end:
1752   if (control_var)
1753     *control_var = op0;
1754   if (iv_var)
1755     *iv_var = iv0;
1756   if (bound)
1757     *bound = op1;
1758   if (iv_bound)
1759     *iv_bound = iv1;
1760 
1761   return rewrite_type;
1762 }
1763 
1764 /* Checks whether the condition in STMT is interesting and if so,
1765    records it.  */
1766 
1767 static void
1768 find_interesting_uses_cond (struct ivopts_data *data, gimple *stmt)
1769 {
1770   tree *var_p, *bound_p;
1771   struct iv *var_iv, *bound_iv;
1772   enum comp_iv_rewrite ret;
1773 
1774   ret = extract_cond_operands (data, stmt,
1775 			       &var_p, &bound_p, &var_iv, &bound_iv);
1776   if (ret == COMP_IV_NA)
1777     {
1778       find_interesting_uses_op (data, *var_p);
1779       find_interesting_uses_op (data, *bound_p);
1780       return;
1781     }
1782 
1783   record_group_use (data, var_p, var_iv, stmt, USE_COMPARE, NULL_TREE);
1784   /* Record compare type iv_use for iv on the other side of comparison.  */
1785   if (ret == COMP_IV_EXPR_2)
1786     record_group_use (data, bound_p, bound_iv, stmt, USE_COMPARE, NULL_TREE);
1787 }
1788 
1789 /* Returns the outermost loop EXPR is obviously invariant in
1790    relative to the loop LOOP, i.e. if all its operands are defined
1791    outside of the returned loop.  Returns NULL if EXPR is not
1792    even obviously invariant in LOOP.  */
1793 
1794 class loop *
1795 outermost_invariant_loop_for_expr (class loop *loop, tree expr)
1796 {
1797   basic_block def_bb;
1798   unsigned i, len;
1799 
1800   if (is_gimple_min_invariant (expr))
1801     return current_loops->tree_root;
1802 
1803   if (TREE_CODE (expr) == SSA_NAME)
1804     {
1805       def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1806       if (def_bb)
1807 	{
1808 	  if (flow_bb_inside_loop_p (loop, def_bb))
1809 	    return NULL;
1810 	  return superloop_at_depth (loop,
1811 				     loop_depth (def_bb->loop_father) + 1);
1812 	}
1813 
1814       return current_loops->tree_root;
1815     }
1816 
1817   if (!EXPR_P (expr))
1818     return NULL;
1819 
1820   unsigned maxdepth = 0;
1821   len = TREE_OPERAND_LENGTH (expr);
1822   for (i = 0; i < len; i++)
1823     {
1824       class loop *ivloop;
1825       if (!TREE_OPERAND (expr, i))
1826 	continue;
1827 
1828       ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
1829       if (!ivloop)
1830 	return NULL;
1831       maxdepth = MAX (maxdepth, loop_depth (ivloop));
1832     }
1833 
1834   return superloop_at_depth (loop, maxdepth);
1835 }
1836 
1837 /* Returns true if expression EXPR is obviously invariant in LOOP,
1838    i.e. if all its operands are defined outside of the LOOP.  LOOP
1839    should not be the function body.  */
1840 
1841 bool
1842 expr_invariant_in_loop_p (class loop *loop, tree expr)
1843 {
1844   basic_block def_bb;
1845   unsigned i, len;
1846 
1847   gcc_assert (loop_depth (loop) > 0);
1848 
1849   if (is_gimple_min_invariant (expr))
1850     return true;
1851 
1852   if (TREE_CODE (expr) == SSA_NAME)
1853     {
1854       def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1855       if (def_bb
1856 	  && flow_bb_inside_loop_p (loop, def_bb))
1857 	return false;
1858 
1859       return true;
1860     }
1861 
1862   if (!EXPR_P (expr))
1863     return false;
1864 
1865   len = TREE_OPERAND_LENGTH (expr);
1866   for (i = 0; i < len; i++)
1867     if (TREE_OPERAND (expr, i)
1868 	&& !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1869       return false;
1870 
1871   return true;
1872 }
1873 
1874 /* Given expression EXPR which computes inductive values with respect
1875    to loop recorded in DATA, this function returns biv from which EXPR
1876    is derived by tracing definition chains of ssa variables in EXPR.  */
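/* For illustration: if EXPR is an ssa name defined by "t = b * 4" where
   b is a biv of the loop, the definition chain t -> b is followed and
   the iv describing b is returned.  */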
1877 
1878 static struct iv*
1879 find_deriving_biv_for_expr (struct ivopts_data *data, tree expr)
1880 {
1881   struct iv *iv;
1882   unsigned i, n;
1883   tree e2, e1;
1884   enum tree_code code;
1885   gimple *stmt;
1886 
1887   if (expr == NULL_TREE)
1888     return NULL;
1889 
1890   if (is_gimple_min_invariant (expr))
1891     return NULL;
1892 
1893   code = TREE_CODE (expr);
1894   if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1895     {
1896       n = TREE_OPERAND_LENGTH (expr);
1897       for (i = 0; i < n; i++)
1898 	{
1899 	  iv = find_deriving_biv_for_expr (data, TREE_OPERAND (expr, i));
1900 	  if (iv)
1901 	    return iv;
1902 	}
1903     }
1904 
1905   /* Stop if it's not an SSA name.  */
1906   if (code != SSA_NAME)
1907     return NULL;
1908 
1909   iv = get_iv (data, expr);
1910   if (!iv || integer_zerop (iv->step))
1911     return NULL;
1912   else if (iv->biv_p)
1913     return iv;
1914 
1915   stmt = SSA_NAME_DEF_STMT (expr);
1916   if (gphi *phi = dyn_cast <gphi *> (stmt))
1917     {
1918       ssa_op_iter iter;
1919       use_operand_p use_p;
1920       basic_block phi_bb = gimple_bb (phi);
1921 
1922       /* Skip loop header PHI that doesn't define biv.  */
1923       if (phi_bb->loop_father == data->current_loop)
1924 	return NULL;
1925 
1926       if (virtual_operand_p (gimple_phi_result (phi)))
1927 	return NULL;
1928 
1929       FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
1930 	{
1931 	  tree use = USE_FROM_PTR (use_p);
1932 	  iv = find_deriving_biv_for_expr (data, use);
1933 	  if (iv)
1934 	    return iv;
1935 	}
1936       return NULL;
1937     }
1938   if (gimple_code (stmt) != GIMPLE_ASSIGN)
1939     return NULL;
1940 
1941   e1 = gimple_assign_rhs1 (stmt);
1942   code = gimple_assign_rhs_code (stmt);
1943   if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
1944     return find_deriving_biv_for_expr (data, e1);
1945 
1946   switch (code)
1947     {
1948     case MULT_EXPR:
1949     case PLUS_EXPR:
1950     case MINUS_EXPR:
1951     case POINTER_PLUS_EXPR:
1952       /* Increments, decrements and multiplications by a constant
1953 	 are simple.  */
1954       e2 = gimple_assign_rhs2 (stmt);
1955       iv = find_deriving_biv_for_expr (data, e2);
1956       if (iv)
1957 	return iv;
1958       gcc_fallthrough ();
1959 
1960     CASE_CONVERT:
1961       /* Casts are simple.  */
1962       return find_deriving_biv_for_expr (data, e1);
1963 
1964     default:
1965       break;
1966     }
1967 
1968   return NULL;
1969 }
1970 
1971 /* Record that BIV, as well as any biv that is its predecessor or
1972    successor, is used in address type uses.  */
1973 
1974 static void
1975 record_biv_for_address_use (struct ivopts_data *data, struct iv *biv)
1976 {
1977   unsigned i;
1978   tree type, base_1, base_2;
1979   bitmap_iterator bi;
1980 
1981   if (!biv || !biv->biv_p || integer_zerop (biv->step)
1982       || biv->have_address_use || !biv->no_overflow)
1983     return;
1984 
1985   type = TREE_TYPE (biv->base);
1986   if (!INTEGRAL_TYPE_P (type))
1987     return;
1988 
1989   biv->have_address_use = true;
1990   data->bivs_not_used_in_addr--;
1991   base_1 = fold_build2 (PLUS_EXPR, type, biv->base, biv->step);
1992   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1993     {
1994       struct iv *iv = ver_info (data, i)->iv;
1995 
1996       if (!iv || !iv->biv_p || integer_zerop (iv->step)
1997 	  || iv->have_address_use || !iv->no_overflow)
1998 	continue;
1999 
2000       if (type != TREE_TYPE (iv->base)
2001 	  || !INTEGRAL_TYPE_P (TREE_TYPE (iv->base)))
2002 	continue;
2003 
2004       if (!operand_equal_p (biv->step, iv->step, 0))
2005 	continue;
2006 
2007       base_2 = fold_build2 (PLUS_EXPR, type, iv->base, iv->step);
2008       if (operand_equal_p (base_1, iv->base, 0)
2009 	  || operand_equal_p (base_2, biv->base, 0))
2010 	{
2011 	  iv->have_address_use = true;
2012 	  data->bivs_not_used_in_addr--;
2013 	}
2014     }
2015 }
2016 
2017 /* Cumulates the steps of indices into DATA and replaces their values with the
2018    initial ones.  Returns false when the value of the index cannot be determined.
2019    Callback for for_each_index.  */
2020 
2021 struct ifs_ivopts_data
2022 {
2023   struct ivopts_data *ivopts_data;
2024   gimple *stmt;
2025   tree step;
2026 };
2027 
2028 static bool
2029 idx_find_step (tree base, tree *idx, void *data)
2030 {
2031   struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
2032   struct iv *iv;
2033   bool use_overflow_semantics = false;
2034   tree step, iv_base, iv_step, lbound, off;
2035   class loop *loop = dta->ivopts_data->current_loop;
2036 
2037   /* If base is a component ref, require that the offset of the reference
2038      be invariant.  */
2039   if (TREE_CODE (base) == COMPONENT_REF)
2040     {
2041       off = component_ref_field_offset (base);
2042       return expr_invariant_in_loop_p (loop, off);
2043     }
2044 
2045   /* If base is array, first check whether we will be able to move the
2046      reference out of the loop (in order to take its address in strength
2047      reduction).  In order for this to work we need both lower bound
2048      and step to be loop invariants.  */
2049   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2050     {
2051       /* Moreover, for a range, the size needs to be invariant as well.  */
2052       if (TREE_CODE (base) == ARRAY_RANGE_REF
2053 	  && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
2054 	return false;
2055 
2056       step = array_ref_element_size (base);
2057       lbound = array_ref_low_bound (base);
2058 
2059       if (!expr_invariant_in_loop_p (loop, step)
2060 	  || !expr_invariant_in_loop_p (loop, lbound))
2061 	return false;
2062     }
2063 
2064   if (TREE_CODE (*idx) != SSA_NAME)
2065     return true;
2066 
2067   iv = get_iv (dta->ivopts_data, *idx);
2068   if (!iv)
2069     return false;
2070 
2071   /* XXX  We produce for a base of *D42 with iv->base being &x[0]
2072 	  *&x[0], which is not folded and does not trigger the
2073 	  ARRAY_REF path below.  */
2074   *idx = iv->base;
2075 
2076   if (integer_zerop (iv->step))
2077     return true;
2078 
2079   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2080     {
2081       step = array_ref_element_size (base);
2082 
2083       /* We only handle addresses whose step is an integer constant.  */
2084       if (TREE_CODE (step) != INTEGER_CST)
2085 	return false;
2086     }
2087   else
2088     /* The step for pointer arithmetic is already 1 byte.  */
2089     step = size_one_node;
2090 
2091   iv_base = iv->base;
2092   iv_step = iv->step;
2093   if (iv->no_overflow && nowrap_type_p (TREE_TYPE (iv_step)))
2094     use_overflow_semantics = true;
2095 
2096   if (!convert_affine_scev (dta->ivopts_data->current_loop,
2097 			    sizetype, &iv_base, &iv_step, dta->stmt,
2098 			    use_overflow_semantics))
2099     {
2100       /* The index might wrap.  */
2101       return false;
2102     }
2103 
2104   step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
2105   dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
2106 
2107   if (dta->ivopts_data->bivs_not_used_in_addr)
2108     {
2109       if (!iv->biv_p)
2110 	iv = find_deriving_biv_for_expr (dta->ivopts_data, iv->ssa_name);
2111 
2112       record_biv_for_address_use (dta->ivopts_data, iv);
2113     }
2114   return true;
2115 }
2116 
2117 /* Records use in index IDX.  Callback for for_each_index.  Ivopts data
2118    object is passed to it in DATA.  */
2119 
2120 static bool
2121 idx_record_use (tree base, tree *idx,
2122 		void *vdata)
2123 {
2124   struct ivopts_data *data = (struct ivopts_data *) vdata;
2125   find_interesting_uses_op (data, *idx);
2126   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2127     {
2128       if (TREE_OPERAND (base, 2))
2129 	find_interesting_uses_op (data, TREE_OPERAND (base, 2));
2130       if (TREE_OPERAND (base, 3))
2131 	find_interesting_uses_op (data, TREE_OPERAND (base, 3));
2132     }
2133   return true;
2134 }
2135 
2136 /* If we can prove that TOP = cst * BOT for some constant cst,
2137    store cst to MUL and return true.  Otherwise return false.
2138    The returned value is always sign-extended, regardless of the
2139    signedness of TOP and BOT.  */
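/* For example, for TOP = n * 8 and BOT = n this stores 8 to *MUL and
   returns true, while for unrelated TOP and BOT it returns false.  */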
2140 
2141 static bool
2142 constant_multiple_of (tree top, tree bot, widest_int *mul)
2143 {
2144   tree mby;
2145   enum tree_code code;
2146   unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
2147   widest_int res, p0, p1;
2148 
2149   STRIP_NOPS (top);
2150   STRIP_NOPS (bot);
2151 
2152   if (operand_equal_p (top, bot, 0))
2153     {
2154       *mul = 1;
2155       return true;
2156     }
2157 
2158   code = TREE_CODE (top);
2159   switch (code)
2160     {
2161     case MULT_EXPR:
2162       mby = TREE_OPERAND (top, 1);
2163       if (TREE_CODE (mby) != INTEGER_CST)
2164 	return false;
2165 
2166       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
2167 	return false;
2168 
2169       *mul = wi::sext (res * wi::to_widest (mby), precision);
2170       return true;
2171 
2172     case PLUS_EXPR:
2173     case MINUS_EXPR:
2174       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
2175 	  || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
2176 	return false;
2177 
2178       if (code == MINUS_EXPR)
2179 	p1 = -p1;
2180       *mul = wi::sext (p0 + p1, precision);
2181       return true;
2182 
2183     case INTEGER_CST:
2184       if (TREE_CODE (bot) != INTEGER_CST)
2185 	return false;
2186 
2187       p0 = widest_int::from (wi::to_wide (top), SIGNED);
2188       p1 = widest_int::from (wi::to_wide (bot), SIGNED);
2189       if (p1 == 0)
2190 	return false;
2191       *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
2192       return res == 0;
2193 
2194     default:
2195       if (POLY_INT_CST_P (top)
2196 	  && POLY_INT_CST_P (bot)
2197 	  && constant_multiple_p (wi::to_poly_widest (top),
2198 				  wi::to_poly_widest (bot), mul))
2199 	return true;
2200 
2201       return false;
2202     }
2203 }
2204 
2205 /* Return true if memory reference REF with step STEP may be unaligned.  */
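/* For instance, assuming 32-bit alignment for int, an int reference whose
   step is 2 bytes may become misaligned after an iteration, so true is
   returned.  */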
2206 
2207 static bool
2208 may_be_unaligned_p (tree ref, tree step)
2209 {
2210   /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
2211      thus they are not misaligned.  */
2212   if (TREE_CODE (ref) == TARGET_MEM_REF)
2213     return false;
2214 
2215   unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
2216   if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
2217     align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));
2218 
2219   unsigned HOST_WIDE_INT bitpos;
2220   unsigned int ref_align;
2221   get_object_alignment_1 (ref, &ref_align, &bitpos);
2222   if (ref_align < align
2223       || (bitpos % align) != 0
2224       || (bitpos % BITS_PER_UNIT) != 0)
2225     return true;
2226 
2227   unsigned int trailing_zeros = tree_ctz (step);
2228   if (trailing_zeros < HOST_BITS_PER_INT
2229       && (1U << trailing_zeros) * BITS_PER_UNIT < align)
2230     return true;
2231 
2232   return false;
2233 }
2234 
2235 /* Return true if EXPR may be non-addressable.   */
2236 
2237 bool
2238 may_be_nonaddressable_p (tree expr)
2239 {
2240   switch (TREE_CODE (expr))
2241     {
2242     case VAR_DECL:
2243       /* Check if it's a register variable.  */
2244       return DECL_HARD_REGISTER (expr);
2245 
2246     case TARGET_MEM_REF:
2247       /* TARGET_MEM_REFs are translated directly to valid MEMs on the
2248 	 target, thus they are always addressable.  */
2249       return false;
2250 
2251     case MEM_REF:
2252       /* Likewise for MEM_REFs, modulo the storage order.  */
2253       return REF_REVERSE_STORAGE_ORDER (expr);
2254 
2255     case BIT_FIELD_REF:
2256       if (REF_REVERSE_STORAGE_ORDER (expr))
2257 	return true;
2258       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2259 
2260     case COMPONENT_REF:
2261       if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2262 	return true;
2263       return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
2264 	     || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2265 
2266     case ARRAY_REF:
2267     case ARRAY_RANGE_REF:
2268       if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2269 	return true;
2270       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2271 
2272     case VIEW_CONVERT_EXPR:
2273       /* This kind of view-conversions may wrap non-addressable objects
2274 	 and make them look addressable.  After some processing the
2275 	 non-addressability may be uncovered again, causing ADDR_EXPRs
2276 	 of inappropriate objects to be built.  */
2277       if (is_gimple_reg (TREE_OPERAND (expr, 0))
2278 	  || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
2279 	return true;
2280       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2281 
2282     CASE_CONVERT:
2283       return true;
2284 
2285     default:
2286       break;
2287     }
2288 
2289   return false;
2290 }
2291 
2292 /* Finds addresses in *OP_P inside STMT.  */
2293 
2294 static void
2295 find_interesting_uses_address (struct ivopts_data *data, gimple *stmt,
2296 			       tree *op_p)
2297 {
2298   tree base = *op_p, step = size_zero_node;
2299   struct iv *civ;
2300   struct ifs_ivopts_data ifs_ivopts_data;
2301 
2302   /* Do not play with volatile memory references.  A bit too conservative,
2303      perhaps, but safe.  */
2304   if (gimple_has_volatile_ops (stmt))
2305     goto fail;
2306 
2307   /* Ignore bitfields for now.  Not really something terribly complicated
2308      to handle.  TODO.  */
2309   if (TREE_CODE (base) == BIT_FIELD_REF)
2310     goto fail;
2311 
2312   base = unshare_expr (base);
2313 
2314   if (TREE_CODE (base) == TARGET_MEM_REF)
2315     {
2316       tree type = build_pointer_type (TREE_TYPE (base));
2317       tree astep;
2318 
2319       if (TMR_BASE (base)
2320 	  && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
2321 	{
2322 	  civ = get_iv (data, TMR_BASE (base));
2323 	  if (!civ)
2324 	    goto fail;
2325 
2326 	  TMR_BASE (base) = civ->base;
2327 	  step = civ->step;
2328 	}
2329       if (TMR_INDEX2 (base)
2330 	  && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
2331 	{
2332 	  civ = get_iv (data, TMR_INDEX2 (base));
2333 	  if (!civ)
2334 	    goto fail;
2335 
2336 	  TMR_INDEX2 (base) = civ->base;
2337 	  step = civ->step;
2338 	}
2339       if (TMR_INDEX (base)
2340 	  && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
2341 	{
2342 	  civ = get_iv (data, TMR_INDEX (base));
2343 	  if (!civ)
2344 	    goto fail;
2345 
2346 	  TMR_INDEX (base) = civ->base;
2347 	  astep = civ->step;
2348 
2349 	  if (astep)
2350 	    {
2351 	      if (TMR_STEP (base))
2352 		astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
2353 
2354 	      step = fold_build2 (PLUS_EXPR, type, step, astep);
2355 	    }
2356 	}
2357 
2358       if (integer_zerop (step))
2359 	goto fail;
2360       base = tree_mem_ref_addr (type, base);
2361     }
2362   else
2363     {
2364       ifs_ivopts_data.ivopts_data = data;
2365       ifs_ivopts_data.stmt = stmt;
2366       ifs_ivopts_data.step = size_zero_node;
2367       if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
2368 	  || integer_zerop (ifs_ivopts_data.step))
2369 	goto fail;
2370       step = ifs_ivopts_data.step;
2371 
2372       /* Check that the base expression is addressable.  This needs
2373 	 to be done after substituting bases of IVs into it.  */
2374       if (may_be_nonaddressable_p (base))
2375 	goto fail;
2376 
2377       /* Moreover, on strict alignment platforms, check that it is
2378 	 sufficiently aligned.  */
2379       if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
2380 	goto fail;
2381 
2382       base = build_fold_addr_expr (base);
2383 
2384       /* Substituting bases of IVs into the base expression might
2385 	 have caused folding opportunities.  */
2386       if (TREE_CODE (base) == ADDR_EXPR)
2387 	{
2388 	  tree *ref = &TREE_OPERAND (base, 0);
2389 	  while (handled_component_p (*ref))
2390 	    ref = &TREE_OPERAND (*ref, 0);
2391 	  if (TREE_CODE (*ref) == MEM_REF)
2392 	    {
2393 	      tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
2394 				      TREE_OPERAND (*ref, 0),
2395 				      TREE_OPERAND (*ref, 1));
2396 	      if (tem)
2397 		*ref = tem;
2398 	    }
2399 	}
2400     }
2401 
2402   civ = alloc_iv (data, base, step);
2403   /* Fail if base object of this memory reference is unknown.  */
2404   if (civ->base_object == NULL_TREE)
2405     goto fail;
2406 
2407   record_group_use (data, op_p, civ, stmt, USE_REF_ADDRESS, TREE_TYPE (*op_p));
2408   return;
2409 
2410 fail:
2411   for_each_index (op_p, idx_record_use, data);
2412 }
2413 
2414 /* Finds and records invariants used in STMT.  */
2415 
2416 static void
2417 find_invariants_stmt (struct ivopts_data *data, gimple *stmt)
2418 {
2419   ssa_op_iter iter;
2420   use_operand_p use_p;
2421   tree op;
2422 
2423   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2424     {
2425       op = USE_FROM_PTR (use_p);
2426       record_invariant (data, op, false);
2427     }
2428 }
2429 
2430 /* CALL calls an internal function.  If operand *OP_P will become an
2431    address when the call is expanded, return the type of the memory
2432    being addressed, otherwise return null.  */
2433 
2434 static tree
2435 get_mem_type_for_internal_fn (gcall *call, tree *op_p)
2436 {
2437   switch (gimple_call_internal_fn (call))
2438     {
2439     case IFN_MASK_LOAD:
2440     case IFN_MASK_LOAD_LANES:
2441     case IFN_LEN_LOAD:
2442       if (op_p == gimple_call_arg_ptr (call, 0))
2443 	return TREE_TYPE (gimple_call_lhs (call));
2444       return NULL_TREE;
2445 
2446     case IFN_MASK_STORE:
2447     case IFN_MASK_STORE_LANES:
2448     case IFN_LEN_STORE:
2449       if (op_p == gimple_call_arg_ptr (call, 0))
2450 	return TREE_TYPE (gimple_call_arg (call, 3));
2451       return NULL_TREE;
2452 
2453     default:
2454       return NULL_TREE;
2455     }
2456 }
2457 
2458 /* IV is a (non-address) iv that describes operand *OP_P of STMT.
2459    Return true if the operand will become an address when STMT
2460    is expanded and record the associated address use if so.  */
2461 
2462 static bool
2463 find_address_like_use (struct ivopts_data *data, gimple *stmt, tree *op_p,
2464 		       struct iv *iv)
2465 {
2466   /* Fail if base object of this memory reference is unknown.  */
2467   if (iv->base_object == NULL_TREE)
2468     return false;
2469 
2470   tree mem_type = NULL_TREE;
2471   if (gcall *call = dyn_cast <gcall *> (stmt))
2472     if (gimple_call_internal_p (call))
2473       mem_type = get_mem_type_for_internal_fn (call, op_p);
2474   if (mem_type)
2475     {
2476       iv = alloc_iv (data, iv->base, iv->step);
2477       record_group_use (data, op_p, iv, stmt, USE_PTR_ADDRESS, mem_type);
2478       return true;
2479     }
2480   return false;
2481 }
2482 
2483 /* Finds interesting uses of induction variables in the statement STMT.  */
2484 
2485 static void
2486 find_interesting_uses_stmt (struct ivopts_data *data, gimple *stmt)
2487 {
2488   struct iv *iv;
2489   tree op, *lhs, *rhs;
2490   ssa_op_iter iter;
2491   use_operand_p use_p;
2492   enum tree_code code;
2493 
2494   find_invariants_stmt (data, stmt);
2495 
2496   if (gimple_code (stmt) == GIMPLE_COND)
2497     {
2498       find_interesting_uses_cond (data, stmt);
2499       return;
2500     }
2501 
2502   if (is_gimple_assign (stmt))
2503     {
2504       lhs = gimple_assign_lhs_ptr (stmt);
2505       rhs = gimple_assign_rhs1_ptr (stmt);
2506 
2507       if (TREE_CODE (*lhs) == SSA_NAME)
2508 	{
2509 	  /* If the statement defines an induction variable, the uses are not
2510 	     interesting by themselves.  */
2511 
2512 	  iv = get_iv (data, *lhs);
2513 
2514 	  if (iv && !integer_zerop (iv->step))
2515 	    return;
2516 	}
2517 
2518       code = gimple_assign_rhs_code (stmt);
2519       if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
2520 	  && (REFERENCE_CLASS_P (*rhs)
2521 	      || is_gimple_val (*rhs)))
2522 	{
2523 	  if (REFERENCE_CLASS_P (*rhs))
2524 	    find_interesting_uses_address (data, stmt, rhs);
2525 	  else
2526 	    find_interesting_uses_op (data, *rhs);
2527 
2528 	  if (REFERENCE_CLASS_P (*lhs))
2529 	    find_interesting_uses_address (data, stmt, lhs);
2530 	  return;
2531 	}
2532       else if (TREE_CODE_CLASS (code) == tcc_comparison)
2533 	{
2534 	  find_interesting_uses_cond (data, stmt);
2535 	  return;
2536 	}
2537 
2538       /* TODO -- we should also handle address uses of type
2539 
2540 	 memory = call (whatever);
2541 
2542 	 and
2543 
2544 	 call (memory).  */
2545     }
2546 
2547   if (gimple_code (stmt) == GIMPLE_PHI
2548       && gimple_bb (stmt) == data->current_loop->header)
2549     {
2550       iv = get_iv (data, PHI_RESULT (stmt));
2551 
2552       if (iv && !integer_zerop (iv->step))
2553 	return;
2554     }
2555 
2556   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2557     {
2558       op = USE_FROM_PTR (use_p);
2559 
2560       if (TREE_CODE (op) != SSA_NAME)
2561 	continue;
2562 
2563       iv = get_iv (data, op);
2564       if (!iv)
2565 	continue;
2566 
2567       if (!find_address_like_use (data, stmt, use_p->use, iv))
2568 	find_interesting_uses_op (data, op);
2569     }
2570 }
2571 
2572 /* Finds interesting uses of induction variables outside of loops
2573    on loop exit edge EXIT.  */
2574 
2575 static void
2576 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
2577 {
2578   gphi *phi;
2579   gphi_iterator psi;
2580   tree def;
2581 
2582   for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
2583     {
2584       phi = psi.phi ();
2585       def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2586       if (!virtual_operand_p (def))
2587 	find_interesting_uses_op (data, def);
2588     }
2589 }
2590 
2591 /* Return TRUE if OFFSET is within the range of [base + offset] addressing
2592    mode for memory reference represented by USE.  */
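/* For illustration: on a target whose [base + offset] form only accepts
   small immediate offsets, a large offset such as 1 << 24 would be
   rejected here; the exact range comes from the target via
   memory_address_addr_space_p.  */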
2593 
2594 static GTY (()) vec<rtx, va_gc> *addr_list;
2595 
2596 static bool
2597 addr_offset_valid_p (struct iv_use *use, poly_int64 offset)
2598 {
2599   rtx reg, addr;
2600   unsigned list_index;
2601   addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
2602   machine_mode addr_mode, mem_mode = TYPE_MODE (use->mem_type);
2603 
2604   list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
2605   if (list_index >= vec_safe_length (addr_list))
2606     vec_safe_grow_cleared (addr_list, list_index + MAX_MACHINE_MODE, true);
2607 
2608   addr = (*addr_list)[list_index];
2609   if (!addr)
2610     {
2611       addr_mode = targetm.addr_space.address_mode (as);
2612       reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
2613       addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
2614       (*addr_list)[list_index] = addr;
2615     }
2616   else
2617     addr_mode = GET_MODE (addr);
2618 
2619   XEXP (addr, 1) = gen_int_mode (offset, addr_mode);
2620   return (memory_address_addr_space_p (mem_mode, addr, as));
2621 }
2622 
2623 /* Comparison function to sort group in ascending order of addr_offset.  */
2624 
2625 static int
2626 group_compare_offset (const void *a, const void *b)
2627 {
2628   const struct iv_use *const *u1 = (const struct iv_use *const *) a;
2629   const struct iv_use *const *u2 = (const struct iv_use *const *) b;
2630 
2631   return compare_sizes_for_sort ((*u1)->addr_offset, (*u2)->addr_offset);
2632 }
2633 
2634 /* Check if small groups should be split.  Return true if no group
2635    contains more than two uses with distinct addr_offsets.  Return
2636    false otherwise.  We want to split such groups because:
2637 
2638      1) Small groups don't have much benefit and may interfere with
2639 	general candidate selection.
2640      2) The problem size with only small groups is usually small and the
2641 	general algorithm can handle it well.
2642 
2643    TODO -- Above claim may not hold when we want to merge memory
2644    accesses with consecutive addresses.  */
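/* For illustration: if every address group uses at most two distinct
   offsets, e.g. {base, base + 4}, this returns true and the caller
   splits uses at different offsets into separate groups.  */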
2645 
2646 static bool
2647 split_small_address_groups_p (struct ivopts_data *data)
2648 {
2649   unsigned int i, j, distinct = 1;
2650   struct iv_use *pre;
2651   struct iv_group *group;
2652 
2653   for (i = 0; i < data->vgroups.length (); i++)
2654     {
2655       group = data->vgroups[i];
2656       if (group->vuses.length () == 1)
2657 	continue;
2658 
2659       gcc_assert (address_p (group->type));
2660       if (group->vuses.length () == 2)
2661 	{
2662 	  if (compare_sizes_for_sort (group->vuses[0]->addr_offset,
2663 				      group->vuses[1]->addr_offset) > 0)
2664 	    std::swap (group->vuses[0], group->vuses[1]);
2665 	}
2666       else
2667 	group->vuses.qsort (group_compare_offset);
2668 
2669       if (distinct > 2)
2670 	continue;
2671 
2672       distinct = 1;
2673       for (pre = group->vuses[0], j = 1; j < group->vuses.length (); j++)
2674 	{
2675 	  if (maybe_ne (group->vuses[j]->addr_offset, pre->addr_offset))
2676 	    {
2677 	      pre = group->vuses[j];
2678 	      distinct++;
2679 	    }
2680 
2681 	  if (distinct > 2)
2682 	    break;
2683 	}
2684     }
2685 
2686   return (distinct <= 2);
2687 }
2688 
2689 /* For each group of address type uses, this function further groups
2690    these uses according to the maximum offset supported by target's
2691    [base + offset] addressing mode.  */
2692 
2693 static void
2694 split_address_groups (struct ivopts_data *data)
2695 {
2696   unsigned int i, j;
2697   /* Whether groups should be split regardless of the addressing mode range.  */
2698   bool split_p = split_small_address_groups_p (data);
2699 
2700   for (i = 0; i < data->vgroups.length (); i++)
2701     {
2702       struct iv_group *new_group = NULL;
2703       struct iv_group *group = data->vgroups[i];
2704       struct iv_use *use = group->vuses[0];
2705 
2706       use->id = 0;
2707       use->group_id = group->id;
2708       if (group->vuses.length () == 1)
2709 	continue;
2710 
2711       gcc_assert (address_p (use->type));
2712 
2713       for (j = 1; j < group->vuses.length ();)
2714 	{
2715 	  struct iv_use *next = group->vuses[j];
2716 	  poly_int64 offset = next->addr_offset - use->addr_offset;
2717 
2718 	  /* Split the group if asked to, or if the offset against the first
2719 	     use can't fit in the offset part of the addressing mode.  IV uses
2720 	     having the same offset are still kept in one group.  */
2721 	  if (maybe_ne (offset, 0)
2722 	      && (split_p || !addr_offset_valid_p (use, offset)))
2723 	    {
2724 	      if (!new_group)
2725 		new_group = record_group (data, group->type);
2726 	      group->vuses.ordered_remove (j);
2727 	      new_group->vuses.safe_push (next);
2728 	      continue;
2729 	    }
2730 
2731 	  next->id = j;
2732 	  next->group_id = group->id;
2733 	  j++;
2734 	}
2735     }
2736 }
2737 
2738 /* Finds uses of the induction variables that are interesting.  */
2739 
2740 static void
2741 find_interesting_uses (struct ivopts_data *data, basic_block *body)
2742 {
2743   basic_block bb;
2744   gimple_stmt_iterator bsi;
2745   unsigned i;
2746   edge e;
2747 
2748   for (i = 0; i < data->current_loop->num_nodes; i++)
2749     {
2750       edge_iterator ei;
2751       bb = body[i];
2752 
2753       FOR_EACH_EDGE (e, ei, bb->succs)
2754 	if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2755 	    && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2756 	  find_interesting_uses_outside (data, e);
2757 
2758       for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2759 	find_interesting_uses_stmt (data, gsi_stmt (bsi));
2760       for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2761 	if (!is_gimple_debug (gsi_stmt (bsi)))
2762 	  find_interesting_uses_stmt (data, gsi_stmt (bsi));
2763     }
2764 
2765   split_address_groups (data);
2766 
2767   if (dump_file && (dump_flags & TDF_DETAILS))
2768     {
2769       fprintf (dump_file, "\n<IV Groups>:\n");
2770       dump_groups (dump_file, data);
2771       fprintf (dump_file, "\n");
2772     }
2773 }
2774 
2775 /* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
2776    is true, assume we are inside an address.  If TOP_COMPREF is true, assume
2777    we are at the top-level of the processed address.  */
2778 
2779 static tree
2780 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2781 		poly_int64 *offset)
2782 {
2783   tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2784   enum tree_code code;
2785   tree type, orig_type = TREE_TYPE (expr);
2786   poly_int64 off0, off1;
2787   HOST_WIDE_INT st;
2788   tree orig_expr = expr;
2789 
2790   STRIP_NOPS (expr);
2791 
2792   type = TREE_TYPE (expr);
2793   code = TREE_CODE (expr);
2794   *offset = 0;
2795 
2796   switch (code)
2797     {
2798     case POINTER_PLUS_EXPR:
2799     case PLUS_EXPR:
2800     case MINUS_EXPR:
2801       op0 = TREE_OPERAND (expr, 0);
2802       op1 = TREE_OPERAND (expr, 1);
2803 
2804       op0 = strip_offset_1 (op0, false, false, &off0);
2805       op1 = strip_offset_1 (op1, false, false, &off1);
2806 
2807       *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2808       if (op0 == TREE_OPERAND (expr, 0)
2809 	  && op1 == TREE_OPERAND (expr, 1))
2810 	return orig_expr;
2811 
2812       if (integer_zerop (op1))
2813 	expr = op0;
2814       else if (integer_zerop (op0))
2815 	{
2816 	  if (code == MINUS_EXPR)
2817 	    expr = fold_build1 (NEGATE_EXPR, type, op1);
2818 	  else
2819 	    expr = op1;
2820 	}
2821       else
2822 	expr = fold_build2 (code, type, op0, op1);
2823 
2824       return fold_convert (orig_type, expr);
2825 
2826     case MULT_EXPR:
2827       op1 = TREE_OPERAND (expr, 1);
2828       if (!cst_and_fits_in_hwi (op1))
2829 	return orig_expr;
2830 
2831       op0 = TREE_OPERAND (expr, 0);
2832       op0 = strip_offset_1 (op0, false, false, &off0);
2833       if (op0 == TREE_OPERAND (expr, 0))
2834 	return orig_expr;
2835 
2836       *offset = off0 * int_cst_value (op1);
2837       if (integer_zerop (op0))
2838 	expr = op0;
2839       else
2840 	expr = fold_build2 (MULT_EXPR, type, op0, op1);
2841 
2842       return fold_convert (orig_type, expr);
2843 
2844     case ARRAY_REF:
2845     case ARRAY_RANGE_REF:
2846       if (!inside_addr)
2847 	return orig_expr;
2848 
2849       step = array_ref_element_size (expr);
2850       if (!cst_and_fits_in_hwi (step))
2851 	break;
2852 
2853       st = int_cst_value (step);
2854       op1 = TREE_OPERAND (expr, 1);
2855       op1 = strip_offset_1 (op1, false, false, &off1);
2856       *offset = off1 * st;
2857 
2858       if (top_compref
2859 	  && integer_zerop (op1))
2860 	{
2861 	  /* Strip the component reference completely.  */
2862 	  op0 = TREE_OPERAND (expr, 0);
2863 	  op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2864 	  *offset += off0;
2865 	  return op0;
2866 	}
2867       break;
2868 
2869     case COMPONENT_REF:
2870       {
2871 	tree field;
2872 
2873 	if (!inside_addr)
2874 	  return orig_expr;
2875 
2876 	tmp = component_ref_field_offset (expr);
2877 	field = TREE_OPERAND (expr, 1);
2878 	if (top_compref
2879 	    && cst_and_fits_in_hwi (tmp)
2880 	    && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2881 	  {
2882 	    HOST_WIDE_INT boffset, abs_off;
2883 
2884 	    /* Strip the component reference completely.  */
2885 	    op0 = TREE_OPERAND (expr, 0);
2886 	    op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2887 	    boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2888 	    abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2889 	    if (boffset < 0)
2890 	      abs_off = -abs_off;
2891 
2892 	    *offset = off0 + int_cst_value (tmp) + abs_off;
2893 	    return op0;
2894 	  }
2895       }
2896       break;
2897 
2898     case ADDR_EXPR:
2899       op0 = TREE_OPERAND (expr, 0);
2900       op0 = strip_offset_1 (op0, true, true, &off0);
2901       *offset += off0;
2902 
2903       if (op0 == TREE_OPERAND (expr, 0))
2904 	return orig_expr;
2905 
2906       expr = build_fold_addr_expr (op0);
2907       return fold_convert (orig_type, expr);
2908 
2909     case MEM_REF:
2910       /* ???  Offset operand?  */
2911       inside_addr = false;
2912       break;
2913 
2914     default:
2915       if (ptrdiff_tree_p (expr, offset) && maybe_ne (*offset, 0))
2916 	return build_int_cst (orig_type, 0);
2917       return orig_expr;
2918     }
2919 
2920   /* Default handling of expressions for which we want to recurse into
2921      the first operand.  */
2922   op0 = TREE_OPERAND (expr, 0);
2923   op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2924   *offset += off0;
2925 
2926   if (op0 == TREE_OPERAND (expr, 0)
2927       && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2928     return orig_expr;
2929 
2930   expr = copy_node (expr);
2931   TREE_OPERAND (expr, 0) = op0;
2932   if (op1)
2933     TREE_OPERAND (expr, 1) = op1;
2934 
2935   /* Inside address, we might strip the top level component references,
2936      thus changing type of the expression.  Handling of ADDR_EXPR
2937      will fix that.  */
2938   expr = fold_convert (orig_type, expr);
2939 
2940   return expr;
2941 }
2942 
2943 /* Strips constant offsets from EXPR and stores them to OFFSET.  */
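/* For illustration: for EXPR = &a[i + 3] with 4-byte array elements, the
   returned core expression is &a[i] and *OFFSET is set to 12.  */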
2944 
2945 tree
2946 strip_offset (tree expr, poly_uint64_pod *offset)
2947 {
2948   poly_int64 off;
2949   tree core = strip_offset_1 (expr, false, false, &off);
2950   *offset = off;
2951   return core;
2952 }
2953 
2954 /* Returns variant of TYPE that can be used as base for different uses.
2955    We return unsigned type with the same precision, which avoids problems
2956    with overflows.  */
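/* E.g. for "int" this returns "unsigned int"; for a pointer type, an
   unsigned integer type of the same precision is returned.  */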
2957 
2958 static tree
2959 generic_type_for (tree type)
2960 {
2961   if (POINTER_TYPE_P (type))
2962     return unsigned_type_for (type);
2963 
2964   if (TYPE_UNSIGNED (type))
2965     return type;
2966 
2967   return unsigned_type_for (type);
2968 }
2969 
2970 /* Private data for walk_tree.  */
2971 
2972 struct walk_tree_data
2973 {
2974   bitmap *inv_vars;
2975   struct ivopts_data *idata;
2976 };
2977 
2978 /* Callback function for walk_tree, it records invariants and symbol
2979    reference in *EXPR_P.  DATA is the structure storing result info.  */
2980 
2981 static tree
2982 find_inv_vars_cb (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2983 {
2984   tree op = *expr_p;
2985   struct version_info *info;
2986   struct walk_tree_data *wdata = (struct walk_tree_data*) data;
2987 
2988   if (TREE_CODE (op) != SSA_NAME)
2989     return NULL_TREE;
2990 
2991   info = name_info (wdata->idata, op);
2992   /* Because we expand simple operations when finding IVs, a loop-invariant
2993      variable that isn't referred to by the original loop could be used now.
2994      Record such invariant variables here.  */
2995   if (!info->iv)
2996     {
2997       struct ivopts_data *idata = wdata->idata;
2998       basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (op));
2999 
3000       if (!bb || !flow_bb_inside_loop_p (idata->current_loop, bb))
3001 	{
3002 	  tree steptype = TREE_TYPE (op);
3003 	  if (POINTER_TYPE_P (steptype))
3004 	    steptype = sizetype;
3005 	  set_iv (idata, op, op, build_int_cst (steptype, 0), true);
3006 	  record_invariant (idata, op, false);
3007 	}
3008     }
3009   if (!info->inv_id || info->has_nonlin_use)
3010     return NULL_TREE;
3011 
3012   if (!*wdata->inv_vars)
3013     *wdata->inv_vars = BITMAP_ALLOC (NULL);
3014   bitmap_set_bit (*wdata->inv_vars, info->inv_id);
3015 
3016   return NULL_TREE;
3017 }
3018 
3019 /* Records invariants in *EXPR_P.  INV_VARS is the bitmap in which we
3020    should store them.  */
3021 
3022 static inline void
3023 find_inv_vars (struct ivopts_data *data, tree *expr_p, bitmap *inv_vars)
3024 {
3025   struct walk_tree_data wdata;
3026 
3027   if (!inv_vars)
3028     return;
3029 
3030   wdata.idata = data;
3031   wdata.inv_vars = inv_vars;
3032   walk_tree (expr_p, find_inv_vars_cb, &wdata, NULL);
3033 }
3034 
3035 /* Get entry from invariant expr hash table for INV_EXPR.  New entry
3036    will be recorded if it doesn't exist yet.  Given the two exprs below:
3037      inv_expr + cst1, inv_expr + cst2
3038    It's hard to decide whether the constant part should be stripped
3039    or not.  We choose not to strip it based on the facts below:
3040      1) We need to count the ADD cost for the constant part if it's
3041 	stripped, which isn't always trivial where this function is called.
3042      2) Stripping the constant away may conflict with the following
3043 	loop invariant hoisting pass.
3044      3) Not stripping the constant away results in more invariant exprs,
3045 	which usually leads to decisions preferring lower register pressure.  */
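/* Consequently, "inv_expr + 4" and "inv_expr + 8" are recorded as two
   separate invariant expression entries rather than one shared entry.  */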
3046 
3047 static iv_inv_expr_ent *
3048 get_loop_invariant_expr (struct ivopts_data *data, tree inv_expr)
3049 {
3050   STRIP_NOPS (inv_expr);
3051 
3052   if (poly_int_tree_p (inv_expr)
3053       || TREE_CODE (inv_expr) == SSA_NAME)
3054     return NULL;
3055 
3056   /* Don't strip constant part away as we used to.  */
3057 
3058   /* Stores EXPR in DATA->inv_expr_tab, return pointer to iv_inv_expr_ent.  */
3059   struct iv_inv_expr_ent ent;
3060   ent.expr = inv_expr;
3061   ent.hash = iterative_hash_expr (inv_expr, 0);
3062   struct iv_inv_expr_ent **slot = data->inv_expr_tab->find_slot (&ent, INSERT);
3063 
3064   if (!*slot)
3065     {
3066       *slot = XNEW (struct iv_inv_expr_ent);
3067       (*slot)->expr = inv_expr;
3068       (*slot)->hash = ent.hash;
3069       (*slot)->id = ++data->max_inv_expr_id;
3070     }
3071 
3072   return *slot;
3073 }
3074 
3075 
3076 /* Return *TP if it is an SSA_NAME marked with TREE_VISITED, i.e., as
3077    unsuitable as ivopts candidates for potentially involving undefined
3078    behavior.  */
3079 
3080 static tree
3081 find_ssa_undef (tree *tp, int *walk_subtrees, void *bb_)
3082 {
3083   basic_block bb = (basic_block) bb_;
3084   if (TREE_CODE (*tp) == SSA_NAME
3085       && ssa_name_maybe_undef_p (*tp)
3086       && !ssa_name_any_use_dominates_bb_p (*tp, bb))
3087     return *tp;
3088   if (!EXPR_P (*tp))
3089     *walk_subtrees = 0;
3090   return NULL;
3091 }
3092 
3093 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
3094    position to POS.  If USE is not NULL, the candidate is set as related to
3095    it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
3096    replacement of the final value of the iv by a direct computation.  */
3097 
3098 static struct iv_cand *
3099 add_candidate_1 (struct ivopts_data *data, tree base, tree step, bool important,
3100 		 enum iv_position pos, struct iv_use *use,
3101 		 gimple *incremented_at, struct iv *orig_iv = NULL,
3102 		 bool doloop = false)
3103 {
3104   unsigned i;
3105   struct iv_cand *cand = NULL;
3106   tree type, orig_type;
3107 
3108   gcc_assert (base && step);
3109 
3110   /* -fkeep-gc-roots-live means that we have to keep a real pointer
3111      live, but the ivopts code may replace a real pointer with one
3112      pointing before or after the memory block that is then adjusted
3113      into the memory block during the loop.  FIXME: It would likely be
3114      better to actually force the pointer live and still use ivopts;
3115      for example, it would be enough to write the pointer into memory
3116      and keep it there until after the loop.  */
3117   if (flag_keep_gc_roots_live && POINTER_TYPE_P (TREE_TYPE (base)))
3118     return NULL;
3119 
3120   /* If BASE contains undefined SSA names make sure we only record
3121      the original IV.  */
3122   bool involves_undefs = false;
3123   if (walk_tree (&base, find_ssa_undef, data->current_loop->header, NULL))
3124     {
3125       if (pos != IP_ORIGINAL)
3126 	return NULL;
3127       important = false;
3128       involves_undefs = true;
3129     }
3130 
3131   /* For non-original variables, make sure their values are computed in a type
3132      that does not invoke undefined behavior on overflows (since in general,
3133      we cannot prove that these induction variables are non-wrapping).  */
3134   if (pos != IP_ORIGINAL)
3135     {
3136       orig_type = TREE_TYPE (base);
3137       type = generic_type_for (orig_type);
3138       if (type != orig_type)
3139 	{
3140 	  base = fold_convert (type, base);
3141 	  step = fold_convert (type, step);
3142 	}
3143     }
3144 
3145   for (i = 0; i < data->vcands.length (); i++)
3146     {
3147       cand = data->vcands[i];
3148 
3149       if (cand->pos != pos)
3150 	continue;
3151 
3152       if (cand->incremented_at != incremented_at
3153 	  || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
3154 	      && cand->ainc_use != use))
3155 	continue;
3156 
3157       if (operand_equal_p (base, cand->iv->base, 0)
3158 	  && operand_equal_p (step, cand->iv->step, 0)
3159 	  && (TYPE_PRECISION (TREE_TYPE (base))
3160 	      == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
3161 	break;
3162     }
3163 
3164   if (i == data->vcands.length ())
3165     {
3166       cand = XCNEW (struct iv_cand);
3167       cand->id = i;
3168       cand->iv = alloc_iv (data, base, step);
3169       cand->pos = pos;
3170       if (pos != IP_ORIGINAL)
3171 	{
3172 	  if (doloop)
3173 	    cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "doloop");
3174 	  else
3175 	    cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
3176 	  cand->var_after = cand->var_before;
3177 	}
3178       cand->important = important;
3179       cand->involves_undefs = involves_undefs;
3180       cand->incremented_at = incremented_at;
3181       cand->doloop_p = doloop;
3182       data->vcands.safe_push (cand);
3183 
3184       if (!poly_int_tree_p (step))
3185 	{
3186 	  find_inv_vars (data, &step, &cand->inv_vars);
3187 
3188 	  iv_inv_expr_ent *inv_expr = get_loop_invariant_expr (data, step);
3189 	  /* Share bitmap between inv_vars and inv_exprs for cand.  */
3190 	  if (inv_expr != NULL)
3191 	    {
3192 	      cand->inv_exprs = cand->inv_vars;
3193 	      cand->inv_vars = NULL;
3194 	      if (cand->inv_exprs)
3195 		bitmap_clear (cand->inv_exprs);
3196 	      else
3197 		cand->inv_exprs = BITMAP_ALLOC (NULL);
3198 
3199 	      bitmap_set_bit (cand->inv_exprs, inv_expr->id);
3200 	    }
3201 	}
3202 
3203       if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
3204 	cand->ainc_use = use;
3205       else
3206 	cand->ainc_use = NULL;
3207 
3208       cand->orig_iv = orig_iv;
3209       if (dump_file && (dump_flags & TDF_DETAILS))
3210 	dump_cand (dump_file, cand);
3211     }
3212 
3213   cand->important |= important;
3214   cand->doloop_p |= doloop;
3215 
3216   /* Relate candidate to the group for which it is added.  */
3217   if (use)
3218     bitmap_set_bit (data->vgroups[use->group_id]->related_cands, i);
3219 
3220   return cand;
3221 }
3222 
3223 /* Returns true if incrementing the induction variable at the end of the LOOP
3224    is allowed.
3225 
3226    The purpose is to avoid splitting latch edge with a biv increment, thus
3227    creating a jump, possibly confusing other optimization passes and leaving
3228    less freedom to scheduler.  So we allow IP_END only if IP_NORMAL is not
3229    available (so we do not have a better alternative), or if the latch edge
3230    is already nonempty.  */
3231 
3232 static bool
3233 allow_ip_end_pos_p (class loop *loop)
3234 {
3235   if (!ip_normal_pos (loop))
3236     return true;
3237 
3238   if (!empty_block_p (ip_end_pos (loop)))
3239     return true;
3240 
3241   return false;
3242 }
3243 
3244 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
3245    Important field is set to IMPORTANT.  */
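/* For illustration: on a target with post-increment addressing, a use
   whose step equals the access size gets an IP_AFTER_USE candidate, so
   the induction variable update can be folded into the memory access;
   similarly, a pre-increment target gets an IP_BEFORE_USE candidate with
   the base shifted back by one step.  */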
3246 
3247 static void
3248 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
3249 			bool important, struct iv_use *use)
3250 {
3251   basic_block use_bb = gimple_bb (use->stmt);
3252   machine_mode mem_mode;
3253   unsigned HOST_WIDE_INT cstepi;
3254 
3255   /* If we insert the increment in any position other than the standard
3256      ones, we must ensure that it is incremented once per iteration.
3257      It must not be in an inner nested loop, or one side of an if
3258      statement.  */
3259   if (use_bb->loop_father != data->current_loop
3260       || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
3261       || stmt_can_throw_internal (cfun, use->stmt)
3262       || !cst_and_fits_in_hwi (step))
3263     return;
3264 
3265   cstepi = int_cst_value (step);
3266 
3267   mem_mode = TYPE_MODE (use->mem_type);
3268   if (((USE_LOAD_PRE_INCREMENT (mem_mode)
3269 	|| USE_STORE_PRE_INCREMENT (mem_mode))
3270        && known_eq (GET_MODE_SIZE (mem_mode), cstepi))
3271       || ((USE_LOAD_PRE_DECREMENT (mem_mode)
3272 	   || USE_STORE_PRE_DECREMENT (mem_mode))
3273 	  && known_eq (GET_MODE_SIZE (mem_mode), -cstepi)))
3274     {
3275       enum tree_code code = MINUS_EXPR;
3276       tree new_base;
3277       tree new_step = step;
3278 
3279       if (POINTER_TYPE_P (TREE_TYPE (base)))
3280 	{
3281 	  new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
3282 	  code = POINTER_PLUS_EXPR;
3283 	}
3284       else
3285 	new_step = fold_convert (TREE_TYPE (base), new_step);
3286       new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
3287       add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
3288 		       use->stmt);
3289     }
3290   if (((USE_LOAD_POST_INCREMENT (mem_mode)
3291 	|| USE_STORE_POST_INCREMENT (mem_mode))
3292        && known_eq (GET_MODE_SIZE (mem_mode), cstepi))
3293       || ((USE_LOAD_POST_DECREMENT (mem_mode)
3294 	   || USE_STORE_POST_DECREMENT (mem_mode))
3295 	  && known_eq (GET_MODE_SIZE (mem_mode), -cstepi)))
3296     {
3297       add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
3298 		       use->stmt);
3299     }
3300 }
3301 
3302 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
3303    position to POS.  If USE is not NULL, the candidate is set as related to
3304    it.  The candidate computation is scheduled before exit condition and at
3305    the end of loop.  */
3306 
3307 static void
3308 add_candidate (struct ivopts_data *data, tree base, tree step, bool important,
3309 	       struct iv_use *use, struct iv *orig_iv = NULL,
3310 	       bool doloop = false)
3311 {
3312   if (ip_normal_pos (data->current_loop))
3313     add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL, orig_iv,
3314 		     doloop);
3315   /* Exclude doloop candidate here since it requires decrement then comparison
3316      and jump, the IP_END position doesn't match.  */
3317   if (!doloop && ip_end_pos (data->current_loop)
3318       && allow_ip_end_pos_p (data->current_loop))
3319     add_candidate_1 (data, base, step, important, IP_END, use, NULL, orig_iv);
3320 }
3321 
3322 /* Adds standard iv candidates.  */
3323 
3324 static void
3325 add_standard_iv_candidates (struct ivopts_data *data)
3326 {
3327   add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
3328 
3329   /* The same for a double-integer type if it is still fast enough.  */
3330   if (TYPE_PRECISION
3331 	(long_integer_type_node) > TYPE_PRECISION (integer_type_node)
3332       && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
3333     add_candidate (data, build_int_cst (long_integer_type_node, 0),
3334 		   build_int_cst (long_integer_type_node, 1), true, NULL);
3335 
3336   /* The same for a double-integer type if it is still fast enough.  */
3337   if (TYPE_PRECISION
3338 	(long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
3339       && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
3340     add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
3341 		   build_int_cst (long_long_integer_type_node, 1), true, NULL);
3342 }
3343 
3344 
3345 /* Adds candidates based on the old induction variable IV.  */
3346 
3347 static void
3348 add_iv_candidate_for_biv (struct ivopts_data *data, struct iv *iv)
3349 {
3350   gimple *phi;
3351   tree def;
3352   struct iv_cand *cand;
3353 
3354   /* Check if this biv is used in address type use.  */
3355   if (iv->no_overflow  && iv->have_address_use
3356       && INTEGRAL_TYPE_P (TREE_TYPE (iv->base))
3357       && TYPE_PRECISION (TREE_TYPE (iv->base)) < TYPE_PRECISION (sizetype))
3358     {
3359       tree base = fold_convert (sizetype, iv->base);
3360       tree step = fold_convert (sizetype, iv->step);
3361 
3362       /* Add iv cand of same precision as index part in TARGET_MEM_REF.  */
3363       add_candidate (data, base, step, true, NULL, iv);
3364       /* Add iv cand of the original type only if it has nonlinear use.  */
3365       if (iv->nonlin_use)
3366 	add_candidate (data, iv->base, iv->step, true, NULL);
3367     }
3368   else
3369     add_candidate (data, iv->base, iv->step, true, NULL);
3370 
3371   /* The same, but with initial value zero.  */
3372   if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
3373     add_candidate (data, size_int (0), iv->step, true, NULL);
3374   else
3375     add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
3376 		   iv->step, true, NULL);
3377 
3378   phi = SSA_NAME_DEF_STMT (iv->ssa_name);
3379   if (gimple_code (phi) == GIMPLE_PHI)
3380     {
3381       /* Additionally record the possibility of leaving the original iv
3382 	 untouched.  */
3383       def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
3384       /* Don't add candidate if it's from another PHI node because
3385 	 it's an affine iv appearing in the form of PEELED_CHREC.  */
3386       phi = SSA_NAME_DEF_STMT (def);
3387       if (gimple_code (phi) != GIMPLE_PHI)
3388 	{
3389 	  cand = add_candidate_1 (data,
3390 				  iv->base, iv->step, true, IP_ORIGINAL, NULL,
3391 				  SSA_NAME_DEF_STMT (def));
3392 	  if (cand)
3393 	    {
3394 	      cand->var_before = iv->ssa_name;
3395 	      cand->var_after = def;
3396 	    }
3397 	}
3398       else
3399 	gcc_assert (gimple_bb (phi) == data->current_loop->header);
3400     }
3401 }
3402 
3403 /* Adds candidates based on the old induction variables.  */
3404 
3405 static void
3406 add_iv_candidate_for_bivs (struct ivopts_data *data)
3407 {
3408   unsigned i;
3409   struct iv *iv;
3410   bitmap_iterator bi;
3411 
3412   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
3413     {
3414       iv = ver_info (data, i)->iv;
3415       if (iv && iv->biv_p && !integer_zerop (iv->step))
3416 	add_iv_candidate_for_biv (data, iv);
3417     }
3418 }
3419 
3420 /* Record common candidate {BASE, STEP} derived from USE in hashtable.  */
3421 
3422 static void
3423 record_common_cand (struct ivopts_data *data, tree base,
3424 		    tree step, struct iv_use *use)
3425 {
3426   class iv_common_cand ent;
3427   class iv_common_cand **slot;
3428 
3429   ent.base = base;
3430   ent.step = step;
3431   ent.hash = iterative_hash_expr (base, 0);
3432   ent.hash = iterative_hash_expr (step, ent.hash);
3433 
3434   slot = data->iv_common_cand_tab->find_slot (&ent, INSERT);
3435   if (*slot == NULL)
3436     {
3437       *slot = new iv_common_cand ();
3438       (*slot)->base = base;
3439       (*slot)->step = step;
3440       (*slot)->uses.create (8);
3441       (*slot)->hash = ent.hash;
3442       data->iv_common_cands.safe_push ((*slot));
3443     }
3444 
3445   gcc_assert (use != NULL);
3446   (*slot)->uses.safe_push (use);
3447   return;
3448 }
3449 
3450 /* Comparison function used to sort common candidates.  */
3451 
3452 static int
3453 common_cand_cmp (const void *p1, const void *p2)
3454 {
3455   unsigned n1, n2;
3456   const class iv_common_cand *const *const ccand1
3457     = (const class iv_common_cand *const *)p1;
3458   const class iv_common_cand *const *const ccand2
3459     = (const class iv_common_cand *const *)p2;
3460 
3461   n1 = (*ccand1)->uses.length ();
3462   n2 = (*ccand2)->uses.length ();
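  /* Note the reversed operands: this sorts in decreasing order of the number
     of uses, so candidates shared by more uses come first and the caller can
     stop at the first entry derived from a single use.  */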
3463   return n2 - n1;
3464 }
3465 
3466 /* Adds IV candidates based on the common candidates recorded.  */
3467 
3468 static void
3469 add_iv_candidate_derived_from_uses (struct ivopts_data *data)
3470 {
3471   unsigned i, j;
3472   struct iv_cand *cand_1, *cand_2;
3473 
3474   data->iv_common_cands.qsort (common_cand_cmp);
3475   for (i = 0; i < data->iv_common_cands.length (); i++)
3476     {
3477       class iv_common_cand *ptr = data->iv_common_cands[i];
3478 
3479       /* Only add IV candidate if it's derived from multiple uses.  */
3480       if (ptr->uses.length () <= 1)
3481 	break;
3482 
3483       cand_1 = NULL;
3484       cand_2 = NULL;
3485       if (ip_normal_pos (data->current_loop))
3486 	cand_1 = add_candidate_1 (data, ptr->base, ptr->step,
3487 				  false, IP_NORMAL, NULL, NULL);
3488 
3489       if (ip_end_pos (data->current_loop)
3490 	  && allow_ip_end_pos_p (data->current_loop))
3491 	cand_2 = add_candidate_1 (data, ptr->base, ptr->step,
3492 				  false, IP_END, NULL, NULL);
3493 
3494       /* Bind deriving uses and the new candidates.  */
3495       for (j = 0; j < ptr->uses.length (); j++)
3496 	{
3497 	  struct iv_group *group = data->vgroups[ptr->uses[j]->group_id];
3498 	  if (cand_1)
3499 	    bitmap_set_bit (group->related_cands, cand_1->id);
3500 	  if (cand_2)
3501 	    bitmap_set_bit (group->related_cands, cand_2->id);
3502 	}
3503     }
3504 
3505   /* Release data since it is useless from this point.  */
3506   data->iv_common_cand_tab->empty ();
3507   data->iv_common_cands.truncate (0);
3508 }
3509 
3510 /* Adds candidates based on the value of USE's iv.  */
3511 
3512 static void
3513 add_iv_candidate_for_use (struct ivopts_data *data, struct iv_use *use)
3514 {
3515   poly_uint64 offset;
3516   tree base;
3517   struct iv *iv = use->iv;
3518   tree basetype = TREE_TYPE (iv->base);
3519 
3520   /* Don't add a candidate for an iv_use with a non-integer, non-pointer or
3521      non-mode-precision type; instead, add a candidate for the corresponding
3522      scev in an unsigned type of the same precision.  See PR93674.  */
3523   if ((TREE_CODE (basetype) != INTEGER_TYPE && !POINTER_TYPE_P (basetype))
3524       || !type_has_mode_precision_p (basetype))
3525     {
3526       basetype = lang_hooks.types.type_for_mode (TYPE_MODE (basetype),
3527 						 TYPE_UNSIGNED (basetype));
3528       add_candidate (data, fold_convert (basetype, iv->base),
3529 		     fold_convert (basetype, iv->step), false, NULL);
3530       return;
3531     }
3532 
3533   add_candidate (data, iv->base, iv->step, false, use);
3534 
3535   /* Record common candidate for use in case it can be shared by others.  */
3536   record_common_cand (data, iv->base, iv->step, use);
3537 
3538   /* Record common candidate with initial value zero.  */
3539   basetype = TREE_TYPE (iv->base);
3540   if (POINTER_TYPE_P (basetype))
3541     basetype = sizetype;
3542   record_common_cand (data, build_int_cst (basetype, 0), iv->step, use);
3543 
3544   /* Compare the cost of an address with an unscaled index with the cost of
3545     an address with a scaled index and add candidate if useful.  */
3546   poly_int64 step;
3547   if (use != NULL
3548       && poly_int_tree_p (iv->step, &step)
3549       && address_p (use->type))
3550     {
3551       poly_int64 new_step;
3552       unsigned int fact = preferred_mem_scale_factor
3553 	(use->iv->base,
3554 	 TYPE_MODE (use->mem_type),
3555 	 optimize_loop_for_speed_p (data->current_loop));
3556 
3557       if (fact != 1
3558 	  && multiple_p (step, fact, &new_step))
3559 	add_candidate (data, size_int (0),
3560 		       wide_int_to_tree (sizetype, new_step),
3561 		       true, NULL);
3562     }
3563 
3564   /* Record common candidate with constant offset stripped in base.
3565      Like the use itself, we also add candidate directly for it.  */
3566   base = strip_offset (iv->base, &offset);
3567   if (maybe_ne (offset, 0U) || base != iv->base)
3568     {
3569       record_common_cand (data, base, iv->step, use);
3570       add_candidate (data, base, iv->step, false, use);
3571     }
3572 
3573   /* Record common candidate with base_object removed in base.  */
3574   base = iv->base;
3575   STRIP_NOPS (base);
3576   if (iv->base_object != NULL && TREE_CODE (base) == POINTER_PLUS_EXPR)
3577     {
3578       tree step = iv->step;
3579 
3580       STRIP_NOPS (step);
3581       base = TREE_OPERAND (base, 1);
3582       step = fold_convert (sizetype, step);
3583       record_common_cand (data, base, step, use);
3584       /* Also record common candidate with offset stripped.  */
3585       base = strip_offset (base, &offset);
3586       if (maybe_ne (offset, 0U))
3587 	record_common_cand (data, base, step, use);
3588     }
3589 
3590   /* Finally, add auto-increment candidates.  Make such variables
3591      important since other iv uses with same base object may be based
3592      on it.  */
3593   if (use != NULL && address_p (use->type))
3594     add_autoinc_candidates (data, iv->base, iv->step, true, use);
3595 }
3596 
3597 /* Adds candidates based on the uses.  */
3598 
3599 static void
3600 add_iv_candidate_for_groups (struct ivopts_data *data)
3601 {
3602   unsigned i;
3603 
3604   /* Only add candidate for the first use in group.  */
3605   for (i = 0; i < data->vgroups.length (); i++)
3606     {
3607       struct iv_group *group = data->vgroups[i];
3608 
3609       gcc_assert (group->vuses[0] != NULL);
3610       add_iv_candidate_for_use (data, group->vuses[0]);
3611     }
3612   add_iv_candidate_derived_from_uses (data);
3613 }
3614 
3615 /* Record important candidates and add them to related_cands bitmaps.  */
3616 
3617 static void
3618 record_important_candidates (struct ivopts_data *data)
3619 {
3620   unsigned i;
3621   struct iv_group *group;
3622 
3623   for (i = 0; i < data->vcands.length (); i++)
3624     {
3625       struct iv_cand *cand = data->vcands[i];
3626 
3627       if (cand->important)
3628 	bitmap_set_bit (data->important_candidates, i);
3629     }
3630 
3631   data->consider_all_candidates = (data->vcands.length ()
3632 				   <= CONSIDER_ALL_CANDIDATES_BOUND);
3633 
3634   /* Add important candidates to groups' related_cands bitmaps.  */
3635   for (i = 0; i < data->vgroups.length (); i++)
3636     {
3637       group = data->vgroups[i];
3638       bitmap_ior_into (group->related_cands, data->important_candidates);
3639     }
3640 }
3641 
3642 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
3643    If consider_all_candidates is true, we use a two-dimensional array, otherwise
3644    we allocate a simple list to every use.  */
3645 
3646 static void
3647 alloc_use_cost_map (struct ivopts_data *data)
3648 {
3649   unsigned i, size, s;
3650 
3651   for (i = 0; i < data->vgroups.length (); i++)
3652     {
3653       struct iv_group *group = data->vgroups[i];
3654 
3655       if (data->consider_all_candidates)
3656 	size = data->vcands.length ();
3657       else
3658 	{
3659 	  s = bitmap_count_bits (group->related_cands);
3660 
3661 	  /* Round up to a power of two, so that computing the modulo is fast.  */
3662 	  size = s ? (1 << ceil_log2 (s)) : 1;
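	  /* For instance, s == 5 gives size == 8, while s == 0 gives 1.  */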
3663 	}
3664 
3665       group->n_map_members = size;
3666       group->cost_map = XCNEWVEC (class cost_pair, size);
3667     }
3668 }
3669 
3670 /* Sets cost of (GROUP, CAND) pair to COST and records that it depends
3671    on invariants INV_VARS and that the value used in expressing it is
3672    VALUE, and in case of iv elimination the comparison operator is COMP.  */
3673 
3674 static void
3675 set_group_iv_cost (struct ivopts_data *data,
3676 		   struct iv_group *group, struct iv_cand *cand,
3677 		   comp_cost cost, bitmap inv_vars, tree value,
3678 		   enum tree_code comp, bitmap inv_exprs)
3679 {
3680   unsigned i, s;
3681 
3682   if (cost.infinite_cost_p ())
3683     {
3684       BITMAP_FREE (inv_vars);
3685       BITMAP_FREE (inv_exprs);
3686       return;
3687     }
3688 
3689   if (data->consider_all_candidates)
3690     {
3691       group->cost_map[cand->id].cand = cand;
3692       group->cost_map[cand->id].cost = cost;
3693       group->cost_map[cand->id].inv_vars = inv_vars;
3694       group->cost_map[cand->id].inv_exprs = inv_exprs;
3695       group->cost_map[cand->id].value = value;
3696       group->cost_map[cand->id].comp = comp;
3697       return;
3698     }
3699 
3700   /* n_map_members is a power of two, so this computes modulo.  */
3701   s = cand->id & (group->n_map_members - 1);
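  /* Open-addressed linear probing: scan from the home slot S to the end of
     the map, then wrap around; the map is sized so that a free slot exists,
     hence the gcc_unreachable below.  */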
3702   for (i = s; i < group->n_map_members; i++)
3703     if (!group->cost_map[i].cand)
3704       goto found;
3705   for (i = 0; i < s; i++)
3706     if (!group->cost_map[i].cand)
3707       goto found;
3708 
3709   gcc_unreachable ();
3710 
3711 found:
3712   group->cost_map[i].cand = cand;
3713   group->cost_map[i].cost = cost;
3714   group->cost_map[i].inv_vars = inv_vars;
3715   group->cost_map[i].inv_exprs = inv_exprs;
3716   group->cost_map[i].value = value;
3717   group->cost_map[i].comp = comp;
3718 }
3719 
3720 /* Gets cost of (GROUP, CAND) pair.  */
3721 
3722 static class cost_pair *
3723 get_group_iv_cost (struct ivopts_data *data, struct iv_group *group,
3724 		   struct iv_cand *cand)
3725 {
3726   unsigned i, s;
3727   class cost_pair *ret;
3728 
3729   if (!cand)
3730     return NULL;
3731 
3732   if (data->consider_all_candidates)
3733     {
3734       ret = group->cost_map + cand->id;
3735       if (!ret->cand)
3736 	return NULL;
3737 
3738       return ret;
3739     }
3740 
3741   /* n_map_members is a power of two, so this computes modulo.  */
3742   s = cand->id & (group->n_map_members - 1);
3743   for (i = s; i < group->n_map_members; i++)
3744     if (group->cost_map[i].cand == cand)
3745       return group->cost_map + i;
3746     else if (group->cost_map[i].cand == NULL)
3747       return NULL;
3748   for (i = 0; i < s; i++)
3749     if (group->cost_map[i].cand == cand)
3750       return group->cost_map + i;
3751     else if (group->cost_map[i].cand == NULL)
3752       return NULL;
3753 
3754   return NULL;
3755 }
3756 
3757 /* Produce DECL_RTL for object obj so it looks like it is stored in memory.  */
3758 static rtx
3759 produce_memory_decl_rtl (tree obj, int *regno)
3760 {
3761   addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
3762   machine_mode address_mode = targetm.addr_space.address_mode (as);
3763   rtx x;
3764 
3765   gcc_assert (obj);
3766   if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
3767     {
3768       const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
3769       x = gen_rtx_SYMBOL_REF (address_mode, name);
3770       SET_SYMBOL_REF_DECL (x, obj);
3771       x = gen_rtx_MEM (DECL_MODE (obj), x);
3772       set_mem_addr_space (x, as);
3773       targetm.encode_section_info (obj, x, true);
3774     }
3775   else
3776     {
3777       x = gen_raw_REG (address_mode, (*regno)++);
3778       x = gen_rtx_MEM (DECL_MODE (obj), x);
3779       set_mem_addr_space (x, as);
3780     }
3781 
3782   return x;
3783 }
3784 
3785 /* Prepares decl_rtl for variables referred to in *EXPR_P.  Callback for
3786    walk_tree.  DATA contains the actual fake register number.  */
3787 
3788 static tree
3789 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
3790 {
3791   tree obj = NULL_TREE;
3792   rtx x = NULL_RTX;
3793   int *regno = (int *) data;
3794 
3795   switch (TREE_CODE (*expr_p))
3796     {
3797     case ADDR_EXPR:
3798       for (expr_p = &TREE_OPERAND (*expr_p, 0);
3799 	   handled_component_p (*expr_p);
3800 	   expr_p = &TREE_OPERAND (*expr_p, 0))
3801 	continue;
3802       obj = *expr_p;
3803       if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
3804 	x = produce_memory_decl_rtl (obj, regno);
3805       break;
3806 
3807     case SSA_NAME:
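      /* WS is walk_tree's walk_subtrees flag; clearing it stops the walk
	 from descending into this node's operands.  */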
3808       *ws = 0;
3809       obj = SSA_NAME_VAR (*expr_p);
3810       /* Defer handling of anonymous SSA_NAMEs to the expander.  */
3811       if (!obj)
3812 	return NULL_TREE;
3813       if (!DECL_RTL_SET_P (obj))
3814 	x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3815       break;
3816 
3817     case VAR_DECL:
3818     case PARM_DECL:
3819     case RESULT_DECL:
3820       *ws = 0;
3821       obj = *expr_p;
3822 
3823       if (DECL_RTL_SET_P (obj))
3824 	break;
3825 
3826       if (DECL_MODE (obj) == BLKmode)
3827 	x = produce_memory_decl_rtl (obj, regno);
3828       else
3829 	x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3830 
3831       break;
3832 
3833     default:
3834       break;
3835     }
3836 
3837   if (x)
3838     {
3839       decl_rtl_to_reset.safe_push (obj);
3840       SET_DECL_RTL (obj, x);
3841     }
3842 
3843   return NULL_TREE;
3844 }
3845 
3846 /* Predict whether the given loop will be transformed in the RTL
3847    doloop_optimize pass.  Attempt to duplicate some doloop_optimize checks.
3848    This is only for target independent checks, see targetm.predict_doloop_p
3849    for the target dependent ones.
3850 
3851    Note that according to some initial investigation, some checks like costly
3852    Note that according to some initial investigation, checks such as the costly
3853    niter check and invalid stmt scanning don't bring much gain in the general
3854    case, so keep this as simple as possible for now.
3855 
3856    Some RTL-specific checks seem impossible to perform on gimple; if any new
3857    checks or easy checks _are_ missing here, please add them.  */
3858 static bool
3859 generic_predict_doloop_p (struct ivopts_data *data)
3860 {
3861   class loop *loop = data->current_loop;
3862 
3863   /* Call target hook for target dependent checks.  */
3864   if (!targetm.predict_doloop_p (loop))
3865     {
3866       if (dump_file && (dump_flags & TDF_DETAILS))
3867 	fprintf (dump_file, "Predict doloop failure due to"
3868 			    " target specific checks.\n");
3869       return false;
3870     }
3871 
3872   /* Similar to doloop_optimize, check iteration description to know it's
3873      suitable or not.  Keep it as simple as possible, feel free to extend it
3874      if you find any multiple exits cases matter.  */
3875   edge exit = single_dom_exit (loop);
3876   class tree_niter_desc *niter_desc;
3877   if (!exit || !(niter_desc = niter_for_exit (data, exit)))
3878     {
3879       if (dump_file && (dump_flags & TDF_DETAILS))
3880 	fprintf (dump_file, "Predict doloop failure due to"
3881 			    " unexpected niters.\n");
3882       return false;
3883     }
3884 
3885   /* Similar to doloop_optimize, check whether the iteration count is too
3886      small to be profitable.  */
3887   HOST_WIDE_INT est_niter = get_estimated_loop_iterations_int (loop);
3888   if (est_niter == -1)
3889     est_niter = get_likely_max_loop_iterations_int (loop);
3890   if (est_niter >= 0 && est_niter < 3)
3891     {
3892       if (dump_file && (dump_flags & TDF_DETAILS))
3893 	fprintf (dump_file,
3894 		 "Predict doloop failure due to"
3895 		 " too few iterations (%u).\n",
3896 		 (unsigned int) est_niter);
3897       return false;
3898     }
3899 
3900   return true;
3901 }
3902 
3903 /* Determines cost of the computation of EXPR.  */
3904 
3905 static unsigned
3906 computation_cost (tree expr, bool speed)
3907 {
3908   rtx_insn *seq;
3909   rtx rslt;
3910   tree type = TREE_TYPE (expr);
3911   unsigned cost;
3912   /* Avoid using hard regs in ways which may be unsupported.  */
3913   int regno = LAST_VIRTUAL_REGISTER + 1;
3914   struct cgraph_node *node = cgraph_node::get (current_function_decl);
3915   enum node_frequency real_frequency = node->frequency;
3916 
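  /* Temporarily pretend the function has normal frequency and select the RTL
     profile matching SPEED, so that expansion and the cost queries below
     reflect it; both settings are restored once the sequence is generated.  */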
3917   node->frequency = NODE_FREQUENCY_NORMAL;
3918   crtl->maybe_hot_insn_p = speed;
3919   walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
3920   start_sequence ();
3921   rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
3922   seq = get_insns ();
3923   end_sequence ();
3924   default_rtl_profile ();
3925   node->frequency = real_frequency;
3926 
3927   cost = seq_cost (seq, speed);
3928   if (MEM_P (rslt))
3929     cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
3930 			  TYPE_ADDR_SPACE (type), speed);
3931   else if (!REG_P (rslt))
3932     cost += set_src_cost (rslt, TYPE_MODE (type), speed);
3933 
3934   return cost;
3935 }
3936 
3937 /* Returns variable containing the value of candidate CAND at statement AT.  */
3938 
3939 static tree
3940 var_at_stmt (class loop *loop, struct iv_cand *cand, gimple *stmt)
3941 {
3942   if (stmt_after_increment (loop, cand, stmt))
3943     return cand->var_after;
3944   else
3945     return cand->var_before;
3946 }
3947 
3948 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3949    same precision that is at least as wide as the precision of TYPE, stores
3950    BA to A and BB to B, and returns the type of BA.  Otherwise, returns the
3951    type of A and B.  */
3952 
3953 static tree
3954 determine_common_wider_type (tree *a, tree *b)
3955 {
3956   tree wider_type = NULL;
3957   tree suba, subb;
3958   tree atype = TREE_TYPE (*a);
3959 
3960   if (CONVERT_EXPR_P (*a))
3961     {
3962       suba = TREE_OPERAND (*a, 0);
3963       wider_type = TREE_TYPE (suba);
3964       if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3965 	return atype;
3966     }
3967   else
3968     return atype;
3969 
3970   if (CONVERT_EXPR_P (*b))
3971     {
3972       subb = TREE_OPERAND (*b, 0);
3973       if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3974 	return atype;
3975     }
3976   else
3977     return atype;
3978 
3979   *a = suba;
3980   *b = subb;
3981   return wider_type;
3982 }
3983 
3984 /* Determines the expression by which USE is expressed from induction variable
3985    CAND at statement AT in LOOP.  The expression is stored in two parts in a
3986    decomposed form: the invariant part is stored in AFF_INV, the variant
3987    part in AFF_VAR.  Store the ratio of USE.step over CAND.step in PRAT if
3988    it's non-null.  Returns false if USE cannot be expressed using CAND.  */
3989 
3990 static bool
3991 get_computation_aff_1 (class loop *loop, gimple *at, struct iv_use *use,
3992 		       struct iv_cand *cand, class aff_tree *aff_inv,
3993 		       class aff_tree *aff_var, widest_int *prat = NULL)
3994 {
3995   tree ubase = use->iv->base, ustep = use->iv->step;
3996   tree cbase = cand->iv->base, cstep = cand->iv->step;
3997   tree common_type, uutype, var, cstep_common;
3998   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3999   aff_tree aff_cbase;
4000   widest_int rat;
4001 
4002   /* We must have a precision to express the values of use.  */
4003   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4004     return false;
4005 
4006   var = var_at_stmt (loop, cand, at);
4007   uutype = unsigned_type_for (utype);
4008 
4009   /* If the conversion is not noop, perform it.  */
4010   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4011     {
4012       if (cand->orig_iv != NULL && CONVERT_EXPR_P (cbase)
4013 	  && (CONVERT_EXPR_P (cstep) || poly_int_tree_p (cstep)))
4014 	{
4015 	  tree inner_base, inner_step, inner_type;
4016 	  inner_base = TREE_OPERAND (cbase, 0);
4017 	  if (CONVERT_EXPR_P (cstep))
4018 	    inner_step = TREE_OPERAND (cstep, 0);
4019 	  else
4020 	    inner_step = cstep;
4021 
4022 	  inner_type = TREE_TYPE (inner_base);
4023 	  /* If candidate is added from a biv whose type is smaller than
4024 	     ctype, we know both candidate and the biv won't overflow.
4025 	     In this case, it's safe to skip the conversion in the candidate.
4026 	     For example, (unsigned short)((unsigned long)A) equals
4027 	     (unsigned short)A, if A has a type no larger than short.  */
4028 	  if (TYPE_PRECISION (inner_type) <= TYPE_PRECISION (uutype))
4029 	    {
4030 	      cbase = inner_base;
4031 	      cstep = inner_step;
4032 	    }
4033 	}
4034       cbase = fold_convert (uutype, cbase);
4035       cstep = fold_convert (uutype, cstep);
4036       var = fold_convert (uutype, var);
4037     }
4038 
4039   /* Ratio is 1 when computing the value of biv cand by itself.
4040      We can't rely on constant_multiple_of in this case because the
4041      use is created after the original biv is selected.  The call
4042      could fail because of inconsistent fold behavior.  See PR68021
4043      for more information.  */
4044   if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
4045     {
4046       gcc_assert (is_gimple_assign (use->stmt));
4047       gcc_assert (use->iv->ssa_name == cand->var_after);
4048       gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
4049       rat = 1;
4050     }
4051   else if (!constant_multiple_of (ustep, cstep, &rat))
4052     return false;
4053 
4054   if (prat)
4055     *prat = rat;
4056 
4057   /* In case both UBASE and CBASE are shortened to UUTYPE from some common
4058      type, we achieve better folding by computing their difference in this
4059      wider type, and cast the result to UUTYPE.  We do not need to worry about
4060      overflows, as all the arithmetics will in the end be performed in UUTYPE
4061      anyway.  */
4062   common_type = determine_common_wider_type (&ubase, &cbase);
4063 
4064   /* use = ubase - ratio * cbase + ratio * var.  */
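  /* This holds because VAR is the candidate's value at AT (cbase + i * cstep,
     adjusted below when AT is after the increment), the use's value is
     ubase + i * ustep, and ustep == rat * cstep; substituting gives
     ubase + rat * (var - cbase).  */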
4065   tree_to_aff_combination (ubase, common_type, aff_inv);
4066   tree_to_aff_combination (cbase, common_type, &aff_cbase);
4067   tree_to_aff_combination (var, uutype, aff_var);
4068 
4069   /* We need to shift the value if we are after the increment.  */
4070   if (stmt_after_increment (loop, cand, at))
4071     {
4072       aff_tree cstep_aff;
4073 
4074       if (common_type != uutype)
4075 	cstep_common = fold_convert (common_type, cstep);
4076       else
4077 	cstep_common = cstep;
4078 
4079       tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
4080       aff_combination_add (&aff_cbase, &cstep_aff);
4081     }
4082 
4083   aff_combination_scale (&aff_cbase, -rat);
4084   aff_combination_add (aff_inv, &aff_cbase);
4085   if (common_type != uutype)
4086     aff_combination_convert (aff_inv, uutype);
4087 
4088   aff_combination_scale (aff_var, rat);
4089   return true;
4090 }
4091 
4092 /* Determines the expression by which USE is expressed from induction variable
4093    CAND at statement AT in LOOP.  The expression is stored in a decomposed
4094    form into AFF.  Returns false if USE cannot be expressed using CAND.  */
4095 
4096 static bool
4097 get_computation_aff (class loop *loop, gimple *at, struct iv_use *use,
4098 		     struct iv_cand *cand, class aff_tree *aff)
4099 {
4100   aff_tree aff_var;
4101 
4102   if (!get_computation_aff_1 (loop, at, use, cand, aff, &aff_var))
4103     return false;
4104 
4105   aff_combination_add (aff, &aff_var);
4106   return true;
4107 }
4108 
4109 /* Return the type of USE.  */
4110 
4111 static tree
4112 get_use_type (struct iv_use *use)
4113 {
4114   tree base_type = TREE_TYPE (use->iv->base);
4115   tree type;
4116 
4117   if (use->type == USE_REF_ADDRESS)
4118     {
4119       /* The base_type may be a void pointer.  Create a pointer type based on
4120 	 the mem_ref instead.  */
4121       type = build_pointer_type (TREE_TYPE (*use->op_p));
4122       gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
4123 		  == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
4124     }
4125   else
4126     type = base_type;
4127 
4128   return type;
4129 }
4130 
4131 /* Determines the expression by which USE is expressed from induction variable
4132    CAND at statement AT in LOOP.  The computation is unshared.  */
4133 
4134 static tree
4135 get_computation_at (class loop *loop, gimple *at,
4136 		    struct iv_use *use, struct iv_cand *cand)
4137 {
4138   aff_tree aff;
4139   tree type = get_use_type (use);
4140 
4141   if (!get_computation_aff (loop, at, use, cand, &aff))
4142     return NULL_TREE;
4143   unshare_aff_combination (&aff);
4144   return fold_convert (type, aff_combination_to_tree (&aff));
4145 }
4146 
4147 /* Like get_computation_at, but try harder, even if the computation
4148    is more expensive.  Intended for debug stmts.  */
4149 
4150 static tree
4151 get_debug_computation_at (class loop *loop, gimple *at,
4152 			  struct iv_use *use, struct iv_cand *cand)
4153 {
4154   if (tree ret = get_computation_at (loop, at, use, cand))
4155     return ret;
4156 
4157   tree ubase = use->iv->base, ustep = use->iv->step;
4158   tree cbase = cand->iv->base, cstep = cand->iv->step;
4159   tree var;
4160   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
4161   widest_int rat;
4162 
4163   /* We must have a precision to express the values of use.  */
4164   if (TYPE_PRECISION (utype) >= TYPE_PRECISION (ctype))
4165     return NULL_TREE;
4166 
4167   /* Try to handle the case that get_computation_at doesn't:
4168      try to express
4169      use = ubase + (var - cbase) / ratio.  */
4170   if (!constant_multiple_of (cstep, fold_convert (TREE_TYPE (cstep), ustep),
4171 			     &rat))
4172     return NULL_TREE;
4173 
4174   bool neg_p = false;
4175   if (wi::neg_p (rat))
4176     {
4177       if (TYPE_UNSIGNED (ctype))
4178 	return NULL_TREE;
4179       neg_p = true;
4180       rat = wi::neg (rat);
4181     }
4182 
4183   /* If both IVs can wrap around and CAND doesn't have a power of two step,
4184      it is unsafe.  Consider uint16_t CAND with step 9, when wrapping around,
4185      the values will be ... 0xfff0, 0xfff9, 2, 11 ... and when use is say
4186      uint8_t with step 3, those values divided by 3 cast to uint8_t will be
4187      ... 0x50, 0x53, 0, 3 ... rather than expected 0x50, 0x53, 0x56, 0x59.  */
4188   if (!use->iv->no_overflow
4189       && !cand->iv->no_overflow
4190       && !integer_pow2p (cstep))
4191     return NULL_TREE;
4192 
4193   int bits = wi::exact_log2 (rat);
4194   if (bits == -1)
4195     bits = wi::floor_log2 (rat) + 1;
4196   if (!cand->iv->no_overflow
4197       && TYPE_PRECISION (utype) + bits > TYPE_PRECISION (ctype))
4198     return NULL_TREE;
4199 
4200   var = var_at_stmt (loop, cand, at);
4201 
4202   if (POINTER_TYPE_P (ctype))
4203     {
4204       ctype = unsigned_type_for (ctype);
4205       cbase = fold_convert (ctype, cbase);
4206       cstep = fold_convert (ctype, cstep);
4207       var = fold_convert (ctype, var);
4208     }
4209 
4210   if (stmt_after_increment (loop, cand, at))
4211     var = fold_build2 (MINUS_EXPR, TREE_TYPE (var), var,
4212 		       unshare_expr (cstep));
4213 
4214   var = fold_build2 (MINUS_EXPR, TREE_TYPE (var), var, cbase);
4215   var = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (var), var,
4216 		     wide_int_to_tree (TREE_TYPE (var), rat));
4217   if (POINTER_TYPE_P (utype))
4218     {
4219       var = fold_convert (sizetype, var);
4220       if (neg_p)
4221 	var = fold_build1 (NEGATE_EXPR, sizetype, var);
4222       var = fold_build2 (POINTER_PLUS_EXPR, utype, ubase, var);
4223     }
4224   else
4225     {
4226       var = fold_convert (utype, var);
4227       var = fold_build2 (neg_p ? MINUS_EXPR : PLUS_EXPR, utype,
4228 			 ubase, var);
4229     }
4230   return var;
4231 }
4232 
4233 /* Adjust the cost COST for being in loop setup rather than loop body.
4234    If we're optimizing for space, the loop setup overhead is constant;
4235    if we're optimizing for speed, amortize it over the per-iteration cost.
4236    If ROUND_UP_P is true, the result is rounded up rather than truncated
4237    toward zero when optimizing for speed.  */
4238 static int64_t
4239 adjust_setup_cost (struct ivopts_data *data, int64_t cost,
4240 		   bool round_up_p = false)
4241 {
4242   if (cost == INFTY)
4243     return cost;
4244   else if (optimize_loop_for_speed_p (data->current_loop))
4245     {
4246       int64_t niters = (int64_t) avg_loop_niter (data->current_loop);
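      /* E.g. a setup cost of 10 amortized over an average of 4 iterations
	 yields 2 when truncating and 3 when rounding up.  */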
4247       return (cost + (round_up_p ? niters - 1 : 0)) / niters;
4248     }
4249   else
4250     return cost;
4251 }
4252 
4253 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE.  MULT is the
4254    EXPR operand holding the shift.  COST0 and COST1 are the costs for
4255    calculating the operands of EXPR.  Returns true if successful, and returns
4256    the cost in COST.  */
4257 
4258 static bool
4259 get_shiftadd_cost (tree expr, scalar_int_mode mode, comp_cost cost0,
4260 		   comp_cost cost1, tree mult, bool speed, comp_cost *cost)
4261 {
4262   comp_cost res;
4263   tree op1 = TREE_OPERAND (expr, 1);
4264   tree cst = TREE_OPERAND (mult, 1);
4265   tree multop = TREE_OPERAND (mult, 0);
4266   int m = exact_log2 (int_cst_value (cst));
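  /* E.g. a multiplier of 8 gives m == 3, so a + b * 8 can be computed as
     a + (b << 3) by a single shift-and-add if the target has one.  */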
4267   int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
4268   int as_cost, sa_cost;
4269   bool mult_in_op1;
4270 
4271   if (!(m >= 0 && m < maxm))
4272     return false;
4273 
4274   STRIP_NOPS (op1);
4275   mult_in_op1 = operand_equal_p (op1, mult, 0);
4276 
4277   as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
4278 
4279   /* If the target has a cheap shift-and-add or shift-and-sub instruction,
4280      use that in preference to a shift insn followed by an add insn.  */
4281   sa_cost = (TREE_CODE (expr) != MINUS_EXPR
4282 	     ? shiftadd_cost (speed, mode, m)
4283 	     : (mult_in_op1
4284 		? shiftsub1_cost (speed, mode, m)
4285 		: shiftsub0_cost (speed, mode, m)));
4286 
4287   res = comp_cost (MIN (as_cost, sa_cost), 0);
4288   res += (mult_in_op1 ? cost0 : cost1);
4289 
4290   STRIP_NOPS (multop);
4291   if (!is_gimple_val (multop))
4292     res += force_expr_to_var_cost (multop, speed);
4293 
4294   *cost = res;
4295   return true;
4296 }
4297 
4298 /* Estimates cost of forcing expression EXPR into a variable.  */
4299 
4300 static comp_cost
4301 force_expr_to_var_cost (tree expr, bool speed)
4302 {
4303   static bool costs_initialized = false;
4304   static unsigned integer_cost [2];
4305   static unsigned symbol_cost [2];
4306   static unsigned address_cost [2];
4307   tree op0, op1;
4308   comp_cost cost0, cost1, cost;
4309   machine_mode mode;
4310   scalar_int_mode int_mode;
4311 
4312   if (!costs_initialized)
4313     {
4314       tree type = build_pointer_type (integer_type_node);
4315       tree var, addr;
4316       rtx x;
4317       int i;
4318 
4319       var = create_tmp_var_raw (integer_type_node, "test_var");
4320       TREE_STATIC (var) = 1;
4321       x = produce_memory_decl_rtl (var, NULL);
4322       SET_DECL_RTL (var, x);
4323 
4324       addr = build1 (ADDR_EXPR, type, var);
4325 
4326 
4327       for (i = 0; i < 2; i++)
4328 	{
4329 	  integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
4330 							     2000), i);
4331 
4332 	  symbol_cost[i] = computation_cost (addr, i) + 1;
4333 
4334 	  address_cost[i]
4335 	    = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
4336 	  if (dump_file && (dump_flags & TDF_DETAILS))
4337 	    {
4338 	      fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
4339 	      fprintf (dump_file, "  integer %d\n", (int) integer_cost[i]);
4340 	      fprintf (dump_file, "  symbol %d\n", (int) symbol_cost[i]);
4341 	      fprintf (dump_file, "  address %d\n", (int) address_cost[i]);
4342 	      fprintf (dump_file, "  other %d\n", (int) target_spill_cost[i]);
4343 	      fprintf (dump_file, "\n");
4344 	    }
4345 	}
4346 
4347       costs_initialized = true;
4348     }
4349 
4350   STRIP_NOPS (expr);
4351 
4352   if (SSA_VAR_P (expr))
4353     return no_cost;
4354 
4355   if (is_gimple_min_invariant (expr))
4356     {
4357       if (poly_int_tree_p (expr))
4358 	return comp_cost (integer_cost [speed], 0);
4359 
4360       if (TREE_CODE (expr) == ADDR_EXPR)
4361 	{
4362 	  tree obj = TREE_OPERAND (expr, 0);
4363 
4364 	  if (VAR_P (obj)
4365 	      || TREE_CODE (obj) == PARM_DECL
4366 	      || TREE_CODE (obj) == RESULT_DECL)
4367 	    return comp_cost (symbol_cost [speed], 0);
4368 	}
4369 
4370       return comp_cost (address_cost [speed], 0);
4371     }
4372 
4373   switch (TREE_CODE (expr))
4374     {
4375     case POINTER_PLUS_EXPR:
4376     case PLUS_EXPR:
4377     case MINUS_EXPR:
4378     case MULT_EXPR:
4379     case TRUNC_DIV_EXPR:
4380     case BIT_AND_EXPR:
4381     case BIT_IOR_EXPR:
4382     case LSHIFT_EXPR:
4383     case RSHIFT_EXPR:
4384       op0 = TREE_OPERAND (expr, 0);
4385       op1 = TREE_OPERAND (expr, 1);
4386       STRIP_NOPS (op0);
4387       STRIP_NOPS (op1);
4388       break;
4389 
4390     CASE_CONVERT:
4391     case NEGATE_EXPR:
4392     case BIT_NOT_EXPR:
4393       op0 = TREE_OPERAND (expr, 0);
4394       STRIP_NOPS (op0);
4395       op1 = NULL_TREE;
4396       break;
4397     /* See add_iv_candidate_for_doloop: for the doloop may_be_zero case we
4398        introduce a COND_EXPR for the IV base, so we need better cost estimation
4399        for this COND_EXPR and for tcc_comparison codes.  */
4400     case COND_EXPR:
4401       op0 = TREE_OPERAND (expr, 1);
4402       STRIP_NOPS (op0);
4403       op1 = TREE_OPERAND (expr, 2);
4404       STRIP_NOPS (op1);
4405       break;
4406     case LT_EXPR:
4407     case LE_EXPR:
4408     case GT_EXPR:
4409     case GE_EXPR:
4410     case EQ_EXPR:
4411     case NE_EXPR:
4412     case UNORDERED_EXPR:
4413     case ORDERED_EXPR:
4414     case UNLT_EXPR:
4415     case UNLE_EXPR:
4416     case UNGT_EXPR:
4417     case UNGE_EXPR:
4418     case UNEQ_EXPR:
4419     case LTGT_EXPR:
4420     case MAX_EXPR:
4421     case MIN_EXPR:
4422       op0 = TREE_OPERAND (expr, 0);
4423       STRIP_NOPS (op0);
4424       op1 = TREE_OPERAND (expr, 1);
4425       STRIP_NOPS (op1);
4426       break;
4427 
4428     default:
4429       /* Just an arbitrary value, FIXME.  */
4430       return comp_cost (target_spill_cost[speed], 0);
4431     }
4432 
4433   if (op0 == NULL_TREE
4434       || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
4435     cost0 = no_cost;
4436   else
4437     cost0 = force_expr_to_var_cost (op0, speed);
4438 
4439   if (op1 == NULL_TREE
4440       || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
4441     cost1 = no_cost;
4442   else
4443     cost1 = force_expr_to_var_cost (op1, speed);
4444 
4445   mode = TYPE_MODE (TREE_TYPE (expr));
4446   switch (TREE_CODE (expr))
4447     {
4448     case POINTER_PLUS_EXPR:
4449     case PLUS_EXPR:
4450     case MINUS_EXPR:
4451     case NEGATE_EXPR:
4452       cost = comp_cost (add_cost (speed, mode), 0);
4453       if (TREE_CODE (expr) != NEGATE_EXPR)
4454 	{
4455 	  tree mult = NULL_TREE;
4456 	  comp_cost sa_cost;
4457 	  if (TREE_CODE (op1) == MULT_EXPR)
4458 	    mult = op1;
4459 	  else if (TREE_CODE (op0) == MULT_EXPR)
4460 	    mult = op0;
4461 
4462 	  if (mult != NULL_TREE
4463 	      && is_a <scalar_int_mode> (mode, &int_mode)
4464 	      && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
4465 	      && get_shiftadd_cost (expr, int_mode, cost0, cost1, mult,
4466 				    speed, &sa_cost))
4467 	    return sa_cost;
4468 	}
4469       break;
4470 
4471     CASE_CONVERT:
4472       {
4473 	tree inner_mode, outer_mode;
4474 	outer_mode = TREE_TYPE (expr);
4475 	inner_mode = TREE_TYPE (op0);
4476 	cost = comp_cost (convert_cost (TYPE_MODE (outer_mode),
4477 				       TYPE_MODE (inner_mode), speed), 0);
4478       }
4479       break;
4480 
4481     case MULT_EXPR:
4482       if (cst_and_fits_in_hwi (op0))
4483 	cost = comp_cost (mult_by_coeff_cost (int_cst_value (op0),
4484 					     mode, speed), 0);
4485       else if (cst_and_fits_in_hwi (op1))
4486 	cost = comp_cost (mult_by_coeff_cost (int_cst_value (op1),
4487 					     mode, speed), 0);
4488       else
4489 	return comp_cost (target_spill_cost [speed], 0);
4490       break;
4491 
4492     case TRUNC_DIV_EXPR:
4493       /* Division by power of two is usually cheap, so we allow it.  Forbid
4494 	 anything else.  */
4495       if (integer_pow2p (TREE_OPERAND (expr, 1)))
4496 	cost = comp_cost (add_cost (speed, mode), 0);
4497       else
4498 	cost = comp_cost (target_spill_cost[speed], 0);
4499       break;
4500 
4501     case BIT_AND_EXPR:
4502     case BIT_IOR_EXPR:
4503     case BIT_NOT_EXPR:
4504     case LSHIFT_EXPR:
4505     case RSHIFT_EXPR:
4506       cost = comp_cost (add_cost (speed, mode), 0);
4507       break;
4508     case COND_EXPR:
4509       op0 = TREE_OPERAND (expr, 0);
4510       STRIP_NOPS (op0);
4511       if (op0 == NULL_TREE || TREE_CODE (op0) == SSA_NAME
4512 	  || CONSTANT_CLASS_P (op0))
4513 	cost = no_cost;
4514       else
4515 	cost = force_expr_to_var_cost (op0, speed);
4516       break;
4517     case LT_EXPR:
4518     case LE_EXPR:
4519     case GT_EXPR:
4520     case GE_EXPR:
4521     case EQ_EXPR:
4522     case NE_EXPR:
4523     case UNORDERED_EXPR:
4524     case ORDERED_EXPR:
4525     case UNLT_EXPR:
4526     case UNLE_EXPR:
4527     case UNGT_EXPR:
4528     case UNGE_EXPR:
4529     case UNEQ_EXPR:
4530     case LTGT_EXPR:
4531     case MAX_EXPR:
4532     case MIN_EXPR:
4533       /* Simply use the add cost for now; FIXME if there is a more accurate
4534 	 way to evaluate the cost.  */
4535       cost = comp_cost (add_cost (speed, mode), 0);
4536       break;
4537 
4538     default:
4539       gcc_unreachable ();
4540     }
4541 
4542   cost += cost0;
4543   cost += cost1;
4544   return cost;
4545 }
4546 
4547 /* Estimates cost of forcing EXPR into a variable.  INV_VARS is a set of the
4548    invariants the computation depends on.  */
4549 
4550 static comp_cost
4551 force_var_cost (struct ivopts_data *data, tree expr, bitmap *inv_vars)
4552 {
4553   if (!expr)
4554     return no_cost;
4555 
4556   find_inv_vars (data, &expr, inv_vars);
4557   return force_expr_to_var_cost (expr, data->speed);
4558 }
4559 
4560 /* Returns cost of auto-modifying address expression in shape base + offset.
4561    AINC_STEP is step size of the address IV.  AINC_OFFSET is offset of the
4562    address expression.  The address expression has ADDR_MODE in addr space
4563    AS.  The memory access has MEM_MODE.  SPEED means we are optimizing for
4564    speed or size.  */
4565 
4566 enum ainc_type
4567 {
4568   AINC_PRE_INC,		/* Pre increment.  */
4569   AINC_PRE_DEC,		/* Pre decrement.  */
4570   AINC_POST_INC,	/* Post increment.  */
4571   AINC_POST_DEC,	/* Post decrement.  */
4572   AINC_NONE		/* Also the number of auto increment types.  */
4573 };
4574 
4575 struct ainc_cost_data
4576 {
4577   int64_t costs[AINC_NONE];
4578 };
4579 
4580 static comp_cost
4581 get_address_cost_ainc (poly_int64 ainc_step, poly_int64 ainc_offset,
4582 		       machine_mode addr_mode, machine_mode mem_mode,
4583 		       addr_space_t as, bool speed)
4584 {
4585   if (!USE_LOAD_PRE_DECREMENT (mem_mode)
4586       && !USE_STORE_PRE_DECREMENT (mem_mode)
4587       && !USE_LOAD_POST_DECREMENT (mem_mode)
4588       && !USE_STORE_POST_DECREMENT (mem_mode)
4589       && !USE_LOAD_PRE_INCREMENT (mem_mode)
4590       && !USE_STORE_PRE_INCREMENT (mem_mode)
4591       && !USE_LOAD_POST_INCREMENT (mem_mode)
4592       && !USE_STORE_POST_INCREMENT (mem_mode))
4593     return infinite_cost;
4594 
4595   static vec<ainc_cost_data *> ainc_cost_data_list;
4596   unsigned idx = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
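  /* The (address space, memory mode) pair is flattened into a single index
     into the cost cache declared above.  */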
4597   if (idx >= ainc_cost_data_list.length ())
4598     {
4599       unsigned nsize = ((unsigned) as + 1) * MAX_MACHINE_MODE;
4600 
4601       gcc_assert (nsize > idx);
4602       ainc_cost_data_list.safe_grow_cleared (nsize, true);
4603     }
4604 
4605   ainc_cost_data *data = ainc_cost_data_list[idx];
4606   if (data == NULL)
4607     {
4608       rtx reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
4609 
4610       data = (ainc_cost_data *) xcalloc (1, sizeof (*data));
4611       data->costs[AINC_PRE_DEC] = INFTY;
4612       data->costs[AINC_POST_DEC] = INFTY;
4613       data->costs[AINC_PRE_INC] = INFTY;
4614       data->costs[AINC_POST_INC] = INFTY;
4615       if (USE_LOAD_PRE_DECREMENT (mem_mode)
4616 	  || USE_STORE_PRE_DECREMENT (mem_mode))
4617 	{
4618 	  rtx addr = gen_rtx_PRE_DEC (addr_mode, reg);
4619 
4620 	  if (memory_address_addr_space_p (mem_mode, addr, as))
4621 	    data->costs[AINC_PRE_DEC]
4622 	      = address_cost (addr, mem_mode, as, speed);
4623 	}
4624       if (USE_LOAD_POST_DECREMENT (mem_mode)
4625 	  || USE_STORE_POST_DECREMENT (mem_mode))
4626 	{
4627 	  rtx addr = gen_rtx_POST_DEC (addr_mode, reg);
4628 
4629 	  if (memory_address_addr_space_p (mem_mode, addr, as))
4630 	    data->costs[AINC_POST_DEC]
4631 	      = address_cost (addr, mem_mode, as, speed);
4632 	}
4633       if (USE_LOAD_PRE_INCREMENT (mem_mode)
4634 	  || USE_STORE_PRE_INCREMENT (mem_mode))
4635 	{
4636 	  rtx addr = gen_rtx_PRE_INC (addr_mode, reg);
4637 
4638 	  if (memory_address_addr_space_p (mem_mode, addr, as))
4639 	    data->costs[AINC_PRE_INC]
4640 	      = address_cost (addr, mem_mode, as, speed);
4641 	}
4642       if (USE_LOAD_POST_INCREMENT (mem_mode)
4643 	  || USE_STORE_POST_INCREMENT (mem_mode))
4644 	{
4645 	  rtx addr = gen_rtx_POST_INC (addr_mode, reg);
4646 
4647 	  if (memory_address_addr_space_p (mem_mode, addr, as))
4648 	    data->costs[AINC_POST_INC]
4649 	      = address_cost (addr, mem_mode, as, speed);
4650 	}
4651       ainc_cost_data_list[idx] = data;
4652     }
4653 
4654   poly_int64 msize = GET_MODE_SIZE (mem_mode);
4655   if (known_eq (ainc_offset, 0) && known_eq (msize, ainc_step))
4656     return comp_cost (data->costs[AINC_POST_INC], 0);
4657   if (known_eq (ainc_offset, 0) && known_eq (msize, -ainc_step))
4658     return comp_cost (data->costs[AINC_POST_DEC], 0);
4659   if (known_eq (ainc_offset, msize) && known_eq (msize, ainc_step))
4660     return comp_cost (data->costs[AINC_PRE_INC], 0);
4661   if (known_eq (ainc_offset, -msize) && known_eq (msize, -ainc_step))
4662     return comp_cost (data->costs[AINC_PRE_DEC], 0);
4663 
4664   return infinite_cost;
4665 }
4666 
4667 /* Return cost of computing USE's address expression by using CAND.
4668    AFF_INV and AFF_VAR represent invariant and variant parts of the
4669    address expression, respectively.  If AFF_INV is simple, store
4670    the loop invariant variables it depends on in INV_VARS;
4671    if AFF_INV is complicated, handle it as a new invariant expression
4672    and record it in INV_EXPR.  RATIO is the ratio between the steps of
4673    USE and CAND.  If CAN_AUTOINC is non-NULL, store a boolean value in it
4674    indicating whether this is an auto-increment address.  */
4675 
4676 static comp_cost
4677 get_address_cost (struct ivopts_data *data, struct iv_use *use,
4678 		  struct iv_cand *cand, aff_tree *aff_inv,
4679 		  aff_tree *aff_var, HOST_WIDE_INT ratio,
4680 		  bitmap *inv_vars, iv_inv_expr_ent **inv_expr,
4681 		  bool *can_autoinc, bool speed)
4682 {
4683   rtx addr;
4684   bool simple_inv = true;
4685   tree comp_inv = NULL_TREE, type = aff_var->type;
4686   comp_cost var_cost = no_cost, cost = no_cost;
4687   struct mem_address parts = {NULL_TREE, integer_one_node,
4688 			      NULL_TREE, NULL_TREE, NULL_TREE};
4689   machine_mode addr_mode = TYPE_MODE (type);
4690   machine_mode mem_mode = TYPE_MODE (use->mem_type);
4691   addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
4692   /* Only true if ratio != 1.  */
4693   bool ok_with_ratio_p = false;
4694   bool ok_without_ratio_p = false;
4695 
4696   if (!aff_combination_const_p (aff_inv))
4697     {
4698       parts.index = integer_one_node;
4699       /* Addressing mode "base + index".  */
4700       ok_without_ratio_p = valid_mem_ref_p (mem_mode, as, &parts);
4701       if (ratio != 1)
4702 	{
4703 	  parts.step = wide_int_to_tree (type, ratio);
4704 	  /* Addressing mode "base + index << scale".  */
4705 	  ok_with_ratio_p = valid_mem_ref_p (mem_mode, as, &parts);
4706 	  if (!ok_with_ratio_p)
4707 	    parts.step = NULL_TREE;
4708 	}
4709       if (ok_with_ratio_p || ok_without_ratio_p)
4710 	{
4711 	  if (maybe_ne (aff_inv->offset, 0))
4712 	    {
4713 	      parts.offset = wide_int_to_tree (sizetype, aff_inv->offset);
4714 	      /* Addressing mode "base + index [<< scale] + offset".  */
4715 	      if (!valid_mem_ref_p (mem_mode, as, &parts))
4716 		parts.offset = NULL_TREE;
4717 	      else
4718 		aff_inv->offset = 0;
4719 	    }
4720 
4721 	  move_fixed_address_to_symbol (&parts, aff_inv);
4722 	  /* Base is fixed address and is moved to symbol part.  */
4723 	  if (parts.symbol != NULL_TREE && aff_combination_zero_p (aff_inv))
4724 	    parts.base = NULL_TREE;
4725 
4726 	  /* Addressing mode "symbol + base + index [<< scale] [+ offset]".  */
4727 	  if (parts.symbol != NULL_TREE
4728 	      && !valid_mem_ref_p (mem_mode, as, &parts))
4729 	    {
4730 	      aff_combination_add_elt (aff_inv, parts.symbol, 1);
4731 	      parts.symbol = NULL_TREE;
4732 	      /* Reset SIMPLE_INV since symbol address needs to be computed
4733 		 outside of address expression in this case.  */
4734 	      simple_inv = false;
4735 	      /* Symbol part is moved back to base part, it can't be NULL.  */
4736 	      parts.base = integer_one_node;
4737 	    }
4738 	}
4739       else
4740 	parts.index = NULL_TREE;
4741     }
4742   else
4743     {
4744       poly_int64 ainc_step;
4745       if (can_autoinc
4746 	  && ratio == 1
4747 	  && ptrdiff_tree_p (cand->iv->step, &ainc_step))
4748 	{
4749 	  poly_int64 ainc_offset = (aff_inv->offset).force_shwi ();
4750 
4751 	  if (stmt_after_increment (data->current_loop, cand, use->stmt))
4752 	    ainc_offset += ainc_step;
4753 	  cost = get_address_cost_ainc (ainc_step, ainc_offset,
4754 					addr_mode, mem_mode, as, speed);
4755 	  if (!cost.infinite_cost_p ())
4756 	    {
4757 	      *can_autoinc = true;
4758 	      return cost;
4759 	    }
4760 	  cost = no_cost;
4761 	}
4762       if (!aff_combination_zero_p (aff_inv))
4763 	{
4764 	  parts.offset = wide_int_to_tree (sizetype, aff_inv->offset);
4765 	  /* Addressing mode "base + offset".  */
4766 	  if (!valid_mem_ref_p (mem_mode, as, &parts))
4767 	    parts.offset = NULL_TREE;
4768 	  else
4769 	    aff_inv->offset = 0;
4770 	}
4771     }
4772 
4773   if (simple_inv)
4774     simple_inv = (aff_inv == NULL
4775 		  || aff_combination_const_p (aff_inv)
4776 		  || aff_combination_singleton_var_p (aff_inv));
4777   if (!aff_combination_zero_p (aff_inv))
4778     comp_inv = aff_combination_to_tree (aff_inv);
4779   if (comp_inv != NULL_TREE)
4780     cost = force_var_cost (data, comp_inv, inv_vars);
4781   if (ratio != 1 && parts.step == NULL_TREE)
4782     var_cost += mult_by_coeff_cost (ratio, addr_mode, speed);
4783   if (comp_inv != NULL_TREE && parts.index == NULL_TREE)
4784     var_cost += add_cost (speed, addr_mode);
4785 
4786   if (comp_inv && inv_expr && !simple_inv)
4787     {
4788       *inv_expr = get_loop_invariant_expr (data, comp_inv);
4789       /* Clear depends on.  */
4790       if (*inv_expr != NULL && inv_vars && *inv_vars)
4791 	bitmap_clear (*inv_vars);
4792 
4793       /* Cost of small invariant expression adjusted against loop niters
4794 	 is usually zero, which makes it difficult to be differentiated
4795 	 from candidate based on loop invariant variables.  Secondly, the
4796 	 generated invariant expression may not be hoisted out of loop by
4797 	 following pass.  We penalize the cost by rounding up in order to
4798 	 neutralize such effects.  */
4799       cost.cost = adjust_setup_cost (data, cost.cost, true);
4800       cost.scratch = cost.cost;
4801     }
4802 
4803   cost += var_cost;
4804   addr = addr_for_mem_ref (&parts, as, false);
4805   gcc_assert (memory_address_addr_space_p (mem_mode, addr, as));
4806   cost += address_cost (addr, mem_mode, as, speed);
4807 
4808   if (parts.symbol != NULL_TREE)
4809     cost.complexity += 1;
4810   /* Don't increase the complexity of adding a scaled index if it's
4811      the only kind of index that the target allows.  */
4812   if (parts.step != NULL_TREE && ok_without_ratio_p)
4813     cost.complexity += 1;
4814   if (parts.base != NULL_TREE && parts.index != NULL_TREE)
4815     cost.complexity += 1;
4816   if (parts.offset != NULL_TREE && !integer_zerop (parts.offset))
4817     cost.complexity += 1;
4818 
4819   return cost;
4820 }
4821 
4822 /* Scale (multiply) the computed COST (except scratch part that should be
4823    hoisted out of the loop) by header->frequency / AT->frequency, which makes
4824    the expected cost more accurate.  */
4825 
4826 static comp_cost
4827 get_scaled_computation_cost_at (ivopts_data *data, gimple *at, comp_cost cost)
4828 {
4829   if (data->speed
4830       && data->current_loop->header->count.to_frequency (cfun) > 0)
4831     {
4832       basic_block bb = gimple_bb (at);
4833       gcc_assert (cost.scratch <= cost.cost);
4834       int scale_factor = (int)(intptr_t) bb->aux;
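      /* The per-block scale factor is precomputed elsewhere and stashed in
	 bb->aux; a factor of 1 means no scaling is needed.  */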
4835       if (scale_factor == 1)
4836 	return cost;
4837 
4838       int64_t scaled_cost
4839 	= cost.scratch + (cost.cost - cost.scratch) * scale_factor;
4840 
4841       if (dump_file && (dump_flags & TDF_DETAILS))
4842 	fprintf (dump_file, "Scaling cost based on bb prob by %2.2f: "
4843 		 "%" PRId64 " (scratch: %" PRId64 ") -> %" PRId64 "\n",
4844 		 1.0f * scale_factor, cost.cost, cost.scratch, scaled_cost);
4845 
4846       cost.cost = scaled_cost;
4847     }
4848 
4849   return cost;
4850 }
4851 
4852 /* Determines the cost of the computation by that USE is expressed
4853    from induction variable CAND.  If ADDRESS_P is true, we just need
4854    to create an address from it; otherwise we want to get it into a
4855    register.  A set of invariants we depend on is stored in INV_VARS.
4856    If CAN_AUTOINC is nonnull, use it to record whether autoinc
4857    addressing is likely.  If INV_EXPR is nonnull, record invariant
4858    expr entry in it.  */
4859 
4860 static comp_cost
4861 get_computation_cost (struct ivopts_data *data, struct iv_use *use,
4862 		      struct iv_cand *cand, bool address_p, bitmap *inv_vars,
4863 		      bool *can_autoinc, iv_inv_expr_ent **inv_expr)
4864 {
4865   gimple *at = use->stmt;
4866   tree ubase = use->iv->base, cbase = cand->iv->base;
4867   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
4868   tree comp_inv = NULL_TREE;
4869   HOST_WIDE_INT ratio, aratio;
4870   comp_cost cost;
4871   widest_int rat;
4872   aff_tree aff_inv, aff_var;
4873   bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4874 
4875   if (inv_vars)
4876     *inv_vars = NULL;
4877   if (can_autoinc)
4878     *can_autoinc = false;
4879   if (inv_expr)
4880     *inv_expr = NULL;
4881 
4882   /* Check if we have enough precision to express the values of use.  */
4883   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4884     return infinite_cost;
4885 
4886   if (address_p
4887       || (use->iv->base_object
4888 	  && cand->iv->base_object
4889 	  && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4890 	  && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4891     {
4892       /* Do not try to express address of an object with computation based
4893 	 on address of a different object.  This may cause problems in rtl
4894 	 level alias analysis (that does not expect this to be happening,
4895 	 as this is illegal in C), and would be unlikely to be useful
4896 	 anyway.  */
4897       if (use->iv->base_object
4898 	  && cand->iv->base_object
4899 	  && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4900 	return infinite_cost;
4901     }
4902 
4903   if (!get_computation_aff_1 (data->current_loop, at, use,
4904 			      cand, &aff_inv, &aff_var, &rat)
4905       || !wi::fits_shwi_p (rat))
4906     return infinite_cost;
4907 
4908   ratio = rat.to_shwi ();
4909   if (address_p)
4910     {
4911       cost = get_address_cost (data, use, cand, &aff_inv, &aff_var, ratio,
4912 			       inv_vars, inv_expr, can_autoinc, speed);
4913       cost = get_scaled_computation_cost_at (data, at, cost);
4914       /* For doloop IV cand, add on the extra cost.  */
4915       cost += cand->doloop_p ? targetm.doloop_cost_for_address : 0;
4916       return cost;
4917     }
4918 
4919   bool simple_inv = (aff_combination_const_p (&aff_inv)
4920 		     || aff_combination_singleton_var_p (&aff_inv));
4921   tree signed_type = signed_type_for (aff_combination_type (&aff_inv));
4922   aff_combination_convert (&aff_inv, signed_type);
4923   if (!aff_combination_zero_p (&aff_inv))
4924     comp_inv = aff_combination_to_tree (&aff_inv);
4925 
4926   cost = force_var_cost (data, comp_inv, inv_vars);
4927   if (comp_inv && inv_expr && !simple_inv)
4928     {
4929       *inv_expr = get_loop_invariant_expr (data, comp_inv);
4930       /* Clear depends on.  */
4931       if (*inv_expr != NULL && inv_vars && *inv_vars)
4932 	bitmap_clear (*inv_vars);
4933 
4934       cost.cost = adjust_setup_cost (data, cost.cost);
4935       /* Record setup cost in scratch field.  */
4936       cost.scratch = cost.cost;
4937     }
4938   /* The cost of a constant integer can be covered when adding the invariant
4939      part to the variant part.  */
4940   else if (comp_inv && CONSTANT_CLASS_P (comp_inv))
4941     cost = no_cost;
4942 
4943   /* Need type narrowing to represent use with cand.  */
4944   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4945     {
4946       machine_mode outer_mode = TYPE_MODE (utype);
4947       machine_mode inner_mode = TYPE_MODE (ctype);
4948       cost += comp_cost (convert_cost (outer_mode, inner_mode, speed), 0);
4949     }
4950 
4951   /* Turn a + i * (-c) into a - i * c.  */
4952   if (ratio < 0 && comp_inv && !integer_zerop (comp_inv))
4953     aratio = -ratio;
4954   else
4955     aratio = ratio;
4956 
4957   if (ratio != 1)
4958     cost += mult_by_coeff_cost (aratio, TYPE_MODE (utype), speed);
4959 
4960   /* TODO: We may also need to check if we can compute  a + i * 4 in one
4961      instruction.  */
4962   /* Need to add up the invariant and variant parts.  */
4963   if (comp_inv && !integer_zerop (comp_inv))
4964     cost += add_cost (speed, TYPE_MODE (utype));
4965 
4966   cost = get_scaled_computation_cost_at (data, at, cost);
4967 
4968   /* For doloop IV cand, add on the extra cost.  */
4969   if (cand->doloop_p && use->type == USE_NONLINEAR_EXPR)
4970     cost += targetm.doloop_cost_for_generic;
4971 
4972   return cost;
4973 }
4974 
4975 /* Determines cost of computing the use in GROUP with CAND in a generic
4976    expression.  */
4977 
4978 static bool
4979 determine_group_iv_cost_generic (struct ivopts_data *data,
4980 				 struct iv_group *group, struct iv_cand *cand)
4981 {
4982   comp_cost cost;
4983   iv_inv_expr_ent *inv_expr = NULL;
4984   bitmap inv_vars = NULL, inv_exprs = NULL;
4985   struct iv_use *use = group->vuses[0];
4986 
4987   /* The simple case first -- if we need to express value of the preserved
4988      original biv, the cost is 0.  This also prevents us from counting the
4989      cost of increment twice -- once at this use and once in the cost of
4990      the candidate.  */
4991   if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
4992     cost = no_cost;
4993   /* If the IV candidate involves undefined SSA values and is not the
4994      same IV as on the USE avoid using that candidate here.  */
4995   else if (cand->involves_undefs
4996 	   && (!use->iv || !operand_equal_p (cand->iv->base, use->iv->base, 0)))
4997     return false;
4998   else
4999     cost = get_computation_cost (data, use, cand, false,
5000 				 &inv_vars, NULL, &inv_expr);
5001 
5002   if (inv_expr)
5003     {
5004       inv_exprs = BITMAP_ALLOC (NULL);
5005       bitmap_set_bit (inv_exprs, inv_expr->id);
5006     }
5007   set_group_iv_cost (data, group, cand, cost, inv_vars,
5008 		     NULL_TREE, ERROR_MARK, inv_exprs);
5009   return !cost.infinite_cost_p ();
5010 }
5011 
5012 /* Determines cost of computing uses in GROUP with CAND in addresses.  */
5013 
5014 static bool
5015 determine_group_iv_cost_address (struct ivopts_data *data,
5016 				 struct iv_group *group, struct iv_cand *cand)
5017 {
5018   unsigned i;
5019   bitmap inv_vars = NULL, inv_exprs = NULL;
5020   bool can_autoinc;
5021   iv_inv_expr_ent *inv_expr = NULL;
5022   struct iv_use *use = group->vuses[0];
5023   comp_cost sum_cost = no_cost, cost;
5024 
5025   cost = get_computation_cost (data, use, cand, true,
5026 			       &inv_vars, &can_autoinc, &inv_expr);
5027 
5028   if (inv_expr)
5029     {
5030       inv_exprs = BITMAP_ALLOC (NULL);
5031       bitmap_set_bit (inv_exprs, inv_expr->id);
5032     }
5033   sum_cost = cost;
5034   if (!sum_cost.infinite_cost_p () && cand->ainc_use == use)
5035     {
5036       if (can_autoinc)
5037 	sum_cost -= cand->cost_step;
5038       /* If we generated the candidate solely for exploiting autoincrement
5039 	 opportunities, and it turns out it can't be used, set the cost to
5040 	 infinity to make sure we ignore it.  */
5041       else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
5042 	sum_cost = infinite_cost;
5043     }
5044 
5045   /* Uses in a group can share setup code, so only add setup cost once.  */
5046   cost -= cost.scratch;
5047   /* Compute and add costs for the rest of the uses in this group.  */
5048   for (i = 1; i < group->vuses.length () && !sum_cost.infinite_cost_p (); i++)
5049     {
5050       struct iv_use *next = group->vuses[i];
5051 
5052       /* TODO: We could skip computing cost for sub iv_use when it has the
5053 	 same cost as the first iv_use, but the cost really depends on the
5054 	 offset and where the iv_use is.  */
5055       cost = get_computation_cost (data, next, cand, true,
5056 				    NULL, &can_autoinc, &inv_expr);
5057       if (inv_expr)
5058 	{
5059 	  if (!inv_exprs)
5060 	    inv_exprs = BITMAP_ALLOC (NULL);
5061 
5062 	  bitmap_set_bit (inv_exprs, inv_expr->id);
5063 	}
5064       sum_cost += cost;
5065     }
5066   set_group_iv_cost (data, group, cand, sum_cost, inv_vars,
5067 		     NULL_TREE, ERROR_MARK, inv_exprs);
5068 
5069   return !sum_cost.infinite_cost_p ();
5070 }
5071 
5072 /* Computes value of candidate CAND at position AT in iteration DESC->NITER,
5073    and stores it to VAL.  */
5074 
5075 static void
5076 cand_value_at (class loop *loop, struct iv_cand *cand, gimple *at,
5077 	       class tree_niter_desc *desc, aff_tree *val)
5078 {
5079   aff_tree step, delta, nit;
5080   struct iv *iv = cand->iv;
5081   tree type = TREE_TYPE (iv->base);
5082   tree niter = desc->niter;
5083   bool after_adjust = stmt_after_increment (loop, cand, at);
5084   tree steptype;
5085 
5086   if (POINTER_TYPE_P (type))
5087     steptype = sizetype;
5088   else
5089     steptype = unsigned_type_for (type);
5090 
5091   /* If AFTER_ADJUST is required, the code below generates the equivalent
5092      of BASE + NITER * STEP + STEP, when ideally we'd prefer the expression
5093      BASE + (NITER + 1) * STEP, especially when NITER is often of the form
5094      SSA_NAME - 1.  Unfortunately, guaranteeing that adding 1 to NITER
5095      doesn't overflow is tricky, so we peek inside the TREE_NITER_DESC
5096      class for common idioms that we know are safe.  */
5097   if (after_adjust
5098       && desc->control.no_overflow
5099       && integer_onep (desc->control.step)
5100       && (desc->cmp == LT_EXPR
5101 	  || desc->cmp == NE_EXPR)
5102       && TREE_CODE (desc->bound) == SSA_NAME)
5103     {
5104       if (integer_onep (desc->control.base))
5105 	{
5106 	  niter = desc->bound;
5107 	  after_adjust = false;
5108 	}
5109       else if (TREE_CODE (niter) == MINUS_EXPR
5110 	       && integer_onep (TREE_OPERAND (niter, 1)))
5111 	{
5112 	  niter = TREE_OPERAND (niter, 0);
5113 	  after_adjust = false;
5114 	}
5115     }
5116 
5117   tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
5118   aff_combination_convert (&step, steptype);
5119   tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
5120   aff_combination_convert (&nit, steptype);
5121   aff_combination_mult (&nit, &step, &delta);
5122   if (after_adjust)
5123     aff_combination_add (&delta, &step);
5124 
5125   tree_to_aff_combination (iv->base, type, val);
5126   if (!POINTER_TYPE_P (type))
5127     aff_combination_convert (val, steptype);
5128   aff_combination_add (val, &delta);
5129 }
5130 
5131 /* Returns period of induction variable iv.  */
5132 
5133 static tree
5134 iv_period (struct iv *iv)
5135 {
5136   tree step = iv->step, period, type;
5137   tree pow2div;
5138 
5139   gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
5140 
5141   type = unsigned_type_for (TREE_TYPE (step));
5142   /* The period of the iv is lcm (step, type_range) / step - 1,
5143      i.e., N * type_range / step - 1.  Since the type range is a power
5144      of two, N == step >> num_of_ending_zeros_binary (step), so the
5145      final result is
5146 
5147        (type_range >> num_of_ending_zeros_binary (step)) - 1
5148 
5149   */
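  /* Illustrative example: for a 32-bit unsigned type and STEP == 4 (two
     trailing zero bits), the period is (2^32 >> 2) - 1 == 0x3fffffff.  */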
5150   pow2div = num_ending_zeros (step);
5151 
5152   period = build_low_bits_mask (type,
5153 				(TYPE_PRECISION (type)
5154 				 - tree_to_uhwi (pow2div)));
5155 
5156   return period;
5157 }
5158 
5159 /* Returns the comparison operator used when eliminating the iv USE.  */
5160 
5161 static enum tree_code
5162 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
5163 {
5164   class loop *loop = data->current_loop;
5165   basic_block ex_bb;
5166   edge exit;
5167 
5168   ex_bb = gimple_bb (use->stmt);
5169   exit = EDGE_SUCC (ex_bb, 0);
5170   if (flow_bb_inside_loop_p (loop, exit->dest))
5171     exit = EDGE_SUCC (ex_bb, 1);
5172 
5173   return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
5174 }
5175 
5176 /* Returns true if we can prove that BASE - OFFSET does not overflow.  For now,
5177    we only detect the situation that BASE = SOMETHING + OFFSET, where the
5178    calculation is performed in non-wrapping type.
5179 
5180    TODO: More generally, we could test for the situation that
5181 	 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
5182 	 This would require knowing the sign of OFFSET.  */
5183 
5184 static bool
5185 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
5186 {
5187   enum tree_code code;
5188   tree e1, e2;
5189   aff_tree aff_e1, aff_e2, aff_offset;
5190 
5191   if (!nowrap_type_p (TREE_TYPE (base)))
5192     return false;
5193 
5194   base = expand_simple_operations (base);
5195 
5196   if (TREE_CODE (base) == SSA_NAME)
5197     {
5198       gimple *stmt = SSA_NAME_DEF_STMT (base);
5199 
5200       if (gimple_code (stmt) != GIMPLE_ASSIGN)
5201 	return false;
5202 
5203       code = gimple_assign_rhs_code (stmt);
5204       if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
5205 	return false;
5206 
5207       e1 = gimple_assign_rhs1 (stmt);
5208       e2 = gimple_assign_rhs2 (stmt);
5209     }
5210   else
5211     {
5212       code = TREE_CODE (base);
5213       if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
5214 	return false;
5215       e1 = TREE_OPERAND (base, 0);
5216       e2 = TREE_OPERAND (base, 1);
5217     }
5218 
5219   /* Use affine expansion as deeper inspection to prove the equality.  */
5220   tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
5221 				  &aff_e2, &data->name_expansion_cache);
5222   tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
5223 				  &aff_offset, &data->name_expansion_cache);
5224   aff_combination_scale (&aff_offset, -1);
5225   switch (code)
5226     {
5227     case PLUS_EXPR:
5228       aff_combination_add (&aff_e2, &aff_offset);
5229       if (aff_combination_zero_p (&aff_e2))
5230 	return true;
5231 
5232       tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
5233 				      &aff_e1, &data->name_expansion_cache);
5234       aff_combination_add (&aff_e1, &aff_offset);
5235       return aff_combination_zero_p (&aff_e1);
5236 
5237     case POINTER_PLUS_EXPR:
5238       aff_combination_add (&aff_e2, &aff_offset);
5239       return aff_combination_zero_p (&aff_e2);
5240 
5241     default:
5242       return false;
5243     }
5244 }
5245 
5246 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
5247    comparison with CAND.  NITER describes the number of iterations of
5248    the loop.  If successful, the comparison in COMP_P is altered accordingly.
5249 
5250    We aim to handle the following situation:
5251 
5252    sometype *base, *p;
5253    int a, b, i;
5254 
5255    i = a;
5256    p = p_0 = base + a;
5257 
5258    do
5259      {
5260        bla (*p);
5261        p++;
5262        i++;
5263      }
5264    while (i < b);
5265 
5266    Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
5267    We aim to optimize this to
5268 
5269    p = p_0 = base + a;
5270    do
5271      {
5272        bla (*p);
5273        p++;
5274      }
5275    while (p < p_0 - a + b);
5276 
5277    This preserves correctness, since the pointer arithmetic does not
5278    overflow.  More precisely:
5279 
5280    1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
5281       overflow in computing it or the values of p.
5282    2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
5283       overflow.  To prove this, we use the fact that p_0 = base + a.  */
5284 
5285 static bool
5286 iv_elimination_compare_lt (struct ivopts_data *data,
5287 			   struct iv_cand *cand, enum tree_code *comp_p,
5288 			   class tree_niter_desc *niter)
5289 {
5290   tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
5291   class aff_tree nit, tmpa, tmpb;
5292   enum tree_code comp;
5293   HOST_WIDE_INT step;
5294 
5295   /* We need to know that the candidate induction variable does not overflow.
5296      While more complex analysis may be used to prove this, for now just
5297      check that the variable appears in the original program and that it
5298      is computed in a type that guarantees no overflows.  */
5299   cand_type = TREE_TYPE (cand->iv->base);
5300   if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
5301     return false;
5302 
5303   /* Make sure that the loop iterates till the loop bound is hit, as otherwise
5304      the calculation of the BOUND could overflow, making the comparison
5305      invalid.  */
5306   if (!data->loop_single_exit_p)
5307     return false;
5308 
5309   /* We need to be able to decide whether candidate is increasing or decreasing
5310      in order to choose the right comparison operator.  */
5311   if (!cst_and_fits_in_hwi (cand->iv->step))
5312     return false;
5313   step = int_cst_value (cand->iv->step);
5314 
5315   /* Check that the number of iterations matches the expected pattern:
5316      a + 1 > b ? 0 : b - a - 1.  */
5317   mbz = niter->may_be_zero;
5318   if (TREE_CODE (mbz) == GT_EXPR)
5319     {
5320       /* Handle a + 1 > b.  */
5321       tree op0 = TREE_OPERAND (mbz, 0);
5322       if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
5323 	{
5324 	  a = TREE_OPERAND (op0, 0);
5325 	  b = TREE_OPERAND (mbz, 1);
5326 	}
5327       else
5328 	return false;
5329     }
5330   else if (TREE_CODE (mbz) == LT_EXPR)
5331     {
5332       tree op1 = TREE_OPERAND (mbz, 1);
5333 
5334       /* Handle b < a + 1.  */
5335       if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
5336 	{
5337 	  a = TREE_OPERAND (op1, 0);
5338 	  b = TREE_OPERAND (mbz, 0);
5339 	}
5340       else
5341 	return false;
5342     }
5343   else
5344     return false;
5345 
5346   /* Expected number of iterations is B - A - 1.  Check that it matches
5347      the actual number, i.e., that B - A - NITER = 1.  */
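  /* E.g. (illustrative numbers): with A == 2 and B == 7 the loop runs
     NITER == 4 times, and B - A - NITER == 1 as required.  */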
5348   tree_to_aff_combination (niter->niter, nit_type, &nit);
5349   tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
5350   tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
5351   aff_combination_scale (&nit, -1);
5352   aff_combination_scale (&tmpa, -1);
5353   aff_combination_add (&tmpb, &tmpa);
5354   aff_combination_add (&tmpb, &nit);
5355   if (tmpb.n != 0 || maybe_ne (tmpb.offset, 1))
5356     return false;
5357 
5358   /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
5359      overflow.  */
5360   offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
5361 			cand->iv->step,
5362 			fold_convert (TREE_TYPE (cand->iv->step), a));
5363   if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
5364     return false;
5365 
5366   /* Determine the new comparison operator.  */
5367   comp = step < 0 ? GT_EXPR : LT_EXPR;
5368   if (*comp_p == NE_EXPR)
5369     *comp_p = comp;
5370   else if (*comp_p == EQ_EXPR)
5371     *comp_p = invert_tree_comparison (comp, false);
5372   else
5373     gcc_unreachable ();
5374 
5375   return true;
5376 }
5377 
5378 /* Check whether it is possible to express the condition in USE by a comparison
5379    of candidate CAND.  If so, store the value compared against in BOUND, and
5380    the comparison operator in COMP.  */
5381 
5382 static bool
5383 may_eliminate_iv (struct ivopts_data *data,
5384 		  struct iv_use *use, struct iv_cand *cand, tree *bound,
5385 		  enum tree_code *comp)
5386 {
5387   basic_block ex_bb;
5388   edge exit;
5389   tree period;
5390   class loop *loop = data->current_loop;
5391   aff_tree bnd;
5392   class tree_niter_desc *desc = NULL;
5393 
5394   if (TREE_CODE (cand->iv->step) != INTEGER_CST)
5395     return false;
5396 
5397   /* For now this works only for exits that dominate the loop latch.
5398      TODO: extend to other conditions inside loop body.  */
5399   ex_bb = gimple_bb (use->stmt);
5400   if (use->stmt != last_stmt (ex_bb)
5401       || gimple_code (use->stmt) != GIMPLE_COND
5402       || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
5403     return false;
5404 
5405   exit = EDGE_SUCC (ex_bb, 0);
5406   if (flow_bb_inside_loop_p (loop, exit->dest))
5407     exit = EDGE_SUCC (ex_bb, 1);
5408   if (flow_bb_inside_loop_p (loop, exit->dest))
5409     return false;
5410 
5411   desc = niter_for_exit (data, exit);
5412   if (!desc)
5413     return false;
5414 
5415   /* Determine whether we can use the variable to test the exit condition.
5416      This is the case iff the period of the induction variable is greater
5417      than the number of iterations for which the exit condition is true.  */
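  /* Illustrative example: an unsigned char IV with step 1 has period 255, so
     it can only replace the exit test if the loop is known to run for at most
     roughly 255 iterations; the exact off-by-one depends on whether the use
     is after the increment, as checked below.  */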
5418   period = iv_period (cand->iv);
5419 
5420   /* If the number of iterations is constant, compare against it directly.  */
5421   if (TREE_CODE (desc->niter) == INTEGER_CST)
5422     {
5423       /* See cand_value_at.  */
5424       if (stmt_after_increment (loop, cand, use->stmt))
5425 	{
5426 	  if (!tree_int_cst_lt (desc->niter, period))
5427 	    return false;
5428 	}
5429       else
5430 	{
5431 	  if (tree_int_cst_lt (period, desc->niter))
5432 	    return false;
5433 	}
5434     }
5435 
5436   /* If not, and if this is the only possible exit of the loop, see whether
5437      we can get a conservative estimate on the number of iterations of the
5438      entire loop and compare against that instead.  */
5439   else
5440     {
5441       widest_int period_value, max_niter;
5442 
5443       max_niter = desc->max;
5444       if (stmt_after_increment (loop, cand, use->stmt))
5445 	max_niter += 1;
5446       period_value = wi::to_widest (period);
5447       if (wi::gtu_p (max_niter, period_value))
5448 	{
5449 	  /* See if we can take advantage of inferred loop bound
5450 	     information.  */
5451 	  if (data->loop_single_exit_p)
5452 	    {
5453 	      if (!max_loop_iterations (loop, &max_niter))
5454 		return false;
5455 	      /* The loop bound is already adjusted by adding 1.  */
5456 	      if (wi::gtu_p (max_niter, period_value))
5457 		return false;
5458 	    }
5459 	  else
5460 	    return false;
5461 	}
5462     }
5463 
5464   /* For a doloop IV cand, the bound would be zero.  It's safe whether
5465      may_be_zero is set or not.  */
5466   if (cand->doloop_p)
5467     {
5468       *bound = build_int_cst (TREE_TYPE (cand->iv->base), 0);
5469       *comp = iv_elimination_compare (data, use);
5470       return true;
5471     }
5472 
5473   cand_value_at (loop, cand, use->stmt, desc, &bnd);
5474 
5475   *bound = fold_convert (TREE_TYPE (cand->iv->base),
5476 			 aff_combination_to_tree (&bnd));
5477   *comp = iv_elimination_compare (data, use);
5478 
5479   /* It is unlikely that computing the number of iterations using division
5480      would be more profitable than keeping the original induction variable.  */
5481   if (expression_expensive_p (*bound))
5482     return false;
5483 
5484   /* Sometimes it is possible to handle the situation that the number of
5485      iterations may be zero (unless additional assumptions hold) by using
5486      < instead of != in the exit condition.
5487 
5488      TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
5489 	   base the exit condition on it.  However, that is often too
5490 	   expensive.  */
5491   if (!integer_zerop (desc->may_be_zero))
5492     return iv_elimination_compare_lt (data, cand, comp, desc);
5493 
5494   return true;
5495 }
5496 
5497  /* Calculates the cost of BOUND, if it is a PARM_DECL.  A PARM_DECL must
5498     be copied, if it is used in the loop body and DATA->body_includes_call.  */
5499 
5500 static int
5501 parm_decl_cost (struct ivopts_data *data, tree bound)
5502 {
5503   tree sbound = bound;
5504   STRIP_NOPS (sbound);
5505 
5506   if (TREE_CODE (sbound) == SSA_NAME
5507       && SSA_NAME_IS_DEFAULT_DEF (sbound)
5508       && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
5509       && data->body_includes_call)
5510     return COSTS_N_INSNS (1);
5511 
5512   return 0;
5513 }
5514 
5515 /* Determines cost of computing the use in GROUP with CAND in a condition.  */
5516 
5517 static bool
5518 determine_group_iv_cost_cond (struct ivopts_data *data,
5519 			      struct iv_group *group, struct iv_cand *cand)
5520 {
5521   tree bound = NULL_TREE;
5522   struct iv *cmp_iv;
5523   bitmap inv_exprs = NULL;
5524   bitmap inv_vars_elim = NULL, inv_vars_express = NULL, inv_vars;
5525   comp_cost elim_cost = infinite_cost, express_cost, cost, bound_cost;
5526   enum comp_iv_rewrite rewrite_type;
5527   iv_inv_expr_ent *inv_expr_elim = NULL, *inv_expr_express = NULL, *inv_expr;
5528   tree *control_var, *bound_cst;
5529   enum tree_code comp = ERROR_MARK;
5530   struct iv_use *use = group->vuses[0];
5531 
5532   /* Extract condition operands.  */
5533   rewrite_type = extract_cond_operands (data, use->stmt, &control_var,
5534 					&bound_cst, NULL, &cmp_iv);
5535   gcc_assert (rewrite_type != COMP_IV_NA);
5536 
5537   /* Try iv elimination.  */
5538   if (rewrite_type == COMP_IV_ELIM
5539       && may_eliminate_iv (data, use, cand, &bound, &comp))
5540     {
5541       elim_cost = force_var_cost (data, bound, &inv_vars_elim);
5542       if (elim_cost.cost == 0)
5543 	elim_cost.cost = parm_decl_cost (data, bound);
5544       else if (TREE_CODE (bound) == INTEGER_CST)
5545 	elim_cost.cost = 0;
5546       /* If we replace a loop condition 'i < n' with 'p < base + n',
5547 	 inv_vars_elim will have 'base' and 'n' set, which implies that both
5548 	 'base' and 'n' will be live during the loop.	 More likely,
5549 	 'base + n' will be loop invariant, resulting in only one live value
5550 	 during the loop.  So in that case we clear inv_vars_elim and set
5551 	 inv_expr_elim instead.  */
5552       if (inv_vars_elim && bitmap_count_bits (inv_vars_elim) > 1)
5553 	{
5554 	  inv_expr_elim = get_loop_invariant_expr (data, bound);
5555 	  bitmap_clear (inv_vars_elim);
5556 	}
5557       /* The bound is a loop invariant, so it will be only computed
5558 	 once.  */
5559       elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
5560     }
5561 
5562   /* When the condition is a comparison of the candidate IV against
5563      zero, prefer this IV.
5564 
5565      TODO: The constant that we're subtracting from the cost should
5566      be target-dependent.  This information should be added to the
5567      target costs for each backend.  */
5568   if (!elim_cost.infinite_cost_p () /* Do not try to decrease infinite! */
5569       && integer_zerop (*bound_cst)
5570       && (operand_equal_p (*control_var, cand->var_after, 0)
5571 	  || operand_equal_p (*control_var, cand->var_before, 0)))
5572     elim_cost -= 1;
5573 
5574   express_cost = get_computation_cost (data, use, cand, false,
5575 				       &inv_vars_express, NULL,
5576 				       &inv_expr_express);
5577   if (cmp_iv != NULL)
5578     find_inv_vars (data, &cmp_iv->base, &inv_vars_express);
5579 
5580   /* Count the cost of the original bound as well.  */
5581   bound_cost = force_var_cost (data, *bound_cst, NULL);
5582   if (bound_cost.cost == 0)
5583     bound_cost.cost = parm_decl_cost (data, *bound_cst);
5584   else if (TREE_CODE (*bound_cst) == INTEGER_CST)
5585     bound_cost.cost = 0;
5586   express_cost += bound_cost;
5587 
5588   /* Choose the better approach, preferring the eliminated IV. */
5589   if (elim_cost <= express_cost)
5590     {
5591       cost = elim_cost;
5592       inv_vars = inv_vars_elim;
5593       inv_vars_elim = NULL;
5594       inv_expr = inv_expr_elim;
5595       /* For doloop candidate/use pair, adjust to zero cost.  */
5596       if (group->doloop_p && cand->doloop_p && elim_cost.cost > no_cost.cost)
5597 	cost = no_cost;
5598     }
5599   else
5600     {
5601       cost = express_cost;
5602       inv_vars = inv_vars_express;
5603       inv_vars_express = NULL;
5604       bound = NULL_TREE;
5605       comp = ERROR_MARK;
5606       inv_expr = inv_expr_express;
5607     }
5608 
5609   if (inv_expr)
5610     {
5611       inv_exprs = BITMAP_ALLOC (NULL);
5612       bitmap_set_bit (inv_exprs, inv_expr->id);
5613     }
5614   set_group_iv_cost (data, group, cand, cost,
5615 		     inv_vars, bound, comp, inv_exprs);
5616 
5617   if (inv_vars_elim)
5618     BITMAP_FREE (inv_vars_elim);
5619   if (inv_vars_express)
5620     BITMAP_FREE (inv_vars_express);
5621 
5622   return !cost.infinite_cost_p ();
5623 }
5624 
5625 /* Determines cost of computing uses in GROUP with CAND.  Returns false
5626    if the uses in GROUP cannot be represented with CAND.  */
5627 
5628 static bool
5629 determine_group_iv_cost (struct ivopts_data *data,
5630 			 struct iv_group *group, struct iv_cand *cand)
5631 {
5632   switch (group->type)
5633     {
5634     case USE_NONLINEAR_EXPR:
5635       return determine_group_iv_cost_generic (data, group, cand);
5636 
5637     case USE_REF_ADDRESS:
5638     case USE_PTR_ADDRESS:
5639       return determine_group_iv_cost_address (data, group, cand);
5640 
5641     case USE_COMPARE:
5642       return determine_group_iv_cost_cond (data, group, cand);
5643 
5644     default:
5645       gcc_unreachable ();
5646     }
5647 }
5648 
5649 /* Return true if get_computation_cost indicates that autoincrement is
5650    a possibility for the pair of USE and CAND, false otherwise.  */
5651 
5652 static bool
5653 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
5654 			   struct iv_cand *cand)
5655 {
5656   if (!address_p (use->type))
5657     return false;
5658 
5659   bool can_autoinc = false;
5660   get_computation_cost (data, use, cand, true, NULL, &can_autoinc, NULL);
5661   return can_autoinc;
5662 }
5663 
5664 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5665    use that allows autoincrement, and set their AINC_USE if possible.  */
5666 
5667 static void
5668 set_autoinc_for_original_candidates (struct ivopts_data *data)
5669 {
5670   unsigned i, j;
5671 
5672   for (i = 0; i < data->vcands.length (); i++)
5673     {
5674       struct iv_cand *cand = data->vcands[i];
5675       struct iv_use *closest_before = NULL;
5676       struct iv_use *closest_after = NULL;
5677       if (cand->pos != IP_ORIGINAL)
5678 	continue;
5679 
5680       for (j = 0; j < data->vgroups.length (); j++)
5681 	{
5682 	  struct iv_group *group = data->vgroups[j];
5683 	  struct iv_use *use = group->vuses[0];
5684 	  unsigned uid = gimple_uid (use->stmt);
5685 
5686 	  if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
5687 	    continue;
5688 
5689 	  if (uid < gimple_uid (cand->incremented_at)
5690 	      && (closest_before == NULL
5691 		  || uid > gimple_uid (closest_before->stmt)))
5692 	    closest_before = use;
5693 
5694 	  if (uid > gimple_uid (cand->incremented_at)
5695 	      && (closest_after == NULL
5696 		  || uid < gimple_uid (closest_after->stmt)))
5697 	    closest_after = use;
5698 	}
5699 
5700       if (closest_before != NULL
5701 	  && autoinc_possible_for_pair (data, closest_before, cand))
5702 	cand->ainc_use = closest_before;
5703       else if (closest_after != NULL
5704 	       && autoinc_possible_for_pair (data, closest_after, cand))
5705 	cand->ainc_use = closest_after;
5706     }
5707 }
5708 
5709 /* Relate compare use with all candidates.  */
5710 
5711 static void
5712 relate_compare_use_with_all_cands (struct ivopts_data *data)
5713 {
5714   unsigned i, count = data->vcands.length ();
5715   for (i = 0; i < data->vgroups.length (); i++)
5716     {
5717       struct iv_group *group = data->vgroups[i];
5718 
5719       if (group->type == USE_COMPARE)
5720 	bitmap_set_range (group->related_cands, 0, count);
5721     }
5722 }
5723 
5724 /* If PREFERRED_MODE is suitable and profitable, use it to compute the
5725    doloop iv base from NITER: base = niter + 1.  */
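/* Illustrative example (not taken from the code below): if NITER is a 32-bit
   value of the form "n - 1", the target prefers a 64-bit doloop counter and n
   is known not to wrap, the base can be computed as the 64-bit extension of
   (n - 1) + 1, i.e. simply of n.  */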
5726 
5727 static tree
5728 compute_doloop_base_on_mode (machine_mode preferred_mode, tree niter,
5729 			     const widest_int &iterations_max)
5730 {
5731   tree ntype = TREE_TYPE (niter);
5732   tree pref_type = lang_hooks.types.type_for_mode (preferred_mode, 1);
5733   if (!pref_type)
5734     return fold_build2 (PLUS_EXPR, ntype, unshare_expr (niter),
5735 			build_int_cst (ntype, 1));
5736 
5737   gcc_assert (TREE_CODE (pref_type) == INTEGER_TYPE);
5738 
5739   int prec = TYPE_PRECISION (ntype);
5740   int pref_prec = TYPE_PRECISION (pref_type);
5741 
5742   tree base;
5743 
5744   /* Check if PREFERRED_MODE is able to represent NITER.  */
5745   if (pref_prec > prec
5746       || wi::ltu_p (iterations_max,
5747 		    widest_int::from (wi::max_value (pref_prec, UNSIGNED),
5748 				      UNSIGNED)))
5749     {
5750       /* No wrap: it is safe to use the preferred type after computing niter + 1.  */
5751       if (wi::ltu_p (iterations_max,
5752 		     widest_int::from (wi::max_value (prec, UNSIGNED),
5753 				       UNSIGNED)))
5754 	{
5755 	  /* This could help to optimize the "-1 +1" pair when NITER looks
5756 	     like "n - 1", where n is in the original mode.  "base = (n - 1) + 1"
5757 	     in PREFERRED_MODE could then become base = (PREFERRED_TYPE) n.  */
5758 	  base = fold_build2 (PLUS_EXPR, ntype, unshare_expr (niter),
5759 			      build_int_cst (ntype, 1));
5760 	  base = fold_convert (pref_type, base);
5761 	}
5762 
5763       /* To avoid wrap, convert NITER to the preferred type before adding 1.  */
5764       else
5765 	{
5766 	  niter = fold_convert (pref_type, niter);
5767 	  base = fold_build2 (PLUS_EXPR, pref_type, unshare_expr (niter),
5768 			      build_int_cst (pref_type, 1));
5769 	}
5770     }
5771   else
5772     base = fold_build2 (PLUS_EXPR, ntype, unshare_expr (niter),
5773 			build_int_cst (ntype, 1));
5774   return base;
5775 }
5776 
5777 /* Add one doloop dedicated IV candidate:
5778      - Base is (may_be_zero ? 1 : (niter + 1)).
5779      - Step is -1.  */
5780 
5781 static void
5782 add_iv_candidate_for_doloop (struct ivopts_data *data)
5783 {
5784   tree_niter_desc *niter_desc = niter_for_single_dom_exit (data);
5785   gcc_assert (niter_desc && niter_desc->assumptions);
5786 
5787   tree niter = niter_desc->niter;
5788   tree ntype = TREE_TYPE (niter);
5789   gcc_assert (TREE_CODE (ntype) == INTEGER_TYPE);
5790 
5791   tree may_be_zero = niter_desc->may_be_zero;
5792   if (may_be_zero && integer_zerop (may_be_zero))
5793     may_be_zero = NULL_TREE;
5794   if (may_be_zero)
5795     {
5796       if (COMPARISON_CLASS_P (may_be_zero))
5797 	{
5798 	  niter = fold_build3 (COND_EXPR, ntype, may_be_zero,
5799 			       build_int_cst (ntype, 0),
5800 			       rewrite_to_non_trapping_overflow (niter));
5801 	}
5802       /* Don't try to obtain the iteration count expression when may_be_zero
5803 	 is integer_nonzerop (the iteration count is then one) or is not a comparison.  */
5804       else
5805 	return;
5806     }
5807 
5808   machine_mode mode = TYPE_MODE (ntype);
5809   machine_mode pref_mode = targetm.preferred_doloop_mode (mode);
5810 
5811   tree base;
5812   if (mode != pref_mode)
5813     {
5814       base = compute_doloop_base_on_mode (pref_mode, niter, niter_desc->max);
5815       ntype = TREE_TYPE (base);
5816     }
5817   else
5818     base = fold_build2 (PLUS_EXPR, ntype, unshare_expr (niter),
5819 			build_int_cst (ntype, 1));
5820 
5821 
5822   add_candidate (data, base, build_int_cst (ntype, -1), true, NULL, NULL, true);
5823 }
5824 
5825 /* Finds the candidates for the induction variables.  */
5826 
5827 static void
5828 find_iv_candidates (struct ivopts_data *data)
5829 {
5830   /* Add commonly used ivs.  */
5831   add_standard_iv_candidates (data);
5832 
5833   /* Add doloop dedicated ivs.  */
5834   if (data->doloop_use_p)
5835     add_iv_candidate_for_doloop (data);
5836 
5837   /* Add old induction variables.  */
5838   add_iv_candidate_for_bivs (data);
5839 
5840   /* Add induction variables derived from uses.  */
5841   add_iv_candidate_for_groups (data);
5842 
5843   set_autoinc_for_original_candidates (data);
5844 
5845   /* Record the important candidates.  */
5846   record_important_candidates (data);
5847 
5848   /* Relate compare iv_use with all candidates.  */
5849   if (!data->consider_all_candidates)
5850     relate_compare_use_with_all_cands (data);
5851 
5852   if (dump_file && (dump_flags & TDF_DETAILS))
5853     {
5854       unsigned i;
5855 
5856       fprintf (dump_file, "\n<Important Candidates>:\t");
5857       for (i = 0; i < data->vcands.length (); i++)
5858 	if (data->vcands[i]->important)
5859 	  fprintf (dump_file, " %d,", data->vcands[i]->id);
5860       fprintf (dump_file, "\n");
5861 
5862       fprintf (dump_file, "\n<Group, Cand> Related:\n");
5863       for (i = 0; i < data->vgroups.length (); i++)
5864 	{
5865 	  struct iv_group *group = data->vgroups[i];
5866 
5867 	  if (group->related_cands)
5868 	    {
5869 	      fprintf (dump_file, "  Group %d:\t", group->id);
5870 	      dump_bitmap (dump_file, group->related_cands);
5871 	    }
5872 	}
5873       fprintf (dump_file, "\n");
5874     }
5875 }
5876 
5877 /* Determines costs of computing use of iv with an iv candidate.  */
5878 
5879 static void
5880 determine_group_iv_costs (struct ivopts_data *data)
5881 {
5882   unsigned i, j;
5883   struct iv_cand *cand;
5884   struct iv_group *group;
5885   bitmap to_clear = BITMAP_ALLOC (NULL);
5886 
5887   alloc_use_cost_map (data);
5888 
5889   for (i = 0; i < data->vgroups.length (); i++)
5890     {
5891       group = data->vgroups[i];
5892 
5893       if (data->consider_all_candidates)
5894 	{
5895 	  for (j = 0; j < data->vcands.length (); j++)
5896 	    {
5897 	      cand = data->vcands[j];
5898 	      determine_group_iv_cost (data, group, cand);
5899 	    }
5900 	}
5901       else
5902 	{
5903 	  bitmap_iterator bi;
5904 
5905 	  EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, j, bi)
5906 	    {
5907 	      cand = data->vcands[j];
5908 	      if (!determine_group_iv_cost (data, group, cand))
5909 		bitmap_set_bit (to_clear, j);
5910 	    }
5911 
5912 	  /* Remove the candidates for which the cost is infinite from
5913 	     the list of related candidates.  */
5914 	  bitmap_and_compl_into (group->related_cands, to_clear);
5915 	  bitmap_clear (to_clear);
5916 	}
5917     }
5918 
5919   BITMAP_FREE (to_clear);
5920 
5921   if (dump_file && (dump_flags & TDF_DETAILS))
5922     {
5923       bitmap_iterator bi;
5924 
5925       /* Dump invariant variables.  */
5926       fprintf (dump_file, "\n<Invariant Vars>:\n");
5927       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5928 	{
5929 	  struct version_info *info = ver_info (data, i);
5930 	  if (info->inv_id)
5931 	    {
5932 	      fprintf (dump_file, "Inv %d:\t", info->inv_id);
5933 	      print_generic_expr (dump_file, info->name, TDF_SLIM);
5934 	      fprintf (dump_file, "%s\n",
5935 		       info->has_nonlin_use ? "" : "\t(eliminable)");
5936 	    }
5937 	}
5938 
5939       /* Dump invariant expressions.  */
5940       fprintf (dump_file, "\n<Invariant Expressions>:\n");
5941       auto_vec <iv_inv_expr_ent *> list (data->inv_expr_tab->elements ());
5942 
5943       for (hash_table<iv_inv_expr_hasher>::iterator it
5944 	   = data->inv_expr_tab->begin (); it != data->inv_expr_tab->end ();
5945 	   ++it)
5946 	list.safe_push (*it);
5947 
5948       list.qsort (sort_iv_inv_expr_ent);
5949 
5950       for (i = 0; i < list.length (); ++i)
5951 	{
5952 	  fprintf (dump_file, "inv_expr %d: \t", list[i]->id);
5953 	  print_generic_expr (dump_file, list[i]->expr, TDF_SLIM);
5954 	  fprintf (dump_file, "\n");
5955 	}
5956 
5957       fprintf (dump_file, "\n<Group-candidate Costs>:\n");
5958 
5959       for (i = 0; i < data->vgroups.length (); i++)
5960 	{
5961 	  group = data->vgroups[i];
5962 
5963 	  fprintf (dump_file, "Group %d:\n", i);
5964 	  fprintf (dump_file, "  cand\tcost\tcompl.\tinv.expr.\tinv.vars\n");
5965 	  for (j = 0; j < group->n_map_members; j++)
5966 	    {
5967 	      if (!group->cost_map[j].cand
5968 		  || group->cost_map[j].cost.infinite_cost_p ())
5969 		continue;
5970 
5971 	      fprintf (dump_file, "  %d\t%" PRId64 "\t%d\t",
5972 		       group->cost_map[j].cand->id,
5973 		       group->cost_map[j].cost.cost,
5974 		       group->cost_map[j].cost.complexity);
5975 	      if (!group->cost_map[j].inv_exprs
5976 		  || bitmap_empty_p (group->cost_map[j].inv_exprs))
5977 		fprintf (dump_file, "NIL;\t");
5978 	      else
5979 		bitmap_print (dump_file,
5980 			      group->cost_map[j].inv_exprs, "", ";\t");
5981 	      if (!group->cost_map[j].inv_vars
5982 		  || bitmap_empty_p (group->cost_map[j].inv_vars))
5983 		fprintf (dump_file, "NIL;\n");
5984 	      else
5985 		bitmap_print (dump_file,
5986 			      group->cost_map[j].inv_vars, "", "\n");
5987 	    }
5988 
5989 	  fprintf (dump_file, "\n");
5990 	}
5991       fprintf (dump_file, "\n");
5992     }
5993 }
5994 
5995 /* Determines cost of the candidate CAND.  */
5996 
5997 static void
5998 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5999 {
6000   comp_cost cost_base;
6001   int64_t cost, cost_step;
6002   tree base;
6003 
6004   gcc_assert (cand->iv != NULL);
6005 
6006   /* There are two costs associated with the candidate -- its increment
6007      and its initialization.  The second is almost negligible for any loop
6008      that rolls enough, so we take it just very little into account.  */
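  /* Illustrative example: with add_cost equal to COSTS_N_INSNS (1) and a base
     needing one instruction to set up, the candidate cost is roughly one
     instruction for the step plus the setup cost scaled down by the loop's
     expected iteration count (see adjust_setup_cost).  */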
6009 
6010   base = cand->iv->base;
6011   cost_base = force_var_cost (data, base, NULL);
6012   /* It will be exceptional that the iv register happens to be initialized with
6013      the proper value at no cost.  In general, there will at least be a regcopy
6014      or a const set.  */
6015   if (cost_base.cost == 0)
6016     cost_base.cost = COSTS_N_INSNS (1);
6017   /* Doloop decrement should be considered as zero cost.  */
6018   if (cand->doloop_p)
6019     cost_step = 0;
6020   else
6021     cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
6022   cost = cost_step + adjust_setup_cost (data, cost_base.cost);
6023 
6024   /* Prefer the original ivs unless we may gain something by replacing it.
6025      The reason is to make debugging simpler; so this is not relevant for
6026      artificial ivs created by other optimization passes.  */
6027   if ((cand->pos != IP_ORIGINAL
6028        || !SSA_NAME_VAR (cand->var_before)
6029        || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
6030       /* Prefer doloop as well.  */
6031       && !cand->doloop_p)
6032     cost++;
6033 
6034   /* Prefer not to insert statements into latch unless there are some
6035      already (so that we do not create unnecessary jumps).  */
6036   if (cand->pos == IP_END
6037       && empty_block_p (ip_end_pos (data->current_loop)))
6038     cost++;
6039 
6040   cand->cost = cost;
6041   cand->cost_step = cost_step;
6042 }
6043 
6044 /* Determines costs of computation of the candidates.  */
6045 
6046 static void
6047 determine_iv_costs (struct ivopts_data *data)
6048 {
6049   unsigned i;
6050 
6051   if (dump_file && (dump_flags & TDF_DETAILS))
6052     {
6053       fprintf (dump_file, "<Candidate Costs>:\n");
6054       fprintf (dump_file, "  cand\tcost\n");
6055     }
6056 
6057   for (i = 0; i < data->vcands.length (); i++)
6058     {
6059       struct iv_cand *cand = data->vcands[i];
6060 
6061       determine_iv_cost (data, cand);
6062 
6063       if (dump_file && (dump_flags & TDF_DETAILS))
6064 	fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
6065     }
6066 
6067   if (dump_file && (dump_flags & TDF_DETAILS))
6068     fprintf (dump_file, "\n");
6069 }
6070 
6071 /* Estimate register pressure for loop having N_INVS invariants and N_CANDS
6072    induction variables.  Note N_INVS includes both invariant variables and
6073    invariant expressions.  */
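/* A worked example under assumed numbers: with 10 available registers, no call
   in the loop body, regs_used == 4, N_INVS == 3 and N_CANDS == 5, regs_needed
   is 12; that exceeds the 10 available registers while N_CANDS does not, so
   the estimate is target_reg_cost * 10 + target_spill_cost * 2, plus N_CANDS.  */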
6074 
6075 static unsigned
6076 ivopts_estimate_reg_pressure (struct ivopts_data *data, unsigned n_invs,
6077 			      unsigned n_cands)
6078 {
6079   unsigned cost;
6080   unsigned n_old = data->regs_used, n_new = n_invs + n_cands;
6081   unsigned regs_needed = n_new + n_old, available_regs = target_avail_regs;
6082   bool speed = data->speed;
6083 
6084   /* If there is a call in the loop body, the call-clobbered registers
6085      are not available for loop invariants.  */
6086   if (data->body_includes_call)
6087     available_regs = available_regs - target_clobbered_regs;
6088 
6089   /* If we have enough registers.  */
6090   if (regs_needed + target_res_regs < available_regs)
6091     cost = n_new;
6092   /* If close to running out of registers, try to preserve them.  */
6093   else if (regs_needed <= available_regs)
6094     cost = target_reg_cost [speed] * regs_needed;
6095   /* If the needed registers exceed the available ones but the number of
6096      candidates does not, penalize the extra registers using target_spill_cost.  */
6097   else if (n_cands <= available_regs)
6098     cost = target_reg_cost [speed] * available_regs
6099 	   + target_spill_cost [speed] * (regs_needed - available_regs);
6100   /* If even the number of candidates exceeds the available registers,
6101      penalize the extra candidate registers using target_spill_cost * 2,
6102      as it is more expensive to spill an induction variable than an invariant.  */
6103   else
6104     cost = target_reg_cost [speed] * available_regs
6105 	   + target_spill_cost [speed] * (n_cands - available_regs) * 2
6106 	   + target_spill_cost [speed] * (regs_needed - n_cands);
6107 
6108   /* Finally, add the number of candidates, so that we prefer eliminating
6109      induction variables if possible.  */
6110   return cost + n_cands;
6111 }
6112 
6113 /* For each size of the induction variable set determine the penalty.  */
6114 
6115 static void
6116 determine_set_costs (struct ivopts_data *data)
6117 {
6118   unsigned j, n;
6119   gphi *phi;
6120   gphi_iterator psi;
6121   tree op;
6122   class loop *loop = data->current_loop;
6123   bitmap_iterator bi;
6124 
6125   if (dump_file && (dump_flags & TDF_DETAILS))
6126     {
6127       fprintf (dump_file, "<Global Costs>:\n");
6128       fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
6129       fprintf (dump_file, "  target_clobbered_regs %d\n", target_clobbered_regs);
6130       fprintf (dump_file, "  target_reg_cost %d\n", target_reg_cost[data->speed]);
6131       fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost[data->speed]);
6132     }
6133 
6134   n = 0;
6135   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
6136     {
6137       phi = psi.phi ();
6138       op = PHI_RESULT (phi);
6139 
6140       if (virtual_operand_p (op))
6141 	continue;
6142 
6143       if (get_iv (data, op))
6144 	continue;
6145 
6146       if (!POINTER_TYPE_P (TREE_TYPE (op))
6147 	  && !INTEGRAL_TYPE_P (TREE_TYPE (op)))
6148 	continue;
6149 
6150       n++;
6151     }
6152 
6153   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
6154     {
6155       struct version_info *info = ver_info (data, j);
6156 
6157       if (info->inv_id && info->has_nonlin_use)
6158 	n++;
6159     }
6160 
6161   data->regs_used = n;
6162   if (dump_file && (dump_flags & TDF_DETAILS))
6163     fprintf (dump_file, "  regs_used %d\n", n);
6164 
6165   if (dump_file && (dump_flags & TDF_DETAILS))
6166     {
6167       fprintf (dump_file, "  cost for size:\n");
6168       fprintf (dump_file, "  ivs\tcost\n");
6169       for (j = 0; j <= 2 * target_avail_regs; j++)
6170 	fprintf (dump_file, "  %d\t%d\n", j,
6171 		 ivopts_estimate_reg_pressure (data, 0, j));
6172       fprintf (dump_file, "\n");
6173     }
6174 }
6175 
6176 /* Returns true if A is a cheaper cost pair than B.  */
6177 
6178 static bool
6179 cheaper_cost_pair (class cost_pair *a, class cost_pair *b)
6180 {
6181   if (!a)
6182     return false;
6183 
6184   if (!b)
6185     return true;
6186 
6187   if (a->cost < b->cost)
6188     return true;
6189 
6190   if (b->cost < a->cost)
6191     return false;
6192 
6193   /* In case the costs are the same, prefer the cheaper candidate.  */
6194   if (a->cand->cost < b->cand->cost)
6195     return true;
6196 
6197   return false;
6198 }
6199 
6200 /* Compare if A is a more expensive cost pair than B.  Return 1, 0 and -1
6201    for more expensive, equal and cheaper respectively.  */
6202 
6203 static int
6204 compare_cost_pair (class cost_pair *a, class cost_pair *b)
6205 {
6206   if (cheaper_cost_pair (a, b))
6207     return -1;
6208   if (cheaper_cost_pair (b, a))
6209     return 1;
6210 
6211   return 0;
6212 }
6213 
6214 /* Returns the cost pair for the candidate by which GROUP is expressed in IVS.  */
6215 
6216 static class cost_pair *
6217 iv_ca_cand_for_group (class iv_ca *ivs, struct iv_group *group)
6218 {
6219   return ivs->cand_for_group[group->id];
6220 }
6221 
6222 /* Computes the cost field of IVS structure.  */
6223 
6224 static void
6225 iv_ca_recount_cost (struct ivopts_data *data, class iv_ca *ivs)
6226 {
6227   comp_cost cost = ivs->cand_use_cost;
6228 
6229   cost += ivs->cand_cost;
6230   cost += ivopts_estimate_reg_pressure (data, ivs->n_invs, ivs->n_cands);
6231   ivs->cost = cost;
6232 }
6233 
6234 /* Remove use of invariants in set INVS by decreasing counter in N_INV_USES
6235    and IVS.  */
6236 
6237 static void
6238 iv_ca_set_remove_invs (class iv_ca *ivs, bitmap invs, unsigned *n_inv_uses)
6239 {
6240   bitmap_iterator bi;
6241   unsigned iid;
6242 
6243   if (!invs)
6244     return;
6245 
6246   gcc_assert (n_inv_uses != NULL);
6247   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
6248     {
6249       n_inv_uses[iid]--;
6250       if (n_inv_uses[iid] == 0)
6251 	ivs->n_invs--;
6252     }
6253 }
6254 
6255 /* Set GROUP not to be expressed by any candidate in IVS.  */
6256 
6257 static void
6258 iv_ca_set_no_cp (struct ivopts_data *data, class iv_ca *ivs,
6259 		 struct iv_group *group)
6260 {
6261   unsigned gid = group->id, cid;
6262   class cost_pair *cp;
6263 
6264   cp = ivs->cand_for_group[gid];
6265   if (!cp)
6266     return;
6267   cid = cp->cand->id;
6268 
6269   ivs->bad_groups++;
6270   ivs->cand_for_group[gid] = NULL;
6271   ivs->n_cand_uses[cid]--;
6272 
6273   if (ivs->n_cand_uses[cid] == 0)
6274     {
6275       bitmap_clear_bit (ivs->cands, cid);
6276       if (!cp->cand->doloop_p || !targetm.have_count_reg_decr_p)
6277 	ivs->n_cands--;
6278       ivs->cand_cost -= cp->cand->cost;
6279       iv_ca_set_remove_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses);
6280       iv_ca_set_remove_invs (ivs, cp->cand->inv_exprs, ivs->n_inv_expr_uses);
6281     }
6282 
6283   ivs->cand_use_cost -= cp->cost;
6284   iv_ca_set_remove_invs (ivs, cp->inv_vars, ivs->n_inv_var_uses);
6285   iv_ca_set_remove_invs (ivs, cp->inv_exprs, ivs->n_inv_expr_uses);
6286   iv_ca_recount_cost (data, ivs);
6287 }
6288 
6289 /* Add use of invariants in set INVS by increasing counter in N_INV_USES and
6290    IVS.  */
6291 
6292 static void
6293 iv_ca_set_add_invs (class iv_ca *ivs, bitmap invs, unsigned *n_inv_uses)
6294 {
6295   bitmap_iterator bi;
6296   unsigned iid;
6297 
6298   if (!invs)
6299     return;
6300 
6301   gcc_assert (n_inv_uses != NULL);
6302   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
6303     {
6304       n_inv_uses[iid]++;
6305       if (n_inv_uses[iid] == 1)
6306 	ivs->n_invs++;
6307     }
6308 }
6309 
6310 /* Set cost pair for GROUP in set IVS to CP.  */
6311 
6312 static void
6313 iv_ca_set_cp (struct ivopts_data *data, class iv_ca *ivs,
6314 	      struct iv_group *group, class cost_pair *cp)
6315 {
6316   unsigned gid = group->id, cid;
6317 
6318   if (ivs->cand_for_group[gid] == cp)
6319     return;
6320 
6321   if (ivs->cand_for_group[gid])
6322     iv_ca_set_no_cp (data, ivs, group);
6323 
6324   if (cp)
6325     {
6326       cid = cp->cand->id;
6327 
6328       ivs->bad_groups--;
6329       ivs->cand_for_group[gid] = cp;
6330       ivs->n_cand_uses[cid]++;
6331       if (ivs->n_cand_uses[cid] == 1)
6332 	{
6333 	  bitmap_set_bit (ivs->cands, cid);
6334 	  if (!cp->cand->doloop_p || !targetm.have_count_reg_decr_p)
6335 	    ivs->n_cands++;
6336 	  ivs->cand_cost += cp->cand->cost;
6337 	  iv_ca_set_add_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses);
6338 	  iv_ca_set_add_invs (ivs, cp->cand->inv_exprs, ivs->n_inv_expr_uses);
6339 	}
6340 
6341       ivs->cand_use_cost += cp->cost;
6342       iv_ca_set_add_invs (ivs, cp->inv_vars, ivs->n_inv_var_uses);
6343       iv_ca_set_add_invs (ivs, cp->inv_exprs, ivs->n_inv_expr_uses);
6344       iv_ca_recount_cost (data, ivs);
6345     }
6346 }
6347 
6348 /* Extend set IVS by expressing GROUP by some of the candidates in it
6349    if possible.  Consider all important candidates if candidates in
6350    set IVS don't give any result.  */
6351 
6352 static void
6353 iv_ca_add_group (struct ivopts_data *data, class iv_ca *ivs,
6354 	       struct iv_group *group)
6355 {
6356   class cost_pair *best_cp = NULL, *cp;
6357   bitmap_iterator bi;
6358   unsigned i;
6359   struct iv_cand *cand;
6360 
6361   gcc_assert (ivs->upto >= group->id);
6362   ivs->upto++;
6363   ivs->bad_groups++;
6364 
6365   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6366     {
6367       cand = data->vcands[i];
6368       cp = get_group_iv_cost (data, group, cand);
6369       if (cheaper_cost_pair (cp, best_cp))
6370 	best_cp = cp;
6371     }
6372 
6373   if (best_cp == NULL)
6374     {
6375       EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
6376 	{
6377 	  cand = data->vcands[i];
6378 	  cp = get_group_iv_cost (data, group, cand);
6379 	  if (cheaper_cost_pair (cp, best_cp))
6380 	    best_cp = cp;
6381 	}
6382     }
6383 
6384   iv_ca_set_cp (data, ivs, group, best_cp);
6385 }
6386 
6387 /* Get cost for assignment IVS.  */
6388 
6389 static comp_cost
6390 iv_ca_cost (class iv_ca *ivs)
6391 {
6392   /* This was a conditional expression but it triggered a bug in
6393      Sun C 5.5.  */
6394   if (ivs->bad_groups)
6395     return infinite_cost;
6396   else
6397     return ivs->cost;
6398 }
6399 
6400 /* Compare if applying NEW_CP to GROUP for IVS introduces more invariants
6401    than OLD_CP.  Return 1, 0 and -1 for more, equal and fewer invariants
6402    respectively.  */
6403 
6404 static int
6405 iv_ca_compare_deps (struct ivopts_data *data, class iv_ca *ivs,
6406 		    struct iv_group *group, class cost_pair *old_cp,
6407 		    class cost_pair *new_cp)
6408 {
6409   gcc_assert (old_cp && new_cp && old_cp != new_cp);
6410   unsigned old_n_invs = ivs->n_invs;
6411   iv_ca_set_cp (data, ivs, group, new_cp);
6412   unsigned new_n_invs = ivs->n_invs;
6413   iv_ca_set_cp (data, ivs, group, old_cp);
6414 
6415   return new_n_invs > old_n_invs ? 1 : (new_n_invs < old_n_invs ? -1 : 0);
6416 }
6417 
6418 /* Creates change of expressing GROUP by NEW_CP instead of OLD_CP and chains
6419    it before NEXT.  */
6420 
6421 static struct iv_ca_delta *
6422 iv_ca_delta_add (struct iv_group *group, class cost_pair *old_cp,
6423 		 class cost_pair *new_cp, struct iv_ca_delta *next)
6424 {
6425   struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
6426 
6427   change->group = group;
6428   change->old_cp = old_cp;
6429   change->new_cp = new_cp;
6430   change->next = next;
6431 
6432   return change;
6433 }
6434 
6435 /* Joins two lists of changes L1 and L2.  Destructive -- old lists
6436    are rewritten.  */
6437 
6438 static struct iv_ca_delta *
6439 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
6440 {
6441   struct iv_ca_delta *last;
6442 
6443   if (!l2)
6444     return l1;
6445 
6446   if (!l1)
6447     return l2;
6448 
6449   for (last = l1; last->next; last = last->next)
6450     continue;
6451   last->next = l2;
6452 
6453   return l1;
6454 }
6455 
6456 /* Reverse the list of changes DELTA, forming the inverse to it.  */
6457 
6458 static struct iv_ca_delta *
6459 iv_ca_delta_reverse (struct iv_ca_delta *delta)
6460 {
6461   struct iv_ca_delta *act, *next, *prev = NULL;
6462 
6463   for (act = delta; act; act = next)
6464     {
6465       next = act->next;
6466       act->next = prev;
6467       prev = act;
6468 
6469       std::swap (act->old_cp, act->new_cp);
6470     }
6471 
6472   return prev;
6473 }
6474 
6475 /* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
6476    reverted instead.  */
6477 
6478 static void
6479 iv_ca_delta_commit (struct ivopts_data *data, class iv_ca *ivs,
6480 		    struct iv_ca_delta *delta, bool forward)
6481 {
6482   class cost_pair *from, *to;
6483   struct iv_ca_delta *act;
6484 
6485   if (!forward)
6486     delta = iv_ca_delta_reverse (delta);
6487 
6488   for (act = delta; act; act = act->next)
6489     {
6490       from = act->old_cp;
6491       to = act->new_cp;
6492       gcc_assert (iv_ca_cand_for_group (ivs, act->group) == from);
6493       iv_ca_set_cp (data, ivs, act->group, to);
6494     }
6495 
6496   if (!forward)
6497     iv_ca_delta_reverse (delta);
6498 }
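
/* A minimal standalone model of the delta mechanism above, assuming a plain
   "slot -> value" assignment instead of the group/cost_pair records; the
   names are illustrative only, not GCC API.  Each node remembers both the
   old and the new value, so one list can be replayed forward or, after
   iv_ca_delta_reverse has reversed the links and swapped the pairs, replayed
   again to undo the changes:

     struct slot_delta
     {
       int slot, old_val, new_val;
       struct slot_delta *next;
     };

     static void
     commit_slot_deltas (int *state, struct slot_delta *d, int forward)
     {
       for (; d; d = d->next)
         state[d->slot] = forward ? d->new_val : d->old_val;
     }

   The real iv_ca_delta_commit only ever applies new_cp in its loop and
   leaves the undo direction to the reverse-and-swap step, which also keeps
   the per-group assertion valid when a joined delta touches the same group
   more than once.  */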
6499 
6500 /* Returns true if CAND is used in IVS.  */
6501 
6502 static bool
6503 iv_ca_cand_used_p (class iv_ca *ivs, struct iv_cand *cand)
6504 {
6505   return ivs->n_cand_uses[cand->id] > 0;
6506 }
6507 
6508 /* Returns number of induction variable candidates in the set IVS.  */
6509 
6510 static unsigned
6511 iv_ca_n_cands (class iv_ca *ivs)
6512 {
6513   return ivs->n_cands;
6514 }
6515 
6516 /* Free the list of changes DELTA.  */
6517 
6518 static void
6519 iv_ca_delta_free (struct iv_ca_delta **delta)
6520 {
6521   struct iv_ca_delta *act, *next;
6522 
6523   for (act = *delta; act; act = next)
6524     {
6525       next = act->next;
6526       free (act);
6527     }
6528 
6529   *delta = NULL;
6530 }
6531 
6532 /* Allocates a new iv candidate assignment.  */
6533 
6534 static class iv_ca *
6535 iv_ca_new (struct ivopts_data *data)
6536 {
6537   class iv_ca *nw = XNEW (class iv_ca);
6538 
6539   nw->upto = 0;
6540   nw->bad_groups = 0;
6541   nw->cand_for_group = XCNEWVEC (class cost_pair *,
6542 				 data->vgroups.length ());
6543   nw->n_cand_uses = XCNEWVEC (unsigned, data->vcands.length ());
6544   nw->cands = BITMAP_ALLOC (NULL);
6545   nw->n_cands = 0;
6546   nw->n_invs = 0;
6547   nw->cand_use_cost = no_cost;
6548   nw->cand_cost = 0;
6549   nw->n_inv_var_uses = XCNEWVEC (unsigned, data->max_inv_var_id + 1);
6550   nw->n_inv_expr_uses = XCNEWVEC (unsigned, data->max_inv_expr_id + 1);
6551   nw->cost = no_cost;
6552 
6553   return nw;
6554 }
6555 
6556 /* Free memory occupied by the set IVS.  */
6557 
6558 static void
6559 iv_ca_free (class iv_ca **ivs)
6560 {
6561   free ((*ivs)->cand_for_group);
6562   free ((*ivs)->n_cand_uses);
6563   BITMAP_FREE ((*ivs)->cands);
6564   free ((*ivs)->n_inv_var_uses);
6565   free ((*ivs)->n_inv_expr_uses);
6566   free (*ivs);
6567   *ivs = NULL;
6568 }
6569 
6570 /* Dumps IVS to FILE.  */
6571 
6572 static void
6573 iv_ca_dump (struct ivopts_data *data, FILE *file, class iv_ca *ivs)
6574 {
6575   unsigned i;
6576   comp_cost cost = iv_ca_cost (ivs);
6577 
6578   fprintf (file, "  cost: %" PRId64 " (complexity %d)\n", cost.cost,
6579 	   cost.complexity);
6580   fprintf (file, "  reg_cost: %d\n",
6581 	   ivopts_estimate_reg_pressure (data, ivs->n_invs, ivs->n_cands));
6582   fprintf (file, "  cand_cost: %" PRId64 "\n  cand_group_cost: "
6583 	   "%" PRId64 " (complexity %d)\n", ivs->cand_cost,
6584 	   ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
6585   bitmap_print (file, ivs->cands, "  candidates: ","\n");
6586 
6587   for (i = 0; i < ivs->upto; i++)
6588     {
6589       struct iv_group *group = data->vgroups[i];
6590       class cost_pair *cp = iv_ca_cand_for_group (ivs, group);
6591       if (cp)
6592         fprintf (file, "   group:%d --> iv_cand:%d, cost=("
6593 		 "%" PRId64 ",%d)\n", group->id, cp->cand->id,
6594 		 cp->cost.cost, cp->cost.complexity);
6595       else
6596 	fprintf (file, "   group:%d --> ??\n", group->id);
6597     }
6598 
6599   const char *pref = "";
6600   fprintf (file, "  invariant variables: ");
6601   for (i = 1; i <= data->max_inv_var_id; i++)
6602     if (ivs->n_inv_var_uses[i])
6603       {
6604 	fprintf (file, "%s%d", pref, i);
6605 	pref = ", ";
6606       }
6607 
6608   pref = "";
6609   fprintf (file, "\n  invariant expressions: ");
6610   for (i = 1; i <= data->max_inv_expr_id; i++)
6611     if (ivs->n_inv_expr_uses[i])
6612       {
6613 	fprintf (file, "%s%d", pref, i);
6614 	pref = ", ";
6615       }
6616 
6617   fprintf (file, "\n\n");
6618 }
6619 
6620 /* Try changing candidate in IVS to CAND for each use.  Return cost of the
6621    new set, and store differences in DELTA.  Number of induction variables
6622    in the new set is stored in N_IVS.  MIN_NCAND is a flag; when it is
6623    true, the function tries to find a solution with minimal iv candidates.  */
6624 
6625 static comp_cost
6626 iv_ca_extend (struct ivopts_data *data, class iv_ca *ivs,
6627 	      struct iv_cand *cand, struct iv_ca_delta **delta,
6628 	      unsigned *n_ivs, bool min_ncand)
6629 {
6630   unsigned i;
6631   comp_cost cost;
6632   struct iv_group *group;
6633   class cost_pair *old_cp, *new_cp;
6634 
6635   *delta = NULL;
6636   for (i = 0; i < ivs->upto; i++)
6637     {
6638       group = data->vgroups[i];
6639       old_cp = iv_ca_cand_for_group (ivs, group);
6640 
6641       if (old_cp
6642 	  && old_cp->cand == cand)
6643 	continue;
6644 
6645       new_cp = get_group_iv_cost (data, group, cand);
6646       if (!new_cp)
6647 	continue;
6648 
6649       if (!min_ncand)
6650 	{
6651 	  int cmp_invs = iv_ca_compare_deps (data, ivs, group, old_cp, new_cp);
6652 	  /* Skip if new_cp depends on more invariants.  */
6653 	  if (cmp_invs > 0)
6654 	    continue;
6655 
6656 	  int cmp_cost = compare_cost_pair (new_cp, old_cp);
6657 	  /* Skip if new_cp is not cheaper.  */
6658 	  if (cmp_cost > 0 || (cmp_cost == 0 && cmp_invs == 0))
6659 	    continue;
6660 	}
6661 
6662       *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
6663     }
6664 
6665   iv_ca_delta_commit (data, ivs, *delta, true);
6666   cost = iv_ca_cost (ivs);
6667   if (n_ivs)
6668     *n_ivs = iv_ca_n_cands (ivs);
6669   iv_ca_delta_commit (data, ivs, *delta, false);
6670 
6671   return cost;
6672 }
6673 
6674 /* Try narrowing set IVS by removing CAND.  Return the cost of
6675    the new set and store the differences in DELTA.  START is
6676    the candidate with which we start narrowing.  */
6677 
6678 static comp_cost
6679 iv_ca_narrow (struct ivopts_data *data, class iv_ca *ivs,
6680 	      struct iv_cand *cand, struct iv_cand *start,
6681 	      struct iv_ca_delta **delta)
6682 {
6683   unsigned i, ci;
6684   struct iv_group *group;
6685   class cost_pair *old_cp, *new_cp, *cp;
6686   bitmap_iterator bi;
6687   struct iv_cand *cnd;
6688   comp_cost cost, best_cost, acost;
6689 
6690   *delta = NULL;
6691   for (i = 0; i < data->vgroups.length (); i++)
6692     {
6693       group = data->vgroups[i];
6694 
6695       old_cp = iv_ca_cand_for_group (ivs, group);
6696       if (old_cp->cand != cand)
6697 	continue;
6698 
6699       best_cost = iv_ca_cost (ivs);
6700       /* Start narrowing with START.  */
6701       new_cp = get_group_iv_cost (data, group, start);
6702 
6703       if (data->consider_all_candidates)
6704 	{
6705 	  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
6706 	    {
6707 	      if (ci == cand->id || (start && ci == start->id))
6708 		continue;
6709 
6710 	      cnd = data->vcands[ci];
6711 
6712 	      cp = get_group_iv_cost (data, group, cnd);
6713 	      if (!cp)
6714 		continue;
6715 
6716 	      iv_ca_set_cp (data, ivs, group, cp);
6717 	      acost = iv_ca_cost (ivs);
6718 
6719 	      if (acost < best_cost)
6720 		{
6721 		  best_cost = acost;
6722 		  new_cp = cp;
6723 		}
6724 	    }
6725 	}
6726       else
6727 	{
6728 	  EXECUTE_IF_AND_IN_BITMAP (group->related_cands, ivs->cands, 0, ci, bi)
6729 	    {
6730 	      if (ci == cand->id || (start && ci == start->id))
6731 		continue;
6732 
6733 	      cnd = data->vcands[ci];
6734 
6735 	      cp = get_group_iv_cost (data, group, cnd);
6736 	      if (!cp)
6737 		continue;
6738 
6739 	      iv_ca_set_cp (data, ivs, group, cp);
6740 	      acost = iv_ca_cost (ivs);
6741 
6742 	      if (acost < best_cost)
6743 		{
6744 		  best_cost = acost;
6745 		  new_cp = cp;
6746 		}
6747 	    }
6748 	}
6749       /* Restore to old cp for use.  */
6750       iv_ca_set_cp (data, ivs, group, old_cp);
6751 
6752       if (!new_cp)
6753 	{
6754 	  iv_ca_delta_free (delta);
6755 	  return infinite_cost;
6756 	}
6757 
6758       *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
6759     }
6760 
6761   iv_ca_delta_commit (data, ivs, *delta, true);
6762   cost = iv_ca_cost (ivs);
6763   iv_ca_delta_commit (data, ivs, *delta, false);
6764 
6765   return cost;
6766 }
6767 
6768 /* Try optimizing the set of candidates IVS by removing candidates other
6769    than EXCEPT_CAND from it.  Return cost of the new set, and store
6770    differences in DELTA.  */
6771 
6772 static comp_cost
6773 iv_ca_prune (struct ivopts_data *data, class iv_ca *ivs,
6774 	     struct iv_cand *except_cand, struct iv_ca_delta **delta)
6775 {
6776   bitmap_iterator bi;
6777   struct iv_ca_delta *act_delta, *best_delta;
6778   unsigned i;
6779   comp_cost best_cost, acost;
6780   struct iv_cand *cand;
6781 
6782   best_delta = NULL;
6783   best_cost = iv_ca_cost (ivs);
6784 
6785   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6786     {
6787       cand = data->vcands[i];
6788 
6789       if (cand == except_cand)
6790 	continue;
6791 
6792       acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
6793 
6794       if (acost < best_cost)
6795 	{
6796 	  best_cost = acost;
6797 	  iv_ca_delta_free (&best_delta);
6798 	  best_delta = act_delta;
6799 	}
6800       else
6801 	iv_ca_delta_free (&act_delta);
6802     }
6803 
6804   if (!best_delta)
6805     {
6806       *delta = NULL;
6807       return best_cost;
6808     }
6809 
6810   /* Recurse to possibly remove other unnecessary ivs.  */
6811   iv_ca_delta_commit (data, ivs, best_delta, true);
6812   best_cost = iv_ca_prune (data, ivs, except_cand, delta);
6813   iv_ca_delta_commit (data, ivs, best_delta, false);
6814   *delta = iv_ca_delta_join (best_delta, *delta);
6815   return best_cost;
6816 }
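
/* The pruning above is a greedy search: evaluate every single-candidate
   removal, commit the cheapest one and recurse until no removal helps.  A
   self-contained toy with three candidates and made-up costs indexed by the
   bitmask of kept candidates (purely illustrative, not GCC code):

     #include <stdio.h>

     static const int COST[8] = { 99, 7, 8, 5, 9, 6, 8, 6 };

     static int
     prune (unsigned mask)
     {
       int best_cost = COST[mask];
       unsigned best_mask = mask;
       for (int c = 0; c < 3; c++)
         {
           unsigned m = mask & ~(1u << c);
           if ((mask & (1u << c)) && COST[m] < best_cost)
             {
               best_cost = COST[m];
               best_mask = m;
             }
         }
       if (best_mask == mask)
         return best_cost;          // No single removal helps; stop.
       return prune (best_mask);    // Keep the cheapest removal, recurse.
     }

     int
     main (void)
     {
       printf ("%d\n", prune (7));  // {0,1,2} -> {0,1}, prints 5.
       return 0;
     }

   The real function additionally records the removals as a delta chain and
   reverts them before returning, leaving the caller to decide whether to
   commit.  */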
6817 
6818 /* Check if CAND_IDX is a candidate other than OLD_CAND and has
6819    cheaper local cost for GROUP than BEST_CP.  If so, return a pointer to
6820    the corresponding cost_pair; otherwise just return BEST_CP.  */
6821 
6822 static class cost_pair*
6823 cheaper_cost_with_cand (struct ivopts_data *data, struct iv_group *group,
6824 			unsigned int cand_idx, struct iv_cand *old_cand,
6825 			class cost_pair *best_cp)
6826 {
6827   struct iv_cand *cand;
6828   class cost_pair *cp;
6829 
6830   gcc_assert (old_cand != NULL && best_cp != NULL);
6831   if (cand_idx == old_cand->id)
6832     return best_cp;
6833 
6834   cand = data->vcands[cand_idx];
6835   cp = get_group_iv_cost (data, group, cand);
6836   if (cp != NULL && cheaper_cost_pair (cp, best_cp))
6837     return cp;
6838 
6839   return best_cp;
6840 }
6841 
6842 /* Try breaking local optimal fixed-point for IVS by replacing candidates
6843    which are used by more than one iv use.  For each of those candidates,
6844    this function tries to represent iv uses under that candidate using
6845    other ones with lower local cost, then tries to prune the new set.
6846    If the new set has lower cost, it returns the new cost after recording
6847    candidate replacement in list DELTA.  */
6848 
6849 static comp_cost
6850 iv_ca_replace (struct ivopts_data *data, class iv_ca *ivs,
6851 	       struct iv_ca_delta **delta)
6852 {
6853   bitmap_iterator bi, bj;
6854   unsigned int i, j, k;
6855   struct iv_cand *cand;
6856   comp_cost orig_cost, acost;
6857   struct iv_ca_delta *act_delta, *tmp_delta;
6858   class cost_pair *old_cp, *best_cp = NULL;
6859 
6860   *delta = NULL;
6861   orig_cost = iv_ca_cost (ivs);
6862 
6863   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6864     {
6865       if (ivs->n_cand_uses[i] == 1
6866 	  || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
6867 	continue;
6868 
6869       cand = data->vcands[i];
6870 
6871       act_delta = NULL;
6872       /*  Represent uses under current candidate using other ones with
6873 	  lower local cost.  */
6874       for (j = 0; j < ivs->upto; j++)
6875 	{
6876 	  struct iv_group *group = data->vgroups[j];
6877 	  old_cp = iv_ca_cand_for_group (ivs, group);
6878 
6879 	  if (old_cp->cand != cand)
6880 	    continue;
6881 
6882 	  best_cp = old_cp;
6883 	  if (data->consider_all_candidates)
6884 	    for (k = 0; k < data->vcands.length (); k++)
6885 	      best_cp = cheaper_cost_with_cand (data, group, k,
6886 						old_cp->cand, best_cp);
6887 	  else
6888 	    EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, k, bj)
6889 	      best_cp = cheaper_cost_with_cand (data, group, k,
6890 						old_cp->cand, best_cp);
6891 
6892 	  if (best_cp == old_cp)
6893 	    continue;
6894 
6895 	  act_delta = iv_ca_delta_add (group, old_cp, best_cp, act_delta);
6896 	}
6897       /* No need for further prune.  */
6898       if (!act_delta)
6899 	continue;
6900 
6901       /* Prune the new candidate set.  */
6902       iv_ca_delta_commit (data, ivs, act_delta, true);
6903       acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
6904       iv_ca_delta_commit (data, ivs, act_delta, false);
6905       act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6906 
6907       if (acost < orig_cost)
6908 	{
6909 	  *delta = act_delta;
6910 	  return acost;
6911 	}
6912       else
6913 	iv_ca_delta_free (&act_delta);
6914     }
6915 
6916   return orig_cost;
6917 }
6918 
6919 /* Tries to extend the sets IVS in the best possible way in order to
6920    express the GROUP.  If ORIGINALP is true, prefer candidates from
6921    the original set of IVs, otherwise favor important candidates not
6922    based on any memory object.  */
6923 
6924 static bool
6925 try_add_cand_for (struct ivopts_data *data, class iv_ca *ivs,
6926 		  struct iv_group *group, bool originalp)
6927 {
6928   comp_cost best_cost, act_cost;
6929   unsigned i;
6930   bitmap_iterator bi;
6931   struct iv_cand *cand;
6932   struct iv_ca_delta *best_delta = NULL, *act_delta;
6933   class cost_pair *cp;
6934 
6935   iv_ca_add_group (data, ivs, group);
6936   best_cost = iv_ca_cost (ivs);
6937   cp = iv_ca_cand_for_group (ivs, group);
6938   if (cp)
6939     {
6940       best_delta = iv_ca_delta_add (group, NULL, cp, NULL);
6941       iv_ca_set_no_cp (data, ivs, group);
6942     }
6943 
6944   /* If ORIGINALP is true, try to find the original IV for the use.  Otherwise
6945      first try important candidates not based on any memory object.  Only if
6946      this fails, try the specific ones.  Rationale -- in loops with many
6947      variables the best choice often is to use just one generic biv.  If we
6948      added here many ivs specific to the uses, the optimization algorithm later
6949      would be likely to get stuck in a local minimum, thus causing us to create
6950      too many ivs.  The approach from few ivs to more seems more likely to be
6951      successful -- starting from few ivs, replacing an expensive use by a
6952      specific iv should always be a win.  */
6953   EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, i, bi)
6954     {
6955       cand = data->vcands[i];
6956 
6957       if (originalp && cand->pos != IP_ORIGINAL)
6958 	continue;
6959 
6960       if (!originalp && cand->iv->base_object != NULL_TREE)
6961 	continue;
6962 
6963       if (iv_ca_cand_used_p (ivs, cand))
6964 	continue;
6965 
6966       cp = get_group_iv_cost (data, group, cand);
6967       if (!cp)
6968 	continue;
6969 
6970       iv_ca_set_cp (data, ivs, group, cp);
6971       act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
6972 			       true);
6973       iv_ca_set_no_cp (data, ivs, group);
6974       act_delta = iv_ca_delta_add (group, NULL, cp, act_delta);
6975 
6976       if (act_cost < best_cost)
6977 	{
6978 	  best_cost = act_cost;
6979 
6980 	  iv_ca_delta_free (&best_delta);
6981 	  best_delta = act_delta;
6982 	}
6983       else
6984 	iv_ca_delta_free (&act_delta);
6985     }
6986 
6987   if (best_cost.infinite_cost_p ())
6988     {
6989       for (i = 0; i < group->n_map_members; i++)
6990 	{
6991 	  cp = group->cost_map + i;
6992 	  cand = cp->cand;
6993 	  if (!cand)
6994 	    continue;
6995 
6996 	  /* Already tried this.  */
6997 	  if (cand->important)
6998 	    {
6999 	      if (originalp && cand->pos == IP_ORIGINAL)
7000 		continue;
7001 	      if (!originalp && cand->iv->base_object == NULL_TREE)
7002 		continue;
7003 	    }
7004 
7005 	  if (iv_ca_cand_used_p (ivs, cand))
7006 	    continue;
7007 
7008 	  act_delta = NULL;
7009 	  iv_ca_set_cp (data, ivs, group, cp);
7010 	  act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
7011 	  iv_ca_set_no_cp (data, ivs, group);
7012 	  act_delta = iv_ca_delta_add (group,
7013 				       iv_ca_cand_for_group (ivs, group),
7014 				       cp, act_delta);
7015 
7016 	  if (act_cost < best_cost)
7017 	    {
7018 	      best_cost = act_cost;
7019 
7020 	      if (best_delta)
7021 		iv_ca_delta_free (&best_delta);
7022 	      best_delta = act_delta;
7023 	    }
7024 	  else
7025 	    iv_ca_delta_free (&act_delta);
7026 	}
7027     }
7028 
7029   iv_ca_delta_commit (data, ivs, best_delta, true);
7030   iv_ca_delta_free (&best_delta);
7031 
7032   return !best_cost.infinite_cost_p ();
7033 }
7034 
7035 /* Finds an initial assignment of candidates to uses.  */
7036 
7037 static class iv_ca *
7038 get_initial_solution (struct ivopts_data *data, bool originalp)
7039 {
7040   unsigned i;
7041   class iv_ca *ivs = iv_ca_new (data);
7042 
7043   for (i = 0; i < data->vgroups.length (); i++)
7044     if (!try_add_cand_for (data, ivs, data->vgroups[i], originalp))
7045       {
7046 	iv_ca_free (&ivs);
7047 	return NULL;
7048       }
7049 
7050   return ivs;
7051 }
7052 
7053 /* Tries to improve set of induction variables IVS.  TRY_REPLACE_P
7054    points to a bool variable; if it is true, this function tries to break
7055    the local optimal fixed-point by replacing candidates in IVS.  */
7056 
7057 static bool
7058 try_improve_iv_set (struct ivopts_data *data,
7059 		    class iv_ca *ivs, bool *try_replace_p)
7060 {
7061   unsigned i, n_ivs;
7062   comp_cost acost, best_cost = iv_ca_cost (ivs);
7063   struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
7064   struct iv_cand *cand;
7065 
7066   /* Try extending the set of induction variables by one.  */
7067   for (i = 0; i < data->vcands.length (); i++)
7068     {
7069       cand = data->vcands[i];
7070 
7071       if (iv_ca_cand_used_p (ivs, cand))
7072 	continue;
7073 
7074       acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
7075       if (!act_delta)
7076 	continue;
7077 
7078       /* If we successfully added the candidate and the set is small enough,
7079 	 try optimizing it by removing other candidates.  */
7080       if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
7081       	{
7082 	  iv_ca_delta_commit (data, ivs, act_delta, true);
7083 	  acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
7084 	  iv_ca_delta_commit (data, ivs, act_delta, false);
7085 	  act_delta = iv_ca_delta_join (act_delta, tmp_delta);
7086 	}
7087 
7088       if (acost < best_cost)
7089 	{
7090 	  best_cost = acost;
7091 	  iv_ca_delta_free (&best_delta);
7092 	  best_delta = act_delta;
7093 	}
7094       else
7095 	iv_ca_delta_free (&act_delta);
7096     }
7097 
7098   if (!best_delta)
7099     {
7100       /* Try removing the candidates from the set instead.  */
7101       best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
7102 
7103       if (!best_delta && *try_replace_p)
7104 	{
7105 	  *try_replace_p = false;
7106 	  /* So far the candidate selection algorithm tends to choose fewer IVs
7107 	     so that it can handle loops that have many variables but where the
7108 	     best choice is often to use just one general biv.  One weakness is
7109 	     that it can't handle the opposite case, in which different candidates
7110 	     should be chosen with respect to each use.  To solve the problem, we
7111 	     replace candidates in the manner described in the comments of
7112 	     iv_ca_replace, thus giving the general algorithm a chance to break
7113 	     the local optimal fixed-point in these cases.  */
7114 	  best_cost = iv_ca_replace (data, ivs, &best_delta);
7115 	}
7116 
7117       if (!best_delta)
7118 	return false;
7119     }
7120 
7121   iv_ca_delta_commit (data, ivs, best_delta, true);
7122   iv_ca_delta_free (&best_delta);
7123   return best_cost == iv_ca_cost (ivs);
7124 }
7125 
7126 /* Attempts to find the optimal set of induction variables.  We use a simple
7127    greedy heuristic -- we try to replace at most one candidate in the selected
7128    solution and remove the unused ivs while this improves the cost.  */
7129 
7130 static class iv_ca *
7131 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
7132 {
7133   class iv_ca *set;
7134   bool try_replace_p = true;
7135 
7136   /* Get the initial solution.  */
7137   set = get_initial_solution (data, originalp);
7138   if (!set)
7139     {
7140       if (dump_file && (dump_flags & TDF_DETAILS))
7141 	fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
7142       return NULL;
7143     }
7144 
7145   if (dump_file && (dump_flags & TDF_DETAILS))
7146     {
7147       fprintf (dump_file, "Initial set of candidates:\n");
7148       iv_ca_dump (data, dump_file, set);
7149     }
7150 
7151   while (try_improve_iv_set (data, set, &try_replace_p))
7152     {
7153       if (dump_file && (dump_flags & TDF_DETAILS))
7154 	{
7155 	  fprintf (dump_file, "Improved to:\n");
7156 	  iv_ca_dump (data, dump_file, set);
7157 	}
7158     }
7159 
7160   /* If the set has infinite_cost, it can't be optimal.  */
7161   if (iv_ca_cost (set).infinite_cost_p ())
7162     {
7163       if (dump_file && (dump_flags & TDF_DETAILS))
7164 	fprintf (dump_file,
7165 		 "Overflow to infinite cost in try_improve_iv_set.\n");
7166       iv_ca_free (&set);
7167     }
7168   return set;
7169 }
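
/* The function above is a plain hill climb: build an initial assignment,
   then keep asking try_improve_iv_set for a cheaper neighbouring set until
   that fails.  A self-contained sketch of the control flow, using a made-up
   cost table indexed by the bitmask of selected candidates (illustrative
   only, not GCC code):

     static const int TOY_COST[8] = { 99, 7, 8, 5, 9, 6, 8, 6 };

     // Flip one candidate in or out if that lowers the cost.
     static int
     improve (unsigned *mask)
     {
       for (int c = 0; c < 3; c++)
         {
           unsigned m = *mask ^ (1u << c);
           if (TOY_COST[m] < TOY_COST[*mask])
             {
               *mask = m;
               return 1;
             }
         }
       return 0;
     }

     static unsigned
     find_toy_set (unsigned initial)
     {
       unsigned mask = initial;
       while (improve (&mask))
         ;
       return mask;   // E.g. starting from 7 ({0,1,2}) this ends at 3.
     }

   Like any local search this can stop in a local minimum, which is why the
   pass runs it twice (once preferring original IVs, once preferring
   candidates not based on a memory object) and why try_improve_iv_set falls
   back to iv_ca_replace when no single extension or pruning step helps.  */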
7170 
7171 static class iv_ca *
7172 find_optimal_iv_set (struct ivopts_data *data)
7173 {
7174   unsigned i;
7175   comp_cost cost, origcost;
7176   class iv_ca *set, *origset;
7177 
7178   /* Determine the cost based on a strategy that starts with original IVs,
7179      and try again using a strategy that prefers candidates not based
7180      on any IVs.  */
7181   origset = find_optimal_iv_set_1 (data, true);
7182   set = find_optimal_iv_set_1 (data, false);
7183 
7184   if (!origset && !set)
7185     return NULL;
7186 
7187   origcost = origset ? iv_ca_cost (origset) : infinite_cost;
7188   cost = set ? iv_ca_cost (set) : infinite_cost;
7189 
7190   if (dump_file && (dump_flags & TDF_DETAILS))
7191     {
7192       fprintf (dump_file, "Original cost %" PRId64 " (complexity %d)\n\n",
7193 	       origcost.cost, origcost.complexity);
7194       fprintf (dump_file, "Final cost %" PRId64 " (complexity %d)\n\n",
7195 	       cost.cost, cost.complexity);
7196     }
7197 
7198   /* Choose the one with the best cost.  */
7199   if (origcost <= cost)
7200     {
7201       if (set)
7202 	iv_ca_free (&set);
7203       set = origset;
7204     }
7205   else if (origset)
7206     iv_ca_free (&origset);
7207 
7208   for (i = 0; i < data->vgroups.length (); i++)
7209     {
7210       struct iv_group *group = data->vgroups[i];
7211       group->selected = iv_ca_cand_for_group (set, group)->cand;
7212     }
7213 
7214   return set;
7215 }
7216 
7217 /* Creates a new induction variable corresponding to CAND.  */
7218 
7219 static void
7220 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
7221 {
7222   gimple_stmt_iterator incr_pos;
7223   tree base;
7224   struct iv_use *use;
7225   struct iv_group *group;
7226   bool after = false;
7227 
7228   gcc_assert (cand->iv != NULL);
7229 
7230   switch (cand->pos)
7231     {
7232     case IP_NORMAL:
7233       incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
7234       break;
7235 
7236     case IP_END:
7237       incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
7238       after = true;
7239       if (!gsi_end_p (incr_pos) && stmt_ends_bb_p (gsi_stmt (incr_pos)))
7240 	{
7241 	  edge e = find_edge (gsi_bb (incr_pos), data->current_loop->header);
7242 	  incr_pos = gsi_after_labels (split_edge (e));
7243 	  after = false;
7244 	}
7245       break;
7246 
7247     case IP_AFTER_USE:
7248       after = true;
7249       /* fall through */
7250     case IP_BEFORE_USE:
7251       incr_pos = gsi_for_stmt (cand->incremented_at);
7252       break;
7253 
7254     case IP_ORIGINAL:
7255       /* Mark that the iv is preserved.  */
7256       name_info (data, cand->var_before)->preserve_biv = true;
7257       name_info (data, cand->var_after)->preserve_biv = true;
7258 
7259       /* Rewrite the increment so that it uses var_before directly.  */
7260       use = find_interesting_uses_op (data, cand->var_after);
7261       group = data->vgroups[use->group_id];
7262       group->selected = cand;
7263       return;
7264     }
7265 
7266   gimple_add_tmp_var (cand->var_before);
7267 
7268   base = unshare_expr (cand->iv->base);
7269 
7270   create_iv (base, unshare_expr (cand->iv->step),
7271 	     cand->var_before, data->current_loop,
7272 	     &incr_pos, after, &cand->var_before, &cand->var_after);
7273 }
7274 
7275 /* Creates new induction variables described in SET.  */
7276 
7277 static void
7278 create_new_ivs (struct ivopts_data *data, class iv_ca *set)
7279 {
7280   unsigned i;
7281   struct iv_cand *cand;
7282   bitmap_iterator bi;
7283 
7284   EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
7285     {
7286       cand = data->vcands[i];
7287       create_new_iv (data, cand);
7288     }
7289 
7290   if (dump_file && (dump_flags & TDF_DETAILS))
7291     {
7292       fprintf (dump_file, "Selected IV set for loop %d",
7293 	       data->current_loop->num);
7294       if (data->loop_loc != UNKNOWN_LOCATION)
7295 	fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7296 		 LOCATION_LINE (data->loop_loc));
7297       fprintf (dump_file, ", " HOST_WIDE_INT_PRINT_DEC " avg niters",
7298 	       avg_loop_niter (data->current_loop));
7299       fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
7300       EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
7301 	{
7302 	  cand = data->vcands[i];
7303 	  dump_cand (dump_file, cand);
7304 	}
7305       fprintf (dump_file, "\n");
7306     }
7307 }
7308 
7309 /* Rewrites USE (definition of iv used in a nonlinear expression)
7310    using candidate CAND.  */
7311 
7312 static void
7313 rewrite_use_nonlinear_expr (struct ivopts_data *data,
7314 			    struct iv_use *use, struct iv_cand *cand)
7315 {
7316   gassign *ass;
7317   gimple_stmt_iterator bsi;
7318   tree comp, type = get_use_type (use), tgt;
7319 
7320   /* An important special case -- if we are asked to express value of
7321      the original iv by itself, just exit; there is no need to
7322      introduce a new computation (that might also need casting the
7323      variable to unsigned and back).  */
7324   if (cand->pos == IP_ORIGINAL
7325       && cand->incremented_at == use->stmt)
7326     {
7327       tree op = NULL_TREE;
7328       enum tree_code stmt_code;
7329 
7330       gcc_assert (is_gimple_assign (use->stmt));
7331       gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
7332 
7333       /* Check whether we may leave the computation unchanged.
7334 	 This is the case only if it does not rely on other
7335 	 computations in the loop -- otherwise, the computation
7336 	 we rely upon may be removed in remove_unused_ivs,
7337 	 thus leading to ICE.  */
7338       stmt_code = gimple_assign_rhs_code (use->stmt);
7339       if (stmt_code == PLUS_EXPR
7340 	  || stmt_code == MINUS_EXPR
7341 	  || stmt_code == POINTER_PLUS_EXPR)
7342 	{
7343 	  if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
7344 	    op = gimple_assign_rhs2 (use->stmt);
7345 	  else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
7346 	    op = gimple_assign_rhs1 (use->stmt);
7347 	}
7348 
7349       if (op != NULL_TREE)
7350 	{
7351 	  if (expr_invariant_in_loop_p (data->current_loop, op))
7352 	    return;
7353 	  if (TREE_CODE (op) == SSA_NAME)
7354 	    {
7355 	      struct iv *iv = get_iv (data, op);
7356 	      if (iv != NULL && integer_zerop (iv->step))
7357 		return;
7358 	    }
7359 	}
7360     }
7361 
7362   switch (gimple_code (use->stmt))
7363     {
7364     case GIMPLE_PHI:
7365       tgt = PHI_RESULT (use->stmt);
7366 
7367       /* If we should keep the biv, do not replace it.  */
7368       if (name_info (data, tgt)->preserve_biv)
7369 	return;
7370 
7371       bsi = gsi_after_labels (gimple_bb (use->stmt));
7372       break;
7373 
7374     case GIMPLE_ASSIGN:
7375       tgt = gimple_assign_lhs (use->stmt);
7376       bsi = gsi_for_stmt (use->stmt);
7377       break;
7378 
7379     default:
7380       gcc_unreachable ();
7381     }
7382 
7383   aff_tree aff_inv, aff_var;
7384   if (!get_computation_aff_1 (data->current_loop, use->stmt,
7385 			      use, cand, &aff_inv, &aff_var))
7386     gcc_unreachable ();
7387 
7388   unshare_aff_combination (&aff_inv);
7389   unshare_aff_combination (&aff_var);
7390   /* Prefer a CSE opportunity over the loop-invariant part by adding the
7391      offset last, so that iv_uses with different offsets can be CSEd.  */
7392   poly_widest_int offset = aff_inv.offset;
7393   aff_inv.offset = 0;
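  /* For example (illustrative values): if one use computes base + 4*i + 16
     and another base + 4*i + 20, stripping the constant offset from aff_inv
     here lets both uses share the expression for base + 4*i, while the
     constants 16 and 20 are only added back at the end below.  */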
7394 
7395   gimple_seq stmt_list = NULL, seq = NULL;
7396   tree comp_op1 = aff_combination_to_tree (&aff_inv);
7397   tree comp_op2 = aff_combination_to_tree (&aff_var);
7398   gcc_assert (comp_op1 && comp_op2);
7399 
7400   comp_op1 = force_gimple_operand (comp_op1, &seq, true, NULL);
7401   gimple_seq_add_seq (&stmt_list, seq);
7402   comp_op2 = force_gimple_operand (comp_op2, &seq, true, NULL);
7403   gimple_seq_add_seq (&stmt_list, seq);
7404 
7405   if (POINTER_TYPE_P (TREE_TYPE (comp_op2)))
7406     std::swap (comp_op1, comp_op2);
7407 
7408   if (POINTER_TYPE_P (TREE_TYPE (comp_op1)))
7409     {
7410       comp = fold_build_pointer_plus (comp_op1,
7411 				      fold_convert (sizetype, comp_op2));
7412       comp = fold_build_pointer_plus (comp,
7413 				      wide_int_to_tree (sizetype, offset));
7414     }
7415   else
7416     {
7417       comp = fold_build2 (PLUS_EXPR, TREE_TYPE (comp_op1), comp_op1,
7418 			  fold_convert (TREE_TYPE (comp_op1), comp_op2));
7419       comp = fold_build2 (PLUS_EXPR, TREE_TYPE (comp_op1), comp,
7420 			  wide_int_to_tree (TREE_TYPE (comp_op1), offset));
7421     }
7422 
7423   comp = fold_convert (type, comp);
7424   comp = force_gimple_operand (comp, &seq, false, NULL);
7425   gimple_seq_add_seq (&stmt_list, seq);
7426   if (gimple_code (use->stmt) != GIMPLE_PHI
7427       /* We can't allow re-allocating the stmt as it might be pointed
7428 	 to still.  */
7429       && (get_gimple_rhs_num_ops (TREE_CODE (comp))
7430 	  >= gimple_num_ops (gsi_stmt (bsi))))
7431     {
7432       comp = force_gimple_operand (comp, &seq, true, NULL);
7433       gimple_seq_add_seq (&stmt_list, seq);
7434       if (POINTER_TYPE_P (TREE_TYPE (tgt)))
7435 	{
7436 	  duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
7437 	  /* As this isn't a plain copy we have to reset alignment
7438 	     information.  */
7439 	  if (SSA_NAME_PTR_INFO (comp))
7440 	    mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
7441 	}
7442     }
7443 
7444   gsi_insert_seq_before (&bsi, stmt_list, GSI_SAME_STMT);
7445   if (gimple_code (use->stmt) == GIMPLE_PHI)
7446     {
7447       ass = gimple_build_assign (tgt, comp);
7448       gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
7449 
7450       bsi = gsi_for_stmt (use->stmt);
7451       remove_phi_node (&bsi, false);
7452     }
7453   else
7454     {
7455       gimple_assign_set_rhs_from_tree (&bsi, comp);
7456       use->stmt = gsi_stmt (bsi);
7457     }
7458 }
7459 
7460 /* Performs a peephole optimization to reorder the iv update statement with
7461    a mem ref to enable instruction combining in later phases. The mem ref uses
7462    the iv value before the update, so the reordering transformation requires
7463    adjustment of the offset. CAND is the selected IV_CAND.
7464 
7465    Example:
7466 
7467    t = MEM_REF (base, iv1, 8, 16);  // base, index, stride, offset
7468    iv2 = iv1 + 1;
7469 
7470    if (t < val)      (1)
7471      goto L;
7472    goto Head;
7473 
7474 
7475    Directly propagating t over to (1) will introduce an overlapping live
7476    range and thus increase register pressure.  This peephole transforms it into:
7477 
7478 
7479    iv2 = iv1 + 1;
7480    t = MEM_REF (base, iv2, 8, 8);
7481    if (t < val)
7482      goto L;
7483    goto Head;
7484 */
7485 
7486 static void
7487 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
7488 {
7489   tree var_after;
7490   gimple *iv_update, *stmt;
7491   basic_block bb;
7492   gimple_stmt_iterator gsi, gsi_iv;
7493 
7494   if (cand->pos != IP_NORMAL)
7495     return;
7496 
7497   var_after = cand->var_after;
7498   iv_update = SSA_NAME_DEF_STMT (var_after);
7499 
7500   bb = gimple_bb (iv_update);
7501   gsi = gsi_last_nondebug_bb (bb);
7502   stmt = gsi_stmt (gsi);
7503 
7504   /* Only handle conditional statement for now.  */
7505   if (gimple_code (stmt) != GIMPLE_COND)
7506     return;
7507 
7508   gsi_prev_nondebug (&gsi);
7509   stmt = gsi_stmt (gsi);
7510   if (stmt != iv_update)
7511     return;
7512 
7513   gsi_prev_nondebug (&gsi);
7514   if (gsi_end_p (gsi))
7515     return;
7516 
7517   stmt = gsi_stmt (gsi);
7518   if (gimple_code (stmt) != GIMPLE_ASSIGN)
7519     return;
7520 
7521   if (stmt != use->stmt)
7522     return;
7523 
7524   if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
7525     return;
7526 
7527   if (dump_file && (dump_flags & TDF_DETAILS))
7528     {
7529       fprintf (dump_file, "Reordering \n");
7530       print_gimple_stmt (dump_file, iv_update, 0);
7531       print_gimple_stmt (dump_file, use->stmt, 0);
7532       fprintf (dump_file, "\n");
7533     }
7534 
7535   gsi = gsi_for_stmt (use->stmt);
7536   gsi_iv = gsi_for_stmt (iv_update);
7537   gsi_move_before (&gsi_iv, &gsi);
7538 
7539   cand->pos = IP_BEFORE_USE;
7540   cand->incremented_at = use->stmt;
7541 }
7542 
7543 /* Return the alias pointer type that should be used for a MEM_REF
7544    associated with USE, which has type USE_PTR_ADDRESS.  */
7545 
7546 static tree
7547 get_alias_ptr_type_for_ptr_address (iv_use *use)
7548 {
7549   gcall *call = as_a <gcall *> (use->stmt);
7550   switch (gimple_call_internal_fn (call))
7551     {
7552     case IFN_MASK_LOAD:
7553     case IFN_MASK_STORE:
7554     case IFN_MASK_LOAD_LANES:
7555     case IFN_MASK_STORE_LANES:
7556     case IFN_LEN_LOAD:
7557     case IFN_LEN_STORE:
7558       /* The second argument contains the correct alias type.  */
7559       gcc_assert (use->op_p == gimple_call_arg_ptr (call, 0));
7560       return TREE_TYPE (gimple_call_arg (call, 1));
7561 
7562     default:
7563       gcc_unreachable ();
7564     }
7565 }
7566 
7567 
7568 /* Rewrites USE (address that is an iv) using candidate CAND.  */
7569 
7570 static void
7571 rewrite_use_address (struct ivopts_data *data,
7572 		     struct iv_use *use, struct iv_cand *cand)
7573 {
7574   aff_tree aff;
7575   bool ok;
7576 
7577   adjust_iv_update_pos (cand, use);
7578   ok = get_computation_aff (data->current_loop, use->stmt, use, cand, &aff);
7579   gcc_assert (ok);
7580   unshare_aff_combination (&aff);
7581 
7582   /* To avoid undefined overflow problems, all IV candidates use unsigned
7583      integer types.  The drawback is that this makes it impossible for
7584      create_mem_ref to distinguish an IV that is based on a memory object
7585      from one that represents simply an offset.
7586 
7587      To work around this problem, we pass a hint to create_mem_ref that
7588      indicates which variable (if any) in aff is an IV based on a memory
7589      object.  Note that we only consider the candidate.  If this is not
7590      based on an object, the base of the reference is in some subexpression
7591      of the use -- but these will use pointer types, so they are recognized
7592      by the create_mem_ref heuristics anyway.  */
7593   tree iv = var_at_stmt (data->current_loop, cand, use->stmt);
7594   tree base_hint = (cand->iv->base_object) ? iv : NULL_TREE;
7595   gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7596   tree type = use->mem_type;
7597   tree alias_ptr_type;
7598   if (use->type == USE_PTR_ADDRESS)
7599     alias_ptr_type = get_alias_ptr_type_for_ptr_address (use);
7600   else
7601     {
7602       gcc_assert (type == TREE_TYPE (*use->op_p));
7603       unsigned int align = get_object_alignment (*use->op_p);
7604       if (align != TYPE_ALIGN (type))
7605 	type = build_aligned_type (type, align);
7606       alias_ptr_type = reference_alias_ptr_type (*use->op_p);
7607     }
7608   tree ref = create_mem_ref (&bsi, type, &aff, alias_ptr_type,
7609 			     iv, base_hint, data->speed);
7610 
7611   if (use->type == USE_PTR_ADDRESS)
7612     {
7613       ref = fold_build1 (ADDR_EXPR, build_pointer_type (use->mem_type), ref);
7614       ref = fold_convert (get_use_type (use), ref);
7615       ref = force_gimple_operand_gsi (&bsi, ref, true, NULL_TREE,
7616 				      true, GSI_SAME_STMT);
7617     }
7618   else
7619     {
7620       /* When we end up confused enough and have no suitable base but
7621 	 stuffed everything into index2, use a LEA for the address and
7622 	 create a plain MEM_REF, to avoid basing a memory reference
7623 	 on address zero, which create_mem_ref_raw does as a fallback.  */
7624       if (TREE_CODE (ref) == TARGET_MEM_REF
7625 	  && TMR_INDEX2 (ref) != NULL_TREE
7626 	  && integer_zerop (TREE_OPERAND (ref, 0)))
7627 	{
7628 	  ref = fold_build1 (ADDR_EXPR, TREE_TYPE (TREE_OPERAND (ref, 0)), ref);
7629 	  ref = force_gimple_operand_gsi (&bsi, ref, true, NULL_TREE,
7630 					  true, GSI_SAME_STMT);
7631 	  ref = build2 (MEM_REF, type, ref, build_zero_cst (alias_ptr_type));
7632 	}
7633       copy_ref_info (ref, *use->op_p);
7634     }
7635 
7636   *use->op_p = ref;
7637 }
7638 
7639 /* Rewrites USE (the condition such that one of the arguments is an iv) using
7640    candidate CAND.  */
7641 
7642 static void
7643 rewrite_use_compare (struct ivopts_data *data,
7644 		     struct iv_use *use, struct iv_cand *cand)
7645 {
7646   tree comp, op, bound;
7647   gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7648   enum tree_code compare;
7649   struct iv_group *group = data->vgroups[use->group_id];
7650   class cost_pair *cp = get_group_iv_cost (data, group, cand);
7651 
7652   bound = cp->value;
7653   if (bound)
7654     {
7655       tree var = var_at_stmt (data->current_loop, cand, use->stmt);
7656       tree var_type = TREE_TYPE (var);
7657       gimple_seq stmts;
7658 
7659       if (dump_file && (dump_flags & TDF_DETAILS))
7660 	{
7661 	  fprintf (dump_file, "Replacing exit test: ");
7662 	  print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
7663 	}
7664       compare = cp->comp;
7665       bound = unshare_expr (fold_convert (var_type, bound));
7666       op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
7667       if (stmts)
7668 	gsi_insert_seq_on_edge_immediate (
7669 		loop_preheader_edge (data->current_loop),
7670 		stmts);
7671 
7672       gcond *cond_stmt = as_a <gcond *> (use->stmt);
7673       gimple_cond_set_lhs (cond_stmt, var);
7674       gimple_cond_set_code (cond_stmt, compare);
7675       gimple_cond_set_rhs (cond_stmt, op);
7676       return;
7677     }
7678 
7679   /* The induction variable elimination failed; just express the original
7680      giv.  */
7681   comp = get_computation_at (data->current_loop, use->stmt, use, cand);
7682   gcc_assert (comp != NULL_TREE);
7683   gcc_assert (use->op_p != NULL);
7684   *use->op_p = force_gimple_operand_gsi (&bsi, comp, true,
7685 					 SSA_NAME_VAR (*use->op_p),
7686 					 true, GSI_SAME_STMT);
7687 }
7688 
7689 /* Rewrite the groups using the selected induction variables.  */
7690 
7691 static void
7692 rewrite_groups (struct ivopts_data *data)
7693 {
7694   unsigned i, j;
7695 
7696   for (i = 0; i < data->vgroups.length (); i++)
7697     {
7698       struct iv_group *group = data->vgroups[i];
7699       struct iv_cand *cand = group->selected;
7700 
7701       gcc_assert (cand);
7702 
7703       if (group->type == USE_NONLINEAR_EXPR)
7704 	{
7705 	  for (j = 0; j < group->vuses.length (); j++)
7706 	    {
7707 	      rewrite_use_nonlinear_expr (data, group->vuses[j], cand);
7708 	      update_stmt (group->vuses[j]->stmt);
7709 	    }
7710 	}
7711       else if (address_p (group->type))
7712 	{
7713 	  for (j = 0; j < group->vuses.length (); j++)
7714 	    {
7715 	      rewrite_use_address (data, group->vuses[j], cand);
7716 	      update_stmt (group->vuses[j]->stmt);
7717 	    }
7718 	}
7719       else
7720 	{
7721 	  gcc_assert (group->type == USE_COMPARE);
7722 
7723 	  for (j = 0; j < group->vuses.length (); j++)
7724 	    {
7725 	      rewrite_use_compare (data, group->vuses[j], cand);
7726 	      update_stmt (group->vuses[j]->stmt);
7727 	    }
7728 	}
7729     }
7730 }
7731 
7732 /* Removes the ivs that are not used after rewriting.  */
7733 
7734 static void
7735 remove_unused_ivs (struct ivopts_data *data, bitmap toremove)
7736 {
7737   unsigned j;
7738   bitmap_iterator bi;
7739 
7740   /* Figure out an order in which to release SSA DEFs so that we don't
7741      release something that we'd have to propagate into a debug stmt
7742      afterwards.  */
7743   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
7744     {
7745       struct version_info *info;
7746 
7747       info = ver_info (data, j);
7748       if (info->iv
7749 	  && !integer_zerop (info->iv->step)
7750 	  && !info->inv_id
7751 	  && !info->iv->nonlin_use
7752 	  && !info->preserve_biv)
7753 	{
7754 	  bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
7755 
7756 	  tree def = info->iv->ssa_name;
7757 
7758 	  if (MAY_HAVE_DEBUG_BIND_STMTS && SSA_NAME_DEF_STMT (def))
7759 	    {
7760 	      imm_use_iterator imm_iter;
7761 	      use_operand_p use_p;
7762 	      gimple *stmt;
7763 	      int count = 0;
7764 
7765 	      FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7766 		{
7767 		  if (!gimple_debug_bind_p (stmt))
7768 		    continue;
7769 
7770 		  /* We just want to determine whether to do nothing
7771 		     (count == 0), to substitute the computed
7772 		     expression into a single use of the SSA DEF by
7773 		     itself (count == 1), or to use a debug temp
7774 		     because the SSA DEF is used multiple times or as
7775 		     part of a larger expression (count > 1). */
7776 		  count++;
7777 		  if (gimple_debug_bind_get_value (stmt) != def)
7778 		    count++;
7779 
7780 		  if (count > 1)
7781 		    break;
7782 		}
7783 
7784 	      if (!count)
7785 		continue;
7786 
7787 	      struct iv_use dummy_use;
7788 	      struct iv_cand *best_cand = NULL, *cand;
7789 	      unsigned i, best_pref = 0, cand_pref;
7790 	      tree comp = NULL_TREE;
7791 
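	      /* Pick the candidate whose value most likely yields a simple
		 debug expression: a matching step is weighted 4, a matching
		 base mode 2 and a constant base 1, so e.g. a candidate with
		 the same step and mode (score 6) wins over one with only the
		 same mode and a constant base (score 3).  */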
7792 	      memset (&dummy_use, 0, sizeof (dummy_use));
7793 	      dummy_use.iv = info->iv;
7794 	      for (i = 0; i < data->vgroups.length () && i < 64; i++)
7795 		{
7796 		  cand = data->vgroups[i]->selected;
7797 		  if (cand == best_cand)
7798 		    continue;
7799 		  cand_pref = operand_equal_p (cand->iv->step,
7800 					       info->iv->step, 0)
7801 		    ? 4 : 0;
7802 		  cand_pref
7803 		    += TYPE_MODE (TREE_TYPE (cand->iv->base))
7804 		    == TYPE_MODE (TREE_TYPE (info->iv->base))
7805 		    ? 2 : 0;
7806 		  cand_pref
7807 		    += TREE_CODE (cand->iv->base) == INTEGER_CST
7808 		    ? 1 : 0;
7809 		  if (best_cand == NULL || best_pref < cand_pref)
7810 		    {
7811 		      tree this_comp
7812 			= get_debug_computation_at (data->current_loop,
7813 						    SSA_NAME_DEF_STMT (def),
7814 						    &dummy_use, cand);
7815 		      if (this_comp)
7816 			{
7817 			  best_cand = cand;
7818 			  best_pref = cand_pref;
7819 			  comp = this_comp;
7820 			}
7821 		    }
7822 		}
7823 
7824 	      if (!best_cand)
7825 		continue;
7826 
7827 	      comp = unshare_expr (comp);
7828 	      if (count > 1)
7829 		{
7830 		  tree vexpr = build_debug_expr_decl (TREE_TYPE (comp));
7831 		  /* FIXME: Is setting the mode really necessary? */
7832 		  if (SSA_NAME_VAR (def))
7833 		    SET_DECL_MODE (vexpr, DECL_MODE (SSA_NAME_VAR (def)));
7834 		  else
7835 		    SET_DECL_MODE (vexpr, TYPE_MODE (TREE_TYPE (vexpr)));
7836 		  gdebug *def_temp
7837 		    = gimple_build_debug_bind (vexpr, comp, NULL);
7838 		  gimple_stmt_iterator gsi;
7839 
7840 		  if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
7841 		    gsi = gsi_after_labels (gimple_bb
7842 					    (SSA_NAME_DEF_STMT (def)));
7843 		  else
7844 		    gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
7845 
7846 		  gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
7847 		  comp = vexpr;
7848 		}
7849 
7850 	      FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7851 		{
7852 		  if (!gimple_debug_bind_p (stmt))
7853 		    continue;
7854 
7855 		  FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
7856 		    SET_USE (use_p, comp);
7857 
7858 		  update_stmt (stmt);
7859 		}
7860 	    }
7861 	}
7862     }
7863 }
7864 
7865 /* Frees memory occupied by class tree_niter_desc in *VALUE. Callback
7866    for hash_map::traverse.  */
7867 
7868 bool
7869 free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
7870 {
7871   free (value);
7872   return true;
7873 }
7874 
7875 /* Frees data allocated by the optimization of a single loop.  */
7876 
7877 static void
7878 free_loop_data (struct ivopts_data *data)
7879 {
7880   unsigned i, j;
7881   bitmap_iterator bi;
7882   tree obj;
7883 
7884   if (data->niters)
7885     {
7886       data->niters->traverse<void *, free_tree_niter_desc> (NULL);
7887       delete data->niters;
7888       data->niters = NULL;
7889     }
7890 
7891   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
7892     {
7893       struct version_info *info;
7894 
7895       info = ver_info (data, i);
7896       info->iv = NULL;
7897       info->has_nonlin_use = false;
7898       info->preserve_biv = false;
7899       info->inv_id = 0;
7900     }
7901   bitmap_clear (data->relevant);
7902   bitmap_clear (data->important_candidates);
7903 
7904   for (i = 0; i < data->vgroups.length (); i++)
7905     {
7906       struct iv_group *group = data->vgroups[i];
7907 
7908       for (j = 0; j < group->vuses.length (); j++)
7909 	free (group->vuses[j]);
7910       group->vuses.release ();
7911 
7912       BITMAP_FREE (group->related_cands);
7913       for (j = 0; j < group->n_map_members; j++)
7914 	{
7915 	  if (group->cost_map[j].inv_vars)
7916 	    BITMAP_FREE (group->cost_map[j].inv_vars);
7917 	  if (group->cost_map[j].inv_exprs)
7918 	    BITMAP_FREE (group->cost_map[j].inv_exprs);
7919 	}
7920 
7921       free (group->cost_map);
7922       free (group);
7923     }
7924   data->vgroups.truncate (0);
7925 
7926   for (i = 0; i < data->vcands.length (); i++)
7927     {
7928       struct iv_cand *cand = data->vcands[i];
7929 
7930       if (cand->inv_vars)
7931 	BITMAP_FREE (cand->inv_vars);
7932       if (cand->inv_exprs)
7933 	BITMAP_FREE (cand->inv_exprs);
7934       free (cand);
7935     }
7936   data->vcands.truncate (0);
7937 
7938   if (data->version_info_size < num_ssa_names)
7939     {
7940       data->version_info_size = 2 * num_ssa_names;
7941       free (data->version_info);
7942       data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
7943     }
7944 
7945   data->max_inv_var_id = 0;
7946   data->max_inv_expr_id = 0;
7947 
7948   FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
7949     SET_DECL_RTL (obj, NULL_RTX);
7950 
7951   decl_rtl_to_reset.truncate (0);
7952 
7953   data->inv_expr_tab->empty ();
7954 
7955   data->iv_common_cand_tab->empty ();
7956   data->iv_common_cands.truncate (0);
7957 }
7958 
7959 /* Finalizes the data structures used by the iv optimization pass and
7960    frees the memory they occupy.  */
7961 
7962 static void
7963 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
7964 {
7965   free_loop_data (data);
7966   free (data->version_info);
7967   BITMAP_FREE (data->relevant);
7968   BITMAP_FREE (data->important_candidates);
7969 
7970   decl_rtl_to_reset.release ();
7971   data->vgroups.release ();
7972   data->vcands.release ();
7973   delete data->inv_expr_tab;
7974   data->inv_expr_tab = NULL;
7975   free_affine_expand_cache (&data->name_expansion_cache);
7976   if (data->base_object_map)
7977     delete data->base_object_map;
7978   delete data->iv_common_cand_tab;
7979   data->iv_common_cand_tab = NULL;
7980   data->iv_common_cands.release ();
7981   obstack_free (&data->iv_obstack, NULL);
7982 }
7983 
7984 /* Returns true if the loop body BODY includes any function calls.  */
7985 
7986 static bool
7987 loop_body_includes_call (basic_block *body, unsigned num_nodes)
7988 {
7989   gimple_stmt_iterator gsi;
7990   unsigned i;
7991 
7992   for (i = 0; i < num_nodes; i++)
7993     for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
7994       {
7995 	gimple *stmt = gsi_stmt (gsi);
7996 	if (is_gimple_call (stmt)
7997 	    && !gimple_call_internal_p (stmt)
7998 	    && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
7999 	  return true;
8000       }
8001   return false;
8002 }
8003 
8004 /* Determine cost scaling factor for basic blocks in loop.  */
8005 #define COST_SCALING_FACTOR_BOUND (20)
8006 
8007 static void
8008 determine_scaling_factor (struct ivopts_data *data, basic_block *body)
8009 {
8010   int lfreq = data->current_loop->header->count.to_frequency (cfun);
8011   if (!data->speed || lfreq <= 0)
8012     return;
8013 
8014   int max_freq = lfreq;
8015   for (unsigned i = 0; i < data->current_loop->num_nodes; i++)
8016     {
8017       body[i]->aux = (void *)(intptr_t) 1;
8018       if (max_freq < body[i]->count.to_frequency (cfun))
8019 	max_freq = body[i]->count.to_frequency (cfun);
8020     }
8021   if (max_freq > lfreq)
8022     {
8023       int divisor, factor;
8024       /* Check if scaling factor itself needs to be scaled by the bound.  This
8025 	 is to avoid overflow when scaling cost according to profile info.  */
8026       if (max_freq / lfreq > COST_SCALING_FACTOR_BOUND)
8027 	{
8028 	  divisor = max_freq;
8029 	  factor = COST_SCALING_FACTOR_BOUND;
8030 	}
8031       else
8032 	{
8033 	  divisor = lfreq;
8034 	  factor = 1;
8035 	}
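      /* Illustrative numbers: with lfreq == 10 and max_freq == 500 the ratio
	 exceeds COST_SCALING_FACTOR_BOUND, so divisor == 500 and factor == 20;
	 a block with frequency 250 is then scaled by 20 * 250 / 500 == 10 and
	 the hottest block by the bound itself.  With max_freq == 80 instead,
	 divisor == 10 and factor == 1, so a block with frequency 40 is scaled
	 by 40 / 10 == 4.  */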
8036       for (unsigned i = 0; i < data->current_loop->num_nodes; i++)
8037 	{
8038 	  int bfreq = body[i]->count.to_frequency (cfun);
8039 	  if (bfreq <= lfreq)
8040 	    continue;
8041 
8042 	  body[i]->aux = (void*)(intptr_t) (factor * bfreq / divisor);
8043 	}
8044     }
8045 }
8046 
8047 /* Find the doloop comparison use and set its doloop_p flag if found.  */
8048 
8049 static bool
8050 find_doloop_use (struct ivopts_data *data)
8051 {
8052   struct loop *loop = data->current_loop;
8053 
8054   for (unsigned i = 0; i < data->vgroups.length (); i++)
8055     {
8056       struct iv_group *group = data->vgroups[i];
8057       if (group->type == USE_COMPARE)
8058 	{
8059 	  gcc_assert (group->vuses.length () == 1);
8060 	  struct iv_use *use = group->vuses[0];
8061 	  gimple *stmt = use->stmt;
8062 	  if (gimple_code (stmt) == GIMPLE_COND)
8063 	    {
8064 	      basic_block bb = gimple_bb (stmt);
8065 	      edge true_edge, false_edge;
8066 	      extract_true_false_edges_from_block (bb, &true_edge, &false_edge);
8067 	      /* This comparison controls the loop latch edge.  Require the latch
8068 		 to be empty for now.  */
8069 	      if ((loop->latch == true_edge->dest
8070 		   || loop->latch == false_edge->dest)
8071 		  && empty_block_p (loop->latch))
8072 		{
8073 		  group->doloop_p = true;
8074 		  if (dump_file && (dump_flags & TDF_DETAILS))
8075 		    {
8076 		      fprintf (dump_file, "Doloop cmp iv use: ");
8077 		      print_gimple_stmt (dump_file, stmt, TDF_DETAILS);
8078 		    }
8079 		  return true;
8080 		}
8081 	    }
8082 	}
8083     }
8084 
8085   return false;
8086 }
8087 
8088 /* For targets which support doloop, predict whether the later RTL doloop
8089    transformation will be performed on this loop; if so, detect the doloop use
8090    and mark the flag doloop_use_p.  */
8091 
8092 void
8093 analyze_and_mark_doloop_use (struct ivopts_data *data)
8094 {
8095   data->doloop_use_p = false;
8096 
8097   if (!flag_branch_on_count_reg)
8098     return;
8099 
8100   if (data->current_loop->unroll == USHRT_MAX)
8101     return;
8102 
8103   if (!generic_predict_doloop_p (data))
8104     return;
8105 
8106   if (find_doloop_use (data))
8107     {
8108       data->doloop_use_p = true;
8109       if (dump_file && (dump_flags & TDF_DETAILS))
8110 	{
8111 	  struct loop *loop = data->current_loop;
8112 	  fprintf (dump_file,
8113 		   "Predict loop %d can perform"
8114 		   " doloop optimization later.\n",
8115 		   loop->num);
8116 	  flow_loop_dump (loop, dump_file, NULL, 1);
8117 	}
8118     }
8119 }
8120 
8121 /* Optimizes the LOOP.  Returns true if anything changed.  */
8122 
8123 static bool
8124 tree_ssa_iv_optimize_loop (struct ivopts_data *data, class loop *loop,
8125 			   bitmap toremove)
8126 {
8127   bool changed = false;
8128   class iv_ca *iv_ca;
8129   edge exit = single_dom_exit (loop);
8130   basic_block *body;
8131 
8132   gcc_assert (!data->niters);
8133   data->current_loop = loop;
8134   data->loop_loc = find_loop_location (loop).get_location_t ();
8135   data->speed = optimize_loop_for_speed_p (loop);
8136 
8137   if (dump_file && (dump_flags & TDF_DETAILS))
8138     {
8139       fprintf (dump_file, "Processing loop %d", loop->num);
8140       if (data->loop_loc != UNKNOWN_LOCATION)
8141 	fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
8142 		 LOCATION_LINE (data->loop_loc));
8143       fprintf (dump_file, "\n");
8144 
8145       if (exit)
8146 	{
8147 	  fprintf (dump_file, "  single exit %d -> %d, exit condition ",
8148 		   exit->src->index, exit->dest->index);
8149 	  print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
8150 	  fprintf (dump_file, "\n");
8151 	}
8152 
8153       fprintf (dump_file, "\n");
8154     }
8155 
8156   body = get_loop_body (loop);
8157   data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
8158   renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
8159 
8160   data->loop_single_exit_p
8161     = exit != NULL && loop_only_exit_p (loop, body, exit);
8162 
8163   /* For each ssa name determines whether it behaves as an induction variable
8164      in some loop.  */
8165   if (!find_induction_variables (data, body))
8166     goto finish;
8167 
8168   /* Finds interesting uses (item 1).  */
8169   find_interesting_uses (data, body);
8170   if (data->vgroups.length () > MAX_CONSIDERED_GROUPS)
8171     goto finish;
8172 
8173   /* Determine cost scaling factor for basic blocks in loop.  */
8174   determine_scaling_factor (data, body);
8175 
8176   /* Analyze doloop possibility and mark the doloop use if predicted.  */
8177   analyze_and_mark_doloop_use (data);
8178 
8179   /* Finds candidates for the induction variables (item 2).  */
8180   find_iv_candidates (data);
8181 
8182   /* Calculates the costs (item 3, part 1).  */
8183   determine_iv_costs (data);
8184   determine_group_iv_costs (data);
8185   determine_set_costs (data);
8186 
8187   /* Find the optimal set of induction variables (item 3, part 2).  */
8188   iv_ca = find_optimal_iv_set (data);
8189   /* Cleanup basic block aux field.  */
8190   for (unsigned i = 0; i < data->current_loop->num_nodes; i++)
8191     body[i]->aux = NULL;
8192   if (!iv_ca)
8193     goto finish;
8194   changed = true;
8195 
8196   /* Create the new induction variables (item 4, part 1).  */
8197   create_new_ivs (data, iv_ca);
8198   iv_ca_free (&iv_ca);
8199 
8200   /* Rewrite the uses (item 4, part 2).  */
8201   rewrite_groups (data);
8202 
8203   /* Remove the ivs that are unused after rewriting.  */
8204   remove_unused_ivs (data, toremove);
8205 
8206 finish:
8207   free (body);
8208   free_loop_data (data);
8209 
8210   return changed;
8211 }
8212 
8213 /* Main entry point.  Optimizes induction variables in loops.  */
8214 
8215 void
8216 tree_ssa_iv_optimize (void)
8217 {
8218   struct ivopts_data data;
8219   auto_bitmap toremove;
8220 
8221   tree_ssa_iv_optimize_init (&data);
8222   mark_ssa_maybe_undefs ();
8223 
8224   /* Optimize the loops starting with the innermost ones.  */
8225   for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
8226     {
8227       if (!dbg_cnt (ivopts_loop))
8228 	continue;
8229 
8230       if (dump_file && (dump_flags & TDF_DETAILS))
8231 	flow_loop_dump (loop, dump_file, NULL, 1);
8232 
8233       tree_ssa_iv_optimize_loop (&data, loop, toremove);
8234     }
8235 
8236   /* Remove eliminated IV defs.  */
8237   release_defs_bitset (toremove);
8238 
8239   /* We have changed the structure of induction variables; it might happen
8240      that definitions in the scev database refer to some of them that were
8241      eliminated.  */
8242   scev_reset_htab ();
8243   /* Likewise niter and control-IV information.  */
8244   free_numbers_of_iterations_estimates (cfun);
8245 
8246   tree_ssa_iv_optimize_finalize (&data);
8247 }
8248 
8249 #include "gt-tree-ssa-loop-ivopts.h"
8250