/* Induction variable optimizations.
   Copyright (C) 2003-2020 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does so in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

      Note the interesting uses are categorized and handled in groups.
      Generally, address type uses are grouped together if their iv bases
      differ only in constant offset.

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
	 groups/uses" above

   3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The group/use costs.  Each of the interesting groups/uses chooses
	 the best induction variable in the set and adds its cost to the sum.
	 The cost reflects the time spent on modifying the induction variable's
	 value to be usable for the given purpose (adding base and offset for
	 arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
	 reflects the costs associated with incrementing the value of the
	 variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
	 added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, and the dead code
      is removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible; it might give better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.

   For targets supporting low-overhead loops, IVOPTs has to take care of
   the loops which will probably be transformed by the RTL doloop
   optimization, to try to make the selected IV candidate set optimal.  The
   process of doloop support includes:

   1) Analyze whether the current loop will be transformed into a doloop or
      not, find and mark its compare type IV use as doloop use (iv_group
      field doloop_p), and set flag doloop_use_p of ivopts_data to notify
      subsequent processing on doloop.  See analyze_and_mark_doloop_use and
      its callees for the details.  The target hook predict_doloop_p can be
      used for target specific checks.

   2) Add one doloop dedicated IV cand {(may_be_zero ? 1 : (niter + 1)), +, -1},
      set flag doloop_p of iv_cand; its step cost is set to zero and it
      incurs no extra cost, like a biv.  For cost determination between the
      doloop IV cand and an IV use, the target hooks doloop_cost_for_generic
      and doloop_cost_for_address are provided to add extra costs for
      generic type and address type IV uses.  Zero cost is assigned to the
      pair between the doloop IV cand and the doloop IV use, and bound zero
      is set for IV elimination.

   3) With the cost setting in step 2), the current cost model based IV
      selection algorithm will proceed as usual, picking up the doloop
      dedicated IV if profitable.  */
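
/* As an illustrative sketch (not taken from this file), consider a loop

     for (i = 0; i < n; i++)
       sum += a[i];

   Here the pass sees a compare type use of i (i < n) and an address
   type use derived from i (&a[i]).  A strength-reduced pointer
   candidate p = &a[0] with step sizeof (*a) can cover the address use,
   while the original i (or p itself, compared against a bound) can
   cover the compare; the cost model below decides which candidate set
   is cheapest on the target.  */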

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "tree-pass.h"
#include "memmodel.h"
#include "tm_p.h"
#include "ssa.h"
#include "expmed.h"
#include "insn-config.h"
#include "emit-rtl.h"
#include "recog.h"
#include "cgraph.h"
#include "gimple-pretty-print.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "tree-eh.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "tree-cfg.h"
#include "tree-ssa-loop-ivopts.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "explow.h"
#include "expr.h"
#include "tree-dfa.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-scalar-evolution.h"
#include "tree-affine.h"
#include "tree-ssa-propagate.h"
#include "tree-ssa-address.h"
#include "builtins.h"
#include "tree-vectorizer.h"
#include "dbgcnt.h"
#include "cfganal.h"

/* For lang_hooks.types.type_for_mode.  */
#include "langhooks.h"

/* FIXME: Expressions are expanded to RTL in this pass to determine the
   cost of different addressing modes.  This should be moved to a TBD
   interface between the GIMPLE and RTL worlds.  */

/* The infinite cost.  */
#define INFTY 1000000000

/* Returns the expected number of loop iterations for LOOP.
   The average trip count is computed from profile data if it
   exists.  */

static inline HOST_WIDE_INT
avg_loop_niter (class loop *loop)
{
  HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
  if (niter == -1)
    {
      niter = likely_max_stmt_executions_int (loop);

      if (niter == -1 || niter > param_avg_loop_niter)
	return param_avg_loop_niter;
    }

  return niter;
}

struct iv_use;

/* Representation of the induction variable.  */
struct iv
{
  tree base;		/* Initial value of the iv.  */
  tree base_object;	/* A memory object to which the induction variable
			   points.  */
  tree step;		/* Step of the iv (constant only).  */
  tree ssa_name;	/* The ssa name with the value.  */
  struct iv_use *nonlin_use;	/* The nonlinear use of this iv, if any.  */
  bool biv_p;		/* Is it a biv?  */
  bool no_overflow;	/* True if the iv doesn't overflow.  */
  bool have_address_use;/* For biv, indicate if it's used in any address
			   type use.  */
};

/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;		/* The ssa name.  */
  struct iv *iv;	/* Induction variable description.  */
  bool has_nonlin_use;	/* For a loop-level invariant, whether it is used in
			   an expression that is not an induction variable.  */
  bool preserve_biv;	/* For the original biv, whether to preserve it.  */
  unsigned inv_id;	/* Id of an invariant.  */
};

/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
  USE_REF_ADDRESS,	/* Use is an address for an explicit memory
			   reference.  */
  USE_PTR_ADDRESS,	/* Use is a pointer argument to a function in
			   cases where the expansion of the function
			   will turn the argument into a normal address.  */
  USE_COMPARE		/* Use is a compare.  */
};
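
/* A rough illustration (hypothetical statements, not from this file):
   given an iv i, "x = i * i" contains a USE_NONLINEAR_EXPR use of i,
   "a[i] = 0" contains a USE_REF_ADDRESS use of the address &a[i],
   passing &a[i] to a builtin whose expansion turns the argument into
   a plain memory access gives a USE_PTR_ADDRESS use, and "if (i < n)"
   contains a USE_COMPARE use.  */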

/* Cost of a computation.  */
class comp_cost
{
public:
  comp_cost (): cost (0), complexity (0), scratch (0)
  {}

  comp_cost (int64_t cost, unsigned complexity, int64_t scratch = 0)
    : cost (cost), complexity (complexity), scratch (scratch)
  {}

  /* Returns true if COST is infinite.  */
  bool infinite_cost_p ();

  /* Adds costs COST1 and COST2.  */
  friend comp_cost operator+ (comp_cost cost1, comp_cost cost2);

  /* Adds COST to this comp_cost.  */
  comp_cost operator+= (comp_cost cost);

  /* Adds constant C to this comp_cost.  */
  comp_cost operator+= (HOST_WIDE_INT c);

  /* Subtracts constant C from this comp_cost.  */
  comp_cost operator-= (HOST_WIDE_INT c);

  /* Divides this comp_cost by constant C.  */
  comp_cost operator/= (HOST_WIDE_INT c);

  /* Multiplies this comp_cost by constant C.  */
  comp_cost operator*= (HOST_WIDE_INT c);

  /* Subtracts costs COST1 and COST2.  */
  friend comp_cost operator- (comp_cost cost1, comp_cost cost2);

  /* Subtracts COST from this comp_cost.  */
  comp_cost operator-= (comp_cost cost);

  /* Returns true if COST1 is smaller than COST2.  */
  friend bool operator< (comp_cost cost1, comp_cost cost2);

  /* Returns true if COST1 and COST2 are equal.  */
  friend bool operator== (comp_cost cost1, comp_cost cost2);

  /* Returns true if COST1 is smaller than or equal to COST2.  */
  friend bool operator<= (comp_cost cost1, comp_cost cost2);

  int64_t cost;		/* The runtime cost.  */
  unsigned complexity;  /* The estimate of the complexity of the code for
			   the computation (in no concrete units --
			   complexity field should be larger for more
			   complex expressions and addressing modes).  */
  int64_t scratch;	/* Scratch used during cost computation.  */
};

static const comp_cost no_cost;
static const comp_cost infinite_cost (INFTY, 0, INFTY);

bool
comp_cost::infinite_cost_p ()
{
  return cost == INFTY;
}

comp_cost
operator+ (comp_cost cost1, comp_cost cost2)
{
  if (cost1.infinite_cost_p () || cost2.infinite_cost_p ())
    return infinite_cost;

  gcc_assert (cost1.cost + cost2.cost < infinite_cost.cost);
  cost1.cost += cost2.cost;
  cost1.complexity += cost2.complexity;

  return cost1;
}

comp_cost
operator- (comp_cost cost1, comp_cost cost2)
{
  if (cost1.infinite_cost_p ())
    return infinite_cost;

  gcc_assert (!cost2.infinite_cost_p ());
  gcc_assert (cost1.cost - cost2.cost < infinite_cost.cost);

  cost1.cost -= cost2.cost;
  cost1.complexity -= cost2.complexity;

  return cost1;
}

comp_cost
comp_cost::operator+= (comp_cost cost)
{
  *this = *this + cost;
  return *this;
}

comp_cost
comp_cost::operator+= (HOST_WIDE_INT c)
{
  if (c >= INFTY)
    this->cost = INFTY;

  if (infinite_cost_p ())
    return *this;

  gcc_assert (this->cost + c < infinite_cost.cost);
  this->cost += c;

  return *this;
}

comp_cost
comp_cost::operator-= (HOST_WIDE_INT c)
{
  if (infinite_cost_p ())
    return *this;

  gcc_assert (this->cost - c < infinite_cost.cost);
  this->cost -= c;

  return *this;
}

comp_cost
comp_cost::operator/= (HOST_WIDE_INT c)
{
  gcc_assert (c != 0);
  if (infinite_cost_p ())
    return *this;

  this->cost /= c;

  return *this;
}

comp_cost
comp_cost::operator*= (HOST_WIDE_INT c)
{
  if (infinite_cost_p ())
    return *this;

  gcc_assert (this->cost * c < infinite_cost.cost);
  this->cost *= c;

  return *this;
}

comp_cost
comp_cost::operator-= (comp_cost cost)
{
  *this = *this - cost;
  return *this;
}

bool
operator< (comp_cost cost1, comp_cost cost2)
{
  if (cost1.cost == cost2.cost)
    return cost1.complexity < cost2.complexity;

  return cost1.cost < cost2.cost;
}

bool
operator== (comp_cost cost1, comp_cost cost2)
{
  return cost1.cost == cost2.cost
    && cost1.complexity == cost2.complexity;
}

bool
operator<= (comp_cost cost1, comp_cost cost2)
{
  return cost1 < cost2 || cost1 == cost2;
}
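
/* A small usage sketch (hypothetical values): comparisons order by the
   runtime cost first and use complexity only as a tie-breaker, so
   comp_cost (4, 2) < comp_cost (5, 0) and comp_cost (4, 1) <
   comp_cost (4, 2), while adding anything to infinite_cost yields
   infinite_cost again.  */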

struct iv_inv_expr_ent;

/* The candidate - cost pair.  */
class cost_pair
{
public:
  struct iv_cand *cand;	/* The candidate.  */
  comp_cost cost;	/* The cost.  */
  enum tree_code comp;	/* For iv elimination, the comparison.  */
  bitmap inv_vars;	/* The list of invariant ssa_vars that have to be
			   preserved when representing iv_use with iv_cand.  */
  bitmap inv_exprs;	/* The list of newly created invariant expressions
			   when representing iv_use with iv_cand.  */
  tree value;		/* For final value elimination, the expression for
			   the final value of the iv.  For iv elimination,
			   the new bound to compare with.  */
};

/* Use.  */
struct iv_use
{
  unsigned id;		/* The id of the use.  */
  unsigned group_id;	/* The group id the use belongs to.  */
  enum use_type type;	/* Type of the use.  */
  tree mem_type;	/* The memory type to use when testing whether an
			   address is legitimate, and what the address's
			   cost is.  */
  struct iv *iv;	/* The induction variable it is based on.  */
  gimple *stmt;		/* Statement in which it occurs.  */
  tree *op_p;		/* The place where it occurs.  */

  tree addr_base;	/* Base address with const offset stripped.  */
  poly_uint64_pod addr_offset;
			/* Const offset stripped from base address.  */
};

/* Group of uses.  */
struct iv_group
{
  /* The id of the group.  */
  unsigned id;
  /* Uses of the group are of the same type.  */
  enum use_type type;
  /* The set of "related" IV candidates, plus the important ones.  */
  bitmap related_cands;
  /* Number of IV candidates in the cost_map.  */
  unsigned n_map_members;
  /* The costs w.r.t. the iv candidates.  */
  class cost_pair *cost_map;
  /* The selected candidate for the group.  */
  struct iv_cand *selected;
  /* To indicate this is a doloop use group.  */
  bool doloop_p;
  /* Uses in the group.  */
  vec<struct iv_use *> vuses;
};

/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,		/* At the end, just before the exit condition.  */
  IP_END,		/* At the end of the latch block.  */
  IP_BEFORE_USE,	/* Immediately before a specific use.  */
  IP_AFTER_USE,		/* Immediately after a specific use.  */
  IP_ORIGINAL		/* The original biv.  */
};
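
/* For instance (a schematic sketch, not from this file), for a
   candidate increment "p = p + 4" in a loop whose body ends in
   "if (i < n) goto body;", IP_NORMAL places the increment right before
   that exit test, IP_END places it in the latch block after the test,
   and IP_BEFORE_USE/IP_AFTER_USE place it next to one particular use,
   e.g. to enable auto-increment addressing on targets that have it.  */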

/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;		/* The number of the candidate.  */
  bool important;	/* Whether this is an "important" candidate, i.e. such
			   that it should be considered by all uses.  */
  ENUM_BITFIELD(iv_position) pos : 8;	/* Where it is computed.  */
  gimple *incremented_at;/* For original biv, the statement where it is
			   incremented.  */
  tree var_before;	/* The variable used for it before increment.  */
  tree var_after;	/* The variable used for it after increment.  */
  struct iv *iv;	/* The value of the candidate.  NULL for
			   "pseudocandidate" used to indicate the possibility
			   to replace the final value of an iv by direct
			   computation of the value.  */
  unsigned cost;	/* Cost of the candidate.  */
  unsigned cost_step;	/* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
			      where it is incremented.  */
  bitmap inv_vars;	/* The list of invariant ssa_vars used in step of the
			   iv_cand.  */
  bitmap inv_exprs;	/* If step is more complicated than a single ssa_var,
			   handle it as a new invariant expression which will
			   be hoisted out of loop.  */
  struct iv *orig_iv;	/* The original iv if this cand is added from biv with
			   smaller type.  */
  bool doloop_p;	/* Whether this is a doloop candidate.  */
};

/* Hashtable entry for common candidate derived from iv uses.  */
class iv_common_cand
{
public:
  tree base;
  tree step;
  /* IV uses from which this common candidate is derived.  */
  auto_vec<struct iv_use *> uses;
  hashval_t hash;
};

/* Hashtable helpers.  */

struct iv_common_cand_hasher : delete_ptr_hash <iv_common_cand>
{
  static inline hashval_t hash (const iv_common_cand *);
  static inline bool equal (const iv_common_cand *, const iv_common_cand *);
};

/* Hash function for possible common candidates.  */

inline hashval_t
iv_common_cand_hasher::hash (const iv_common_cand *ccand)
{
  return ccand->hash;
}

/* Hash table equality function for common candidates.  */

inline bool
iv_common_cand_hasher::equal (const iv_common_cand *ccand1,
			      const iv_common_cand *ccand2)
{
  return (ccand1->hash == ccand2->hash
	  && operand_equal_p (ccand1->base, ccand2->base, 0)
	  && operand_equal_p (ccand1->step, ccand2->step, 0)
	  && (TYPE_PRECISION (TREE_TYPE (ccand1->base))
	      == TYPE_PRECISION (TREE_TYPE (ccand2->base))));
}

/* Loop invariant expression hashtable entry.  */

struct iv_inv_expr_ent
{
  /* Tree expression of the entry.  */
  tree expr;
  /* Unique identifier.  */
  int id;
  /* Hash value.  */
  hashval_t hash;
};

/* Comparison function for sorting iv_inv_expr_ent entries A and B by
   their id field.  */

static int
sort_iv_inv_expr_ent (const void *a, const void *b)
{
  const iv_inv_expr_ent * const *e1 = (const iv_inv_expr_ent * const *) (a);
  const iv_inv_expr_ent * const *e2 = (const iv_inv_expr_ent * const *) (b);

  unsigned id1 = (*e1)->id;
  unsigned id2 = (*e2)->id;

  if (id1 < id2)
    return -1;
  else if (id1 > id2)
    return 1;
  else
    return 0;
}

/* Hashtable helpers.  */

struct iv_inv_expr_hasher : free_ptr_hash <iv_inv_expr_ent>
{
  static inline hashval_t hash (const iv_inv_expr_ent *);
  static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
};

/* Return true if uses of type TYPE represent some form of address.  */

inline bool
address_p (use_type type)
{
  return type == USE_REF_ADDRESS || type == USE_PTR_ADDRESS;
}

/* Hash function for loop invariant expressions.  */

inline hashval_t
iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
{
  return expr->hash;
}

/* Hash table equality function for expressions.  */

inline bool
iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
			   const iv_inv_expr_ent *expr2)
{
  return expr1->hash == expr2->hash
	 && operand_equal_p (expr1->expr, expr2->expr, 0);
}

struct ivopts_data
{
  /* The currently optimized loop.  */
  class loop *current_loop;
  location_t loop_loc;

  /* Numbers of iterations for all exits of the current loop.  */
  hash_map<edge, tree_niter_desc *> *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The hashtable of loop invariant expressions created
     by ivopt.  */
  hash_table<iv_inv_expr_hasher> *inv_expr_tab;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  vec<iv_group *> vgroups;

  /* The candidates.  */
  vec<iv_cand *> vcands;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* Cache used by tree_to_aff_combination_expand.  */
  hash_map<tree, name_expansion *> *name_expansion_cache;

  /* The hashtable of common candidates derived from iv uses.  */
  hash_table<iv_common_cand_hasher> *iv_common_cand_tab;

  /* The common candidates.  */
  vec<iv_common_cand *> iv_common_cands;

  /* Hash map recording base object information of tree exp.  */
  hash_map<tree, tree> *base_object_map;

  /* The maximum invariant variable id.  */
  unsigned max_inv_var_id;

  /* The maximum invariant expression id.  */
  unsigned max_inv_expr_id;

  /* Number of no_overflow BIVs which are not used in memory address.  */
  unsigned bivs_not_used_in_addr;

  /* Obstack for iv structure.  */
  struct obstack iv_obstack;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;

  /* Whether the loop body includes any function calls.  */
  bool body_includes_call;

  /* Whether the loop body can only be exited via single exit.  */
  bool loop_single_exit_p;

  /* Whether the loop has doloop comparison use.  */
  bool doloop_use_p;
};

/* An assignment of iv candidates to uses.  */

class iv_ca
{
public:
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_groups;

  /* Candidate assigned to a use, together with the related costs.  */
  class cost_pair **cand_for_group;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* The number of invariants needed, including both invariant variables and
     invariant expressions.  */
  unsigned n_invs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  int64_t cand_cost;

  /* Number of times each invariant variable is used.  */
  unsigned *n_inv_var_uses;

  /* Number of times each invariant expression is used.  */
  unsigned *n_inv_expr_uses;

  /* Total cost of the assignment.  */
  comp_cost cost;
};

/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed group.  */
  struct iv_group *group;

  /* An old assignment (for rollback purposes).  */
  class cost_pair *old_cp;

  /* A new assignment.  */
  class cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next;
};

/* Bound on number of candidates below which all candidates are considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) param_iv_consider_all_candidates_bound)

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_GROUPS \
  ((unsigned) param_iv_max_considered_uses)

/* If there are at most this number of ivs in the set, always try removing
   unnecessary ivs from the set.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) param_iv_always_prune_cand_set_bound)

/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static vec<tree> decl_rtl_to_reset;

static comp_cost force_expr_to_var_cost (tree, bool);

/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (class loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}

/* Dumps information about the induction variable IV to FILE.  Don't dump
   variable's name if DUMP_NAME is FALSE.  The information is dumped with
   preceding spaces indicated by INDENT_LEVEL.  */

void
dump_iv (FILE *file, struct iv *iv, bool dump_name, unsigned indent_level)
{
  const char *p;
  const char spaces[9] = {' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '\0'};

  if (indent_level > 4)
    indent_level = 4;
  p = spaces + 8 - (indent_level << 1);

  fprintf (file, "%sIV struct:\n", p);
  if (iv->ssa_name && dump_name)
    {
      fprintf (file, "%s  SSA_NAME:\t", p);
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "%s  Type:\t", p);
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  fprintf (file, "%s  Base:\t", p);
  print_generic_expr (file, iv->base, TDF_SLIM);
  fprintf (file, "\n");

  fprintf (file, "%s  Step:\t", p);
  print_generic_expr (file, iv->step, TDF_SLIM);
  fprintf (file, "\n");

  if (iv->base_object)
    {
      fprintf (file, "%s  Object:\t", p);
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "%s  Biv:\t%c\n", p, iv->biv_p ? 'Y' : 'N');

  fprintf (file, "%s  Overflowness w.r.t. loop niter:\t%s\n",
	   p, iv->no_overflow ? "No-overflow" : "Overflow");
}

/* Dumps information about the USE to FILE.  */

void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "  Use %d.%d:\n", use->group_id, use->id);
  fprintf (file, "    At stmt:\t");
  print_gimple_stmt (file, use->stmt, 0);
  fprintf (file, "    At pos:\t");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");
  dump_iv (file, use->iv, false, 2);
}

/* Dumps information about the uses to FILE.  */

void
dump_groups (FILE *file, struct ivopts_data *data)
{
  unsigned i, j;
  struct iv_group *group;

  for (i = 0; i < data->vgroups.length (); i++)
    {
      group = data->vgroups[i];
      fprintf (file, "Group %d:\n", group->id);
      if (group->type == USE_NONLINEAR_EXPR)
	fprintf (file, "  Type:\tGENERIC\n");
      else if (group->type == USE_REF_ADDRESS)
	fprintf (file, "  Type:\tREFERENCE ADDRESS\n");
      else if (group->type == USE_PTR_ADDRESS)
	fprintf (file, "  Type:\tPOINTER ARGUMENT ADDRESS\n");
      else
	{
	  gcc_assert (group->type == USE_COMPARE);
	  fprintf (file, "  Type:\tCOMPARE\n");
	}
      for (j = 0; j < group->vuses.length (); j++)
	dump_use (file, group->vuses[j]);
    }
}

/* Dumps information about induction variable candidate CAND to FILE.  */

void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "Candidate %d:\n", cand->id);
  if (cand->inv_vars)
    {
      fprintf (file, "  Depend on inv.vars: ");
      dump_bitmap (file, cand->inv_vars);
    }
  if (cand->inv_exprs)
    {
      fprintf (file, "  Depend on inv.exprs: ");
      dump_bitmap (file, cand->inv_exprs);
    }

  if (cand->var_before)
    {
      fprintf (file, "  Var before: ");
      print_generic_expr (file, cand->var_before, TDF_SLIM);
      fprintf (file, "\n");
    }
  if (cand->var_after)
    {
      fprintf (file, "  Var after: ");
      print_generic_expr (file, cand->var_after, TDF_SLIM);
      fprintf (file, "\n");
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  Incr POS: before exit test\n");
      break;

    case IP_BEFORE_USE:
      fprintf (file, "  Incr POS: before use %d\n", cand->ainc_use->id);
      break;

    case IP_AFTER_USE:
      fprintf (file, "  Incr POS: after use %d\n", cand->ainc_use->id);
      break;

    case IP_END:
      fprintf (file, "  Incr POS: at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  Incr POS: orig biv\n");
      break;
    }

  dump_iv (file, iv, false, 1);
}

/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}

/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (class loop *loop, gimple *stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}

/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
   if the positions are identical.  */

static bool
stmt_after_inc_pos (struct iv_cand *cand, gimple *stmt, bool true_if_equal)
{
  basic_block cand_bb = gimple_bb (cand->incremented_at);
  basic_block stmt_bb = gimple_bb (stmt);

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  if (true_if_equal
      && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
    return true;
  return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
}

/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (class loop *loop, struct iv_cand *cand, gimple *stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
    case IP_AFTER_USE:
      return stmt_after_inc_pos (cand, stmt, false);

    case IP_BEFORE_USE:
      return stmt_after_inc_pos (cand, stmt, true);

    default:
      gcc_unreachable ();
    }
}

/* walk_tree callback for contains_abnormal_ssa_name_p.  */

static tree
contains_abnormal_ssa_name_p_1 (tree *tp, int *walk_subtrees, void *)
{
  if (TREE_CODE (*tp) == SSA_NAME
      && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (*tp))
    return *tp;

  if (!EXPR_P (*tp))
    *walk_subtrees = 0;

  return NULL_TREE;
}

/* Returns true if EXPR contains an ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  return walk_tree_without_duplicates
	   (&expr, contains_abnormal_ssa_name_p_1, NULL) != NULL_TREE;
}

/* Returns the structure describing the number of iterations determined
   from EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static class tree_niter_desc *
niter_for_exit (struct ivopts_data *data, edge exit)
{
  class tree_niter_desc *desc;
  tree_niter_desc **slot;

  if (!data->niters)
    {
      data->niters = new hash_map<edge, tree_niter_desc *>;
      slot = NULL;
    }
  else
    slot = data->niters->get (exit);

  if (!slot)
    {
      /* Try to determine number of iterations.  We cannot safely work with ssa
	 names that appear in phi nodes on abnormal edges, so that we do not
	 create overlapping life ranges for them (PR 27283).  */
      desc = XNEW (class tree_niter_desc);
      if (!number_of_iterations_exit (data->current_loop,
				      exit, desc, true)
	  || contains_abnormal_ssa_name_p (desc->niter))
	{
	  XDELETE (desc);
	  desc = NULL;
	}
      data->niters->put (exit, desc);
    }
  else
    desc = *slot;

  return desc;
}

/* Returns the structure describing the number of iterations determined
   from the single dominating exit of DATA->current_loop, or NULL if
   something goes wrong.  */

static class tree_niter_desc *
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit);
}

/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_var_id = 0;
  data->max_inv_expr_id = 0;
  data->niters = NULL;
  data->vgroups.create (20);
  data->vcands.create (20);
  data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
  data->name_expansion_cache = NULL;
  data->base_object_map = NULL;
  data->iv_common_cand_tab = new hash_table<iv_common_cand_hasher> (10);
  data->iv_common_cands.create (20);
  decl_rtl_to_reset.create (20);
  gcc_obstack_init (&data->iv_obstack);
}

/* walk_tree callback for determine_base_object.  */

static tree
determine_base_object_1 (tree *tp, int *walk_subtrees, void *wdata)
{
  tree_code code = TREE_CODE (*tp);
  tree obj = NULL_TREE;
  if (code == ADDR_EXPR)
    {
      tree base = get_base_address (TREE_OPERAND (*tp, 0));
      if (!base)
	obj = *tp;
      else if (TREE_CODE (base) != MEM_REF)
	obj = fold_convert (ptr_type_node, build_fold_addr_expr (base));
    }
  else if (code == SSA_NAME && POINTER_TYPE_P (TREE_TYPE (*tp)))
    obj = fold_convert (ptr_type_node, *tp);

  if (!obj)
    {
      if (!EXPR_P (*tp))
	*walk_subtrees = 0;

      return NULL_TREE;
    }
  /* Record special node for multiple base objects and stop.  */
  if (*static_cast<tree *> (wdata))
    {
      *static_cast<tree *> (wdata) = integer_zero_node;
      return integer_zero_node;
    }
  /* Record the base object and continue looking.  */
  *static_cast<tree *> (wdata) = obj;
  return NULL_TREE;
}

/* Returns the memory object to which EXPR points, with caching.  Returns
   NULL if we are able to determine that it does not point to any such
   object; specially returns integer_zero_node if EXPR contains multiple
   base objects.  */

static tree
determine_base_object (struct ivopts_data *data, tree expr)
{
  tree *slot, obj = NULL_TREE;
  if (data->base_object_map)
    {
      if ((slot = data->base_object_map->get(expr)) != NULL)
	return *slot;
    }
  else
    data->base_object_map = new hash_map<tree, tree>;

  (void) walk_tree_without_duplicates (&expr, determine_base_object_1, &obj);
  data->base_object_map->put (expr, obj);
  return obj;
}

/* Return true if an address expression with a non-DECL_P operand appears
   in EXPR.  */

static bool
contain_complex_addr_expr (tree expr)
{
  bool res = false;

  STRIP_NOPS (expr);
  switch (TREE_CODE (expr))
    {
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
      res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
      break;

    case ADDR_EXPR:
      return (!DECL_P (TREE_OPERAND (expr, 0)));

    default:
      return false;
    }

  return res;
}
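
/* For example (an informal sketch): for "&a" the ADDR_EXPR operand is
   the VAR_DECL a, so the function returns false; for "&a[i]" or
   "&s->f" the operand is an ARRAY_REF or COMPONENT_REF, which is not
   DECL_P, so the function returns true and alloc_iv below lowers the
   expression to an affine combination.  */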

/* Allocates an induction variable with given initial value BASE and step STEP
   for loop LOOP.  NO_OVERFLOW implies the iv doesn't overflow.  */

static struct iv *
alloc_iv (struct ivopts_data *data, tree base, tree step,
	  bool no_overflow = false)
{
  tree expr = base;
  struct iv *iv = (struct iv*) obstack_alloc (&data->iv_obstack,
					      sizeof (struct iv));
  gcc_assert (step != NULL_TREE);

  /* Lower address expression in base except ones with DECL_P as operand.
     By doing this:
       1) More accurate cost can be computed for address expressions;
       2) Duplicate candidates won't be created for bases in different
	  forms, like &a[0] and &a.  */
  STRIP_NOPS (expr);
  if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
      || contain_complex_addr_expr (expr))
    {
      aff_tree comb;
      tree_to_aff_combination (expr, TREE_TYPE (expr), &comb);
      base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
    }

  iv->base = base;
  iv->base_object = determine_base_object (data, base);
  iv->step = step;
  iv->biv_p = false;
  iv->nonlin_use = NULL;
  iv->ssa_name = NULL_TREE;
  if (!no_overflow
      && !iv_can_overflow_p (data->current_loop, TREE_TYPE (base),
			     base, step))
    no_overflow = true;
  iv->no_overflow = no_overflow;
  iv->have_address_use = false;

  return iv;
}

/* Sets STEP and BASE for induction variable IV.  NO_OVERFLOW implies the IV
   doesn't overflow.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step,
	bool no_overflow)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (data, base, step, no_overflow);
  info->iv->ssa_name = iv;
}

/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = gimple_bb (SSA_NAME_DEF_STMT (var));

      if (!bb
	  || !flow_bb_inside_loop_p (data->current_loop, bb))
	{
	  if (POINTER_TYPE_P (type))
	    type = sizetype;
	  set_iv (data, var, var, build_int_cst (type, 0), true);
	}
    }

  return name_info (data, var)->iv;
}

/* Return the first non-invariant ssa var found in EXPR.  */

static tree
extract_single_var_from_expr (tree expr)
{
  int i, n;
  tree tmp;
  enum tree_code code;

  if (!expr || is_gimple_min_invariant (expr))
    return NULL;

  code = TREE_CODE (expr);
  if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
    {
      n = TREE_OPERAND_LENGTH (expr);
      for (i = 0; i < n; i++)
	{
	  tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));

	  if (tmp)
	    return tmp;
	}
    }
  return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
}

/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  gphi *phi;
  affine_iv iv;
  tree step, type, base, stop;
  bool found = false;
  class loop *loop = data->current_loop;
  gphi_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();

      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
	continue;

      if (virtual_operand_p (PHI_RESULT (phi)))
	continue;

      if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true))
	continue;

      if (integer_zerop (iv.step))
	continue;

      step = iv.step;
      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      /* Stop expanding iv base at the first ssa var referred to by iv step.
	 Ideally we should stop at any ssa var, but since that's expensive
	 and unusual, we just do it for the first one.

	 See PR64705 for the rationale.  */
      stop = extract_single_var_from_expr (step);
      base = expand_simple_operations (base, stop);
      if (contains_abnormal_ssa_name_p (base)
	  || contains_abnormal_ssa_name_p (step))
	continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
	{
	  if (POINTER_TYPE_P (type))
	    step = convert_to_ptrofftype (step);
	  else
	    step = fold_convert (type, step);
	}

      set_iv (data, PHI_RESULT (phi), base, step, iv.no_overflow);
      found = true;
    }

  return found;
}
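
/* For illustration (a schematic sketch): in GIMPLE form

     <bb header>:
     i_1 = PHI <0(preheader), i_2(latch)>
     ...
     i_2 = i_1 + 1;

   find_bivs detects the biv i with base 0 and step 1 from the PHI
   node, and mark_bivs below flags both i_1 and i_2 as biv parts.  */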

/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  gphi *phi;
  gimple *def;
  tree var;
  struct iv *iv, *incr_iv;
  class loop *loop = data->current_loop;
  basic_block incr_bb;
  gphi_iterator psi;

  data->bivs_not_used_in_addr = 0;
  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();

      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
	continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      def = SSA_NAME_DEF_STMT (var);
      /* Don't mark iv peeled from other one as biv.  */
      if (def
	  && gimple_code (def) == GIMPLE_PHI
	  && gimple_bb (def) == loop->header)
	continue;

      incr_iv = get_iv (data, var);
      if (!incr_iv)
	continue;

      /* If the increment is in the subloop, ignore it.  */
      incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
	  || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
	continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
      if (iv->no_overflow)
	data->bivs_not_used_in_addr++;
      if (incr_iv->no_overflow)
	data->bivs_not_used_in_addr++;
    }
}

/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, gimple *stmt, affine_iv *iv)
{
  tree lhs, stop;
  class loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (stmt);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
    return false;

  /* Stop expanding iv base at the first ssa var referred to by iv step.
     Ideally we should stop at any ssa var, but since that's expensive
     and unusual, we just do it for the first one.

     See PR64705 for the rationale.  */
  stop = extract_single_var_from_expr (iv->step);
  iv->base = expand_simple_operations (iv->base, stop);
  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  /* If STMT could throw, then do not consider STMT as defining a GIV.
     While this will suppress optimizations, we cannot safely delete this
     GIV and associated statements, even if it appears it is not used.  */
  if (stmt_could_throw_p (cfun, stmt))
    return false;

  return true;
}

/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, gimple *stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step, iv.no_overflow);
}

/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    find_givs_in_stmt (data, gsi_stmt (bsi));
}

/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  class loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}

/* For each ssa name defined in LOOP, determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      class tree_niter_desc *niter = niter_for_single_dom_exit (data);

      if (niter)
	{
	  fprintf (dump_file, "  number of iterations ");
	  print_generic_expr (dump_file, niter->niter, TDF_SLIM);
	  if (!integer_zerop (niter->may_be_zero))
	    {
	      fprintf (dump_file, "; zero if ");
	      print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
	    }
	  fprintf (dump_file, "\n");
	}

      fprintf (dump_file, "\n<Induction Vars>:\n");
      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  struct version_info *info = ver_info (data, i);
	  if (info->iv && info->iv->step && !integer_zerop (info->iv->step))
	    dump_iv (dump_file, ver_info (data, i)->iv, true, 0);
	}
    }

  return true;
}

/* Records a use of TYPE at *USE_P in STMT whose value is IV in GROUP.
   For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
   is the const offset stripped from IV base and MEM_TYPE is the type
   of the memory being addressed.  For uses of other types, ADDR_BASE
   and ADDR_OFFSET are zero by default and MEM_TYPE is NULL_TREE.  */

static struct iv_use *
record_use (struct iv_group *group, tree *use_p, struct iv *iv,
	    gimple *stmt, enum use_type type, tree mem_type,
	    tree addr_base, poly_uint64 addr_offset)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = group->vuses.length ();
  use->group_id = group->id;
  use->type = type;
  use->mem_type = mem_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->addr_base = addr_base;
  use->addr_offset = addr_offset;

  group->vuses.safe_push (use);
  return use;
}

/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || virtual_operand_p (op))
    return;

  bb = gimple_bb (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_var_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}

/* Record a group of TYPE.  */

static struct iv_group *
record_group (struct ivopts_data *data, enum use_type type)
{
  struct iv_group *group = XCNEW (struct iv_group);

  group->id = data->vgroups.length ();
  group->type = type;
  group->related_cands = BITMAP_ALLOC (NULL);
  group->vuses.create (1);
  group->doloop_p = false;

  data->vgroups.safe_push (group);
  return group;
}

/* Record a use of TYPE at *USE_P in STMT whose value is IV in a group.
   A new group will be created if there is no existing group for the use.
   MEM_TYPE is the type of memory being addressed, or NULL if this
   isn't an address reference.  */

static struct iv_use *
record_group_use (struct ivopts_data *data, tree *use_p,
		  struct iv *iv, gimple *stmt, enum use_type type,
		  tree mem_type)
{
  tree addr_base = NULL;
  struct iv_group *group = NULL;
  poly_uint64 addr_offset = 0;

  /* Non address type uses always get a new group; for address type uses,
     try to find an existing group with the same stripped base and step
     first.  */
  if (address_p (type))
    {
      unsigned int i;

      addr_base = strip_offset (iv->base, &addr_offset);
      for (i = 0; i < data->vgroups.length (); i++)
	{
	  struct iv_use *use;

	  group = data->vgroups[i];
	  use = group->vuses[0];
	  if (!address_p (use->type))
	    continue;

	  /* Check if it has the same stripped base and step.  */
	  if (operand_equal_p (iv->base_object, use->iv->base_object, 0)
	      && operand_equal_p (iv->step, use->iv->step, 0)
	      && operand_equal_p (addr_base, use->addr_base, 0))
	    break;
	}
      if (i == data->vgroups.length ())
	group = NULL;
    }

  if (!group)
    group = record_group (data, type);

  return record_use (group, use_p, iv, stmt, type, mem_type,
		     addr_base, addr_offset);
}

/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  gimple *stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->nonlin_use)
    {
      gcc_assert (iv->nonlin_use->type == USE_NONLINEAR_EXPR);
      return iv->nonlin_use;
    }

  if (integer_zerop (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (gimple_code (stmt) == GIMPLE_PHI || is_gimple_assign (stmt));

  use = record_group_use (data, NULL, iv, stmt, USE_NONLINEAR_EXPR, NULL_TREE);
  iv->nonlin_use = use;
  return use;
}

/* Indicate how compare type iv_use can be handled.  */
enum comp_iv_rewrite
{
  COMP_IV_NA,
  /* We may rewrite compare type iv_use by expressing value of the iv_use.  */
  COMP_IV_EXPR,
  /* We may rewrite compare type iv_uses on both sides of comparison by
     expressing value of each iv_use.  */
  COMP_IV_EXPR_2,
  /* We may rewrite compare type iv_use by expressing value of the iv_use
     or by eliminating it with other iv_cand.  */
  COMP_IV_ELIM
};
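
/* Roughly (hypothetical conditions): "i < j" with both i and j ivs is
   COMP_IV_EXPR_2, "i < x" with x neither an iv nor a loop invariant is
   COMP_IV_EXPR, "i < n" with n loop invariant is COMP_IV_ELIM (the
   compare may be rewritten against a different candidate and bound),
   and a compare involving no iv at all is COMP_IV_NA.  */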
1686 
1687 /* Given a condition in statement STMT, checks whether it is a compare
1688    of an induction variable and an invariant.  If this is the case,
1689    CONTROL_VAR is set to location of the iv, BOUND to the location of
1690    the invariant, IV_VAR and IV_BOUND are set to the corresponding
1691    induction variable descriptions, and true is returned.  If this is not
1692    the case, CONTROL_VAR and BOUND are set to the arguments of the
1693    condition and false is returned.  */
1694 
1695 static enum comp_iv_rewrite
extract_cond_operands(struct ivopts_data * data,gimple * stmt,tree ** control_var,tree ** bound,struct iv ** iv_var,struct iv ** iv_bound)1696 extract_cond_operands (struct ivopts_data *data, gimple *stmt,
1697 		       tree **control_var, tree **bound,
1698 		       struct iv **iv_var, struct iv **iv_bound)
1699 {
1700   /* The objects returned when COND has constant operands.  */
1701   static struct iv const_iv;
1702   static tree zero;
1703   tree *op0 = &zero, *op1 = &zero;
1704   struct iv *iv0 = &const_iv, *iv1 = &const_iv;
1705   enum comp_iv_rewrite rewrite_type = COMP_IV_NA;
1706 
1707   if (gimple_code (stmt) == GIMPLE_COND)
1708     {
1709       gcond *cond_stmt = as_a <gcond *> (stmt);
1710       op0 = gimple_cond_lhs_ptr (cond_stmt);
1711       op1 = gimple_cond_rhs_ptr (cond_stmt);
1712     }
1713   else
1714     {
1715       op0 = gimple_assign_rhs1_ptr (stmt);
1716       op1 = gimple_assign_rhs2_ptr (stmt);
1717     }
1718 
1719   zero = integer_zero_node;
1720   const_iv.step = integer_zero_node;
1721 
1722   if (TREE_CODE (*op0) == SSA_NAME)
1723     iv0 = get_iv (data, *op0);
1724   if (TREE_CODE (*op1) == SSA_NAME)
1725     iv1 = get_iv (data, *op1);
1726 
1727   /* If both sides of the comparison are IVs, we can express ivs on both ends.  */
1728   if (iv0 && iv1 && !integer_zerop (iv0->step) && !integer_zerop (iv1->step))
1729     {
1730       rewrite_type = COMP_IV_EXPR_2;
1731       goto end;
1732     }
1733 
1734   /* If neither side of the comparison is an IV.  */
1735   if ((!iv0 || integer_zerop (iv0->step))
1736       && (!iv1 || integer_zerop (iv1->step)))
1737     goto end;
1738 
1739   /* Control variable may be on the other side.  */
1740   if (!iv0 || integer_zerop (iv0->step))
1741     {
1742       std::swap (op0, op1);
1743       std::swap (iv0, iv1);
1744     }
1745   /* If one side is an IV and the other side isn't loop invariant.  */
1746   if (!iv1)
1747     rewrite_type = COMP_IV_EXPR;
1748   /* If one side is an IV and the other side is loop invariant.  */
1749   else if (!integer_zerop (iv0->step) && integer_zerop (iv1->step))
1750     rewrite_type = COMP_IV_ELIM;
1751 
1752 end:
1753   if (control_var)
1754     *control_var = op0;
1755   if (iv_var)
1756     *iv_var = iv0;
1757   if (bound)
1758     *bound = op1;
1759   if (iv_bound)
1760     *iv_bound = iv1;
1761 
1762   return rewrite_type;
1763 }
1764 
1765 /* Checks whether the condition in STMT is interesting and if so,
1766    records it.  */
1767 
1768 static void
1769 find_interesting_uses_cond (struct ivopts_data *data, gimple *stmt)
1770 {
1771   tree *var_p, *bound_p;
1772   struct iv *var_iv, *bound_iv;
1773   enum comp_iv_rewrite ret;
1774 
1775   ret = extract_cond_operands (data, stmt,
1776 			       &var_p, &bound_p, &var_iv, &bound_iv);
1777   if (ret == COMP_IV_NA)
1778     {
1779       find_interesting_uses_op (data, *var_p);
1780       find_interesting_uses_op (data, *bound_p);
1781       return;
1782     }
1783 
1784   record_group_use (data, var_p, var_iv, stmt, USE_COMPARE, NULL_TREE);
1785   /* Record compare type iv_use for iv on the other side of comparison.  */
1786   if (ret == COMP_IV_EXPR_2)
1787     record_group_use (data, bound_p, bound_iv, stmt, USE_COMPARE, NULL_TREE);
1788 }
1789 
1790 /* Returns the outermost loop, relative to the loop LOOP, in which EXPR
1791    is obviously invariant, i.e. such that all its operands are defined
1792    outside of the returned loop.  Returns NULL if EXPR is not
1793    even obviously invariant in LOOP.  */
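/* For instance, given the nest

     for (...)	      <- loop 1
       for (...)      <- loop 2
	 ... m + n ...

   where M and N are both defined before loop 1, calling this function
   with LOOP being loop 2 returns loop 1, since m + n could be moved out
   of both loops.  */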
1794 
1795 class loop *
1796 outermost_invariant_loop_for_expr (class loop *loop, tree expr)
1797 {
1798   basic_block def_bb;
1799   unsigned i, len;
1800 
1801   if (is_gimple_min_invariant (expr))
1802     return current_loops->tree_root;
1803 
1804   if (TREE_CODE (expr) == SSA_NAME)
1805     {
1806       def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1807       if (def_bb)
1808 	{
1809 	  if (flow_bb_inside_loop_p (loop, def_bb))
1810 	    return NULL;
1811 	  return superloop_at_depth (loop,
1812 				     loop_depth (def_bb->loop_father) + 1);
1813 	}
1814 
1815       return current_loops->tree_root;
1816     }
1817 
1818   if (!EXPR_P (expr))
1819     return NULL;
1820 
1821   unsigned maxdepth = 0;
1822   len = TREE_OPERAND_LENGTH (expr);
1823   for (i = 0; i < len; i++)
1824     {
1825       class loop *ivloop;
1826       if (!TREE_OPERAND (expr, i))
1827 	continue;
1828 
1829       ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
1830       if (!ivloop)
1831 	return NULL;
1832       maxdepth = MAX (maxdepth, loop_depth (ivloop));
1833     }
1834 
1835   return superloop_at_depth (loop, maxdepth);
1836 }
1837 
1838 /* Returns true if expression EXPR is obviously invariant in LOOP,
1839    i.e. if all its operands are defined outside of the LOOP.  LOOP
1840    should not be the function body.  */
1841 
1842 bool
1843 expr_invariant_in_loop_p (class loop *loop, tree expr)
1844 {
1845   basic_block def_bb;
1846   unsigned i, len;
1847 
1848   gcc_assert (loop_depth (loop) > 0);
1849 
1850   if (is_gimple_min_invariant (expr))
1851     return true;
1852 
1853   if (TREE_CODE (expr) == SSA_NAME)
1854     {
1855       def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1856       if (def_bb
1857 	  && flow_bb_inside_loop_p (loop, def_bb))
1858 	return false;
1859 
1860       return true;
1861     }
1862 
1863   if (!EXPR_P (expr))
1864     return false;
1865 
1866   len = TREE_OPERAND_LENGTH (expr);
1867   for (i = 0; i < len; i++)
1868     if (TREE_OPERAND (expr, i)
1869 	&& !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1870       return false;
1871 
1872   return true;
1873 }
1874 
1875 /* Given expression EXPR which computes inductive values with respect
1876    to the loop recorded in DATA, this function returns the biv from which
1877    EXPR is derived, by tracing definition chains of ssa variables in EXPR.  */
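/* E.g. if EXPR is "base + ((sizetype) i << 2)" where I is the loop's
   control variable, tracing the definitions of EXPR's ssa operands
   leads back to the biv I, which is what gets returned.  */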
1878 
1879 static struct iv*
1880 find_deriving_biv_for_expr (struct ivopts_data *data, tree expr)
1881 {
1882   struct iv *iv;
1883   unsigned i, n;
1884   tree e2, e1;
1885   enum tree_code code;
1886   gimple *stmt;
1887 
1888   if (expr == NULL_TREE)
1889     return NULL;
1890 
1891   if (is_gimple_min_invariant (expr))
1892     return NULL;
1893 
1894   code = TREE_CODE (expr);
1895   if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1896     {
1897       n = TREE_OPERAND_LENGTH (expr);
1898       for (i = 0; i < n; i++)
1899 	{
1900 	  iv = find_deriving_biv_for_expr (data, TREE_OPERAND (expr, i));
1901 	  if (iv)
1902 	    return iv;
1903 	}
1904     }
1905 
1906   /* Stop if it's not an SSA name.  */
1907   if (code != SSA_NAME)
1908     return NULL;
1909 
1910   iv = get_iv (data, expr);
1911   if (!iv || integer_zerop (iv->step))
1912     return NULL;
1913   else if (iv->biv_p)
1914     return iv;
1915 
1916   stmt = SSA_NAME_DEF_STMT (expr);
1917   if (gphi *phi = dyn_cast <gphi *> (stmt))
1918     {
1919       ssa_op_iter iter;
1920       use_operand_p use_p;
1921       basic_block phi_bb = gimple_bb (phi);
1922 
1923       /* Skip a loop header PHI that doesn't define a biv.  */
1924       if (phi_bb->loop_father == data->current_loop)
1925 	return NULL;
1926 
1927       if (virtual_operand_p (gimple_phi_result (phi)))
1928 	return NULL;
1929 
1930       FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
1931 	{
1932 	  tree use = USE_FROM_PTR (use_p);
1933 	  iv = find_deriving_biv_for_expr (data, use);
1934 	  if (iv)
1935 	    return iv;
1936 	}
1937       return NULL;
1938     }
1939   if (gimple_code (stmt) != GIMPLE_ASSIGN)
1940     return NULL;
1941 
1942   e1 = gimple_assign_rhs1 (stmt);
1943   code = gimple_assign_rhs_code (stmt);
1944   if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
1945     return find_deriving_biv_for_expr (data, e1);
1946 
1947   switch (code)
1948     {
1949     case MULT_EXPR:
1950     case PLUS_EXPR:
1951     case MINUS_EXPR:
1952     case POINTER_PLUS_EXPR:
1953       /* Increments, decrements and multiplications by a constant
1954 	 are simple.  */
1955       e2 = gimple_assign_rhs2 (stmt);
1956       iv = find_deriving_biv_for_expr (data, e2);
1957       if (iv)
1958 	return iv;
1959       gcc_fallthrough ();
1960 
1961     CASE_CONVERT:
1962       /* Casts are simple.  */
1963       return find_deriving_biv_for_expr (data, e1);
1964 
1965     default:
1966       break;
1967     }
1968 
1969   return NULL;
1970 }
1971 
1972 /* Record that BIV, together with its predecessor and successor bivs,
1973    is used in address type uses.  */
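/* The point is that a biv {base + step, step} differs from the biv
   {base, step} by exactly one increment, so if one of them is used in
   an address, the other is just as suitable for address computations
   and gets marked as well.  */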
1974 
1975 static void
1976 record_biv_for_address_use (struct ivopts_data *data, struct iv *biv)
1977 {
1978   unsigned i;
1979   tree type, base_1, base_2;
1980   bitmap_iterator bi;
1981 
1982   if (!biv || !biv->biv_p || integer_zerop (biv->step)
1983       || biv->have_address_use || !biv->no_overflow)
1984     return;
1985 
1986   type = TREE_TYPE (biv->base);
1987   if (!INTEGRAL_TYPE_P (type))
1988     return;
1989 
1990   biv->have_address_use = true;
1991   data->bivs_not_used_in_addr--;
1992   base_1 = fold_build2 (PLUS_EXPR, type, biv->base, biv->step);
1993   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1994     {
1995       struct iv *iv = ver_info (data, i)->iv;
1996 
1997       if (!iv || !iv->biv_p || integer_zerop (iv->step)
1998 	  || iv->have_address_use || !iv->no_overflow)
1999 	continue;
2000 
2001       if (type != TREE_TYPE (iv->base)
2002 	  || !INTEGRAL_TYPE_P (TREE_TYPE (iv->base)))
2003 	continue;
2004 
2005       if (!operand_equal_p (biv->step, iv->step, 0))
2006 	continue;
2007 
2008       base_2 = fold_build2 (PLUS_EXPR, type, iv->base, iv->step);
2009       if (operand_equal_p (base_1, iv->base, 0)
2010 	  || operand_equal_p (base_2, biv->base, 0))
2011 	{
2012 	  iv->have_address_use = true;
2013 	  data->bivs_not_used_in_addr--;
2014 	}
2015     }
2016 }
2017 
2018 /* Accumulates the steps of indices into DATA and replaces their values with the
2019    initial ones.  Returns false when the value of the index cannot be determined.
2020    Callback for for_each_index.  */
2021 
2022 struct ifs_ivopts_data
2023 {
2024   struct ivopts_data *ivopts_data;
2025   gimple *stmt;
2026   tree step;
2027 };
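/* E.g. for a reference a[i] with 4-byte elements, where I is an iv with
   step 1, the index is replaced by the initial value of I and
   4 * 1 = 4 bytes are accumulated into DTA->step, so the reference as a
   whole behaves as an iv with step 4.  */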
2028 
2029 static bool
2030 idx_find_step (tree base, tree *idx, void *data)
2031 {
2032   struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
2033   struct iv *iv;
2034   bool use_overflow_semantics = false;
2035   tree step, iv_base, iv_step, lbound, off;
2036   class loop *loop = dta->ivopts_data->current_loop;
2037 
2038   /* If base is a component ref, require that the offset of the reference
2039      be invariant.  */
2040   if (TREE_CODE (base) == COMPONENT_REF)
2041     {
2042       off = component_ref_field_offset (base);
2043       return expr_invariant_in_loop_p (loop, off);
2044     }
2045 
2046   /* If base is an array, first check whether we will be able to move the
2047      reference out of the loop (in order to take its address in strength
2048      reduction).  In order for this to work we need both lower bound
2049      and step to be loop invariants.  */
2050   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2051     {
2052       /* Moreover, for a range, the size needs to be invariant as well.  */
2053       if (TREE_CODE (base) == ARRAY_RANGE_REF
2054 	  && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
2055 	return false;
2056 
2057       step = array_ref_element_size (base);
2058       lbound = array_ref_low_bound (base);
2059 
2060       if (!expr_invariant_in_loop_p (loop, step)
2061 	  || !expr_invariant_in_loop_p (loop, lbound))
2062 	return false;
2063     }
2064 
2065   if (TREE_CODE (*idx) != SSA_NAME)
2066     return true;
2067 
2068   iv = get_iv (dta->ivopts_data, *idx);
2069   if (!iv)
2070     return false;
2071 
2072   /* XXX  We produce for a base of *D42 with iv->base being &x[0]
2073 	  *&x[0], which is not folded and does not trigger the
2074 	  ARRAY_REF path below.  */
2075   *idx = iv->base;
2076 
2077   if (integer_zerop (iv->step))
2078     return true;
2079 
2080   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2081     {
2082       step = array_ref_element_size (base);
2083 
2084       /* We only handle addresses whose step is an integer constant.  */
2085       if (TREE_CODE (step) != INTEGER_CST)
2086 	return false;
2087     }
2088   else
2089     /* The step for pointer arithmetic is already 1 byte.  */
2090     step = size_one_node;
2091 
2092   iv_base = iv->base;
2093   iv_step = iv->step;
2094   if (iv->no_overflow && nowrap_type_p (TREE_TYPE (iv_step)))
2095     use_overflow_semantics = true;
2096 
2097   if (!convert_affine_scev (dta->ivopts_data->current_loop,
2098 			    sizetype, &iv_base, &iv_step, dta->stmt,
2099 			    use_overflow_semantics))
2100     {
2101       /* The index might wrap.  */
2102       return false;
2103     }
2104 
2105   step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
2106   dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
2107 
2108   if (dta->ivopts_data->bivs_not_used_in_addr)
2109     {
2110       if (!iv->biv_p)
2111 	iv = find_deriving_biv_for_expr (dta->ivopts_data, iv->ssa_name);
2112 
2113       record_biv_for_address_use (dta->ivopts_data, iv);
2114     }
2115   return true;
2116 }
2117 
2118 /* Records use in index IDX.  Callback for for_each_index.  The ivopts data
2119    object is passed to it in VDATA.  */
2120 
2121 static bool
2122 idx_record_use (tree base, tree *idx,
2123 		void *vdata)
2124 {
2125   struct ivopts_data *data = (struct ivopts_data *) vdata;
2126   find_interesting_uses_op (data, *idx);
2127   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2128     {
2129       find_interesting_uses_op (data, array_ref_element_size (base));
2130       find_interesting_uses_op (data, array_ref_low_bound (base));
2131     }
2132   return true;
2133 }
2134 
2135 /* If we can prove that TOP = cst * BOT for some constant cst,
2136    store cst to MUL and return true.  Otherwise return false.
2137    The returned value is always sign-extended, regardless of the
2138    signedness of TOP and BOT.  */
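/* E.g. constant_multiple_of (x * 4 + x * 8, x, &mul) stores 12 to MUL
   and returns true, while constant_multiple_of (x + 1, x, &mul) returns
   false, since the constant term 1 is not a multiple of x.  */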
2139 
2140 static bool
2141 constant_multiple_of (tree top, tree bot, widest_int *mul)
2142 {
2143   tree mby;
2144   enum tree_code code;
2145   unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
2146   widest_int res, p0, p1;
2147 
2148   STRIP_NOPS (top);
2149   STRIP_NOPS (bot);
2150 
2151   if (operand_equal_p (top, bot, 0))
2152     {
2153       *mul = 1;
2154       return true;
2155     }
2156 
2157   code = TREE_CODE (top);
2158   switch (code)
2159     {
2160     case MULT_EXPR:
2161       mby = TREE_OPERAND (top, 1);
2162       if (TREE_CODE (mby) != INTEGER_CST)
2163 	return false;
2164 
2165       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
2166 	return false;
2167 
2168       *mul = wi::sext (res * wi::to_widest (mby), precision);
2169       return true;
2170 
2171     case PLUS_EXPR:
2172     case MINUS_EXPR:
2173       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
2174 	  || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
2175 	return false;
2176 
2177       if (code == MINUS_EXPR)
2178 	p1 = -p1;
2179       *mul = wi::sext (p0 + p1, precision);
2180       return true;
2181 
2182     case INTEGER_CST:
2183       if (TREE_CODE (bot) != INTEGER_CST)
2184 	return false;
2185 
2186       p0 = widest_int::from (wi::to_wide (top), SIGNED);
2187       p1 = widest_int::from (wi::to_wide (bot), SIGNED);
2188       if (p1 == 0)
2189 	return false;
2190       *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
2191       return res == 0;
2192 
2193     default:
2194       if (POLY_INT_CST_P (top)
2195 	  && POLY_INT_CST_P (bot)
2196 	  && constant_multiple_p (wi::to_poly_widest (top),
2197 				  wi::to_poly_widest (bot), mul))
2198 	return true;
2199 
2200       return false;
2201     }
2202 }
2203 
2204 /* Return true if memory reference REF with step STEP may be unaligned.  */
2205 
2206 static bool
2207 may_be_unaligned_p (tree ref, tree step)
2208 {
2209   /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
2210      thus they are not misaligned.  */
2211   if (TREE_CODE (ref) == TARGET_MEM_REF)
2212     return false;
2213 
2214   unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
2215   if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
2216     align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));
2217 
2218   unsigned HOST_WIDE_INT bitpos;
2219   unsigned int ref_align;
2220   get_object_alignment_1 (ref, &ref_align, &bitpos);
2221   if (ref_align < align
2222       || (bitpos % align) != 0
2223       || (bitpos % BITS_PER_UNIT) != 0)
2224     return true;
2225 
2226   unsigned int trailing_zeros = tree_ctz (step);
2227   if (trailing_zeros < HOST_BITS_PER_INT
2228       && (1U << trailing_zeros) * BITS_PER_UNIT < align)
2229     return true;
2230 
2231   return false;
2232 }
2233 
2234 /* Return true if EXPR may be non-addressable.   */
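/* E.g. a variable declared with an explicit hard register specifier
   (DECL_HARD_REGISTER) has no memory address at all, and a component
   reference may be non-addressable because of reverse storage order or
   because the field is marked DECL_NONADDRESSABLE_P.  */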
2235 
2236 bool
2237 may_be_nonaddressable_p (tree expr)
2238 {
2239   switch (TREE_CODE (expr))
2240     {
2241     case VAR_DECL:
2242       /* Check if it's a register variable.  */
2243       return DECL_HARD_REGISTER (expr);
2244 
2245     case TARGET_MEM_REF:
2246       /* TARGET_MEM_REFs are translated directly to valid MEMs on the
2247 	 target, thus they are always addressable.  */
2248       return false;
2249 
2250     case MEM_REF:
2251       /* Likewise for MEM_REFs, modulo the storage order.  */
2252       return REF_REVERSE_STORAGE_ORDER (expr);
2253 
2254     case BIT_FIELD_REF:
2255       if (REF_REVERSE_STORAGE_ORDER (expr))
2256 	return true;
2257       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2258 
2259     case COMPONENT_REF:
2260       if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2261 	return true;
2262       return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
2263 	     || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2264 
2265     case ARRAY_REF:
2266     case ARRAY_RANGE_REF:
2267       if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2268 	return true;
2269       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2270 
2271     case VIEW_CONVERT_EXPR:
2272       /* This kind of view-conversion may wrap non-addressable objects
2273 	 and make them look addressable.  After some processing the
2274 	 non-addressability may be uncovered again, causing ADDR_EXPRs
2275 	 of inappropriate objects to be built.  */
2276       if (is_gimple_reg (TREE_OPERAND (expr, 0))
2277 	  || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
2278 	return true;
2279       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2280 
2281     CASE_CONVERT:
2282       return true;
2283 
2284     default:
2285       break;
2286     }
2287 
2288   return false;
2289 }
2290 
2291 /* Finds addresses in *OP_P inside STMT.  */
2292 
2293 static void
2294 find_interesting_uses_address (struct ivopts_data *data, gimple *stmt,
2295 			       tree *op_p)
2296 {
2297   tree base = *op_p, step = size_zero_node;
2298   struct iv *civ;
2299   struct ifs_ivopts_data ifs_ivopts_data;
2300 
2301   /* Do not play with volatile memory references.  A bit too conservative,
2302      perhaps, but safe.  */
2303   if (gimple_has_volatile_ops (stmt))
2304     goto fail;
2305 
2306   /* Ignore bitfields for now.  Not really something terribly complicated
2307      to handle.  TODO.  */
2308   if (TREE_CODE (base) == BIT_FIELD_REF)
2309     goto fail;
2310 
2311   base = unshare_expr (base);
2312 
2313   if (TREE_CODE (base) == TARGET_MEM_REF)
2314     {
2315       tree type = build_pointer_type (TREE_TYPE (base));
2316       tree astep;
2317 
2318       if (TMR_BASE (base)
2319 	  && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
2320 	{
2321 	  civ = get_iv (data, TMR_BASE (base));
2322 	  if (!civ)
2323 	    goto fail;
2324 
2325 	  TMR_BASE (base) = civ->base;
2326 	  step = civ->step;
2327 	}
2328       if (TMR_INDEX2 (base)
2329 	  && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
2330 	{
2331 	  civ = get_iv (data, TMR_INDEX2 (base));
2332 	  if (!civ)
2333 	    goto fail;
2334 
2335 	  TMR_INDEX2 (base) = civ->base;
2336 	  step = civ->step;
2337 	}
2338       if (TMR_INDEX (base)
2339 	  && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
2340 	{
2341 	  civ = get_iv (data, TMR_INDEX (base));
2342 	  if (!civ)
2343 	    goto fail;
2344 
2345 	  TMR_INDEX (base) = civ->base;
2346 	  astep = civ->step;
2347 
2348 	  if (astep)
2349 	    {
2350 	      if (TMR_STEP (base))
2351 		astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
2352 
2353 	      step = fold_build2 (PLUS_EXPR, type, step, astep);
2354 	    }
2355 	}
2356 
2357       if (integer_zerop (step))
2358 	goto fail;
2359       base = tree_mem_ref_addr (type, base);
2360     }
2361   else
2362     {
2363       ifs_ivopts_data.ivopts_data = data;
2364       ifs_ivopts_data.stmt = stmt;
2365       ifs_ivopts_data.step = size_zero_node;
2366       if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
2367 	  || integer_zerop (ifs_ivopts_data.step))
2368 	goto fail;
2369       step = ifs_ivopts_data.step;
2370 
2371       /* Check that the base expression is addressable.  This needs
2372 	 to be done after substituting bases of IVs into it.  */
2373       if (may_be_nonaddressable_p (base))
2374 	goto fail;
2375 
2376       /* Moreover, on strict alignment platforms, check that it is
2377 	 sufficiently aligned.  */
2378       if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
2379 	goto fail;
2380 
2381       base = build_fold_addr_expr (base);
2382 
2383       /* Substituting bases of IVs into the base expression might
2384 	 have caused folding opportunities.  */
2385       if (TREE_CODE (base) == ADDR_EXPR)
2386 	{
2387 	  tree *ref = &TREE_OPERAND (base, 0);
2388 	  while (handled_component_p (*ref))
2389 	    ref = &TREE_OPERAND (*ref, 0);
2390 	  if (TREE_CODE (*ref) == MEM_REF)
2391 	    {
2392 	      tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
2393 				      TREE_OPERAND (*ref, 0),
2394 				      TREE_OPERAND (*ref, 1));
2395 	      if (tem)
2396 		*ref = tem;
2397 	    }
2398 	}
2399     }
2400 
2401   civ = alloc_iv (data, base, step);
2402   /* Fail if base object of this memory reference is unknown.  */
2403   if (civ->base_object == NULL_TREE)
2404     goto fail;
2405 
2406   record_group_use (data, op_p, civ, stmt, USE_REF_ADDRESS, TREE_TYPE (*op_p));
2407   return;
2408 
2409 fail:
2410   for_each_index (op_p, idx_record_use, data);
2411 }
2412 
2413 /* Finds and records invariants used in STMT.  */
2414 
2415 static void
2416 find_invariants_stmt (struct ivopts_data *data, gimple *stmt)
2417 {
2418   ssa_op_iter iter;
2419   use_operand_p use_p;
2420   tree op;
2421 
2422   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2423     {
2424       op = USE_FROM_PTR (use_p);
2425       record_invariant (data, op, false);
2426     }
2427 }
2428 
2429 /* CALL calls an internal function.  If operand *OP_P will become an
2430    address when the call is expanded, return the type of the memory
2431    being addressed, otherwise return null.  */
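/* E.g. for a masked load like "lhs = .MASK_LOAD (ptr, align, mask)",
   the first argument PTR becomes the load address on expansion, and the
   type of LHS describes the memory being accessed.  */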
2432 
2433 static tree
2434 get_mem_type_for_internal_fn (gcall *call, tree *op_p)
2435 {
2436   switch (gimple_call_internal_fn (call))
2437     {
2438     case IFN_MASK_LOAD:
2439     case IFN_MASK_LOAD_LANES:
2440       if (op_p == gimple_call_arg_ptr (call, 0))
2441 	return TREE_TYPE (gimple_call_lhs (call));
2442       return NULL_TREE;
2443 
2444     case IFN_MASK_STORE:
2445     case IFN_MASK_STORE_LANES:
2446       if (op_p == gimple_call_arg_ptr (call, 0))
2447 	return TREE_TYPE (gimple_call_arg (call, 3));
2448       return NULL_TREE;
2449 
2450     default:
2451       return NULL_TREE;
2452     }
2453 }
2454 
2455 /* IV is a (non-address) iv that describes operand *OP_P of STMT.
2456    Return true if the operand will become an address when STMT
2457    is expanded and record the associated address use if so.  */
2458 
2459 static bool
2460 find_address_like_use (struct ivopts_data *data, gimple *stmt, tree *op_p,
2461 		       struct iv *iv)
2462 {
2463   /* Fail if base object of this memory reference is unknown.  */
2464   if (iv->base_object == NULL_TREE)
2465     return false;
2466 
2467   tree mem_type = NULL_TREE;
2468   if (gcall *call = dyn_cast <gcall *> (stmt))
2469     if (gimple_call_internal_p (call))
2470       mem_type = get_mem_type_for_internal_fn (call, op_p);
2471   if (mem_type)
2472     {
2473       iv = alloc_iv (data, iv->base, iv->step);
2474       record_group_use (data, op_p, iv, stmt, USE_PTR_ADDRESS, mem_type);
2475       return true;
2476     }
2477   return false;
2478 }
2479 
2480 /* Finds interesting uses of induction variables in the statement STMT.  */
2481 
2482 static void
2483 find_interesting_uses_stmt (struct ivopts_data *data, gimple *stmt)
2484 {
2485   struct iv *iv;
2486   tree op, *lhs, *rhs;
2487   ssa_op_iter iter;
2488   use_operand_p use_p;
2489   enum tree_code code;
2490 
2491   find_invariants_stmt (data, stmt);
2492 
2493   if (gimple_code (stmt) == GIMPLE_COND)
2494     {
2495       find_interesting_uses_cond (data, stmt);
2496       return;
2497     }
2498 
2499   if (is_gimple_assign (stmt))
2500     {
2501       lhs = gimple_assign_lhs_ptr (stmt);
2502       rhs = gimple_assign_rhs1_ptr (stmt);
2503 
2504       if (TREE_CODE (*lhs) == SSA_NAME)
2505 	{
2506 	  /* If the statement defines an induction variable, the uses are not
2507 	     interesting by themselves.  */
2508 
2509 	  iv = get_iv (data, *lhs);
2510 
2511 	  if (iv && !integer_zerop (iv->step))
2512 	    return;
2513 	}
2514 
2515       code = gimple_assign_rhs_code (stmt);
2516       if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
2517 	  && (REFERENCE_CLASS_P (*rhs)
2518 	      || is_gimple_val (*rhs)))
2519 	{
2520 	  if (REFERENCE_CLASS_P (*rhs))
2521 	    find_interesting_uses_address (data, stmt, rhs);
2522 	  else
2523 	    find_interesting_uses_op (data, *rhs);
2524 
2525 	  if (REFERENCE_CLASS_P (*lhs))
2526 	    find_interesting_uses_address (data, stmt, lhs);
2527 	  return;
2528 	}
2529       else if (TREE_CODE_CLASS (code) == tcc_comparison)
2530 	{
2531 	  find_interesting_uses_cond (data, stmt);
2532 	  return;
2533 	}
2534 
2535       /* TODO -- we should also handle address uses of type
2536 
2537 	 memory = call (whatever);
2538 
2539 	 and
2540 
2541 	 call (memory).  */
2542     }
2543 
2544   if (gimple_code (stmt) == GIMPLE_PHI
2545       && gimple_bb (stmt) == data->current_loop->header)
2546     {
2547       iv = get_iv (data, PHI_RESULT (stmt));
2548 
2549       if (iv && !integer_zerop (iv->step))
2550 	return;
2551     }
2552 
2553   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2554     {
2555       op = USE_FROM_PTR (use_p);
2556 
2557       if (TREE_CODE (op) != SSA_NAME)
2558 	continue;
2559 
2560       iv = get_iv (data, op);
2561       if (!iv)
2562 	continue;
2563 
2564       if (!find_address_like_use (data, stmt, use_p->use, iv))
2565 	find_interesting_uses_op (data, op);
2566     }
2567 }
2568 
2569 /* Finds interesting uses of induction variables outside of loops
2570    on loop exit edge EXIT.  */
2571 
2572 static void
2573 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
2574 {
2575   gphi *phi;
2576   gphi_iterator psi;
2577   tree def;
2578 
2579   for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
2580     {
2581       phi = psi.phi ();
2582       def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2583       if (!virtual_operand_p (def))
2584 	find_interesting_uses_op (data, def);
2585     }
2586 }
2587 
2588 /* Return TRUE if OFFSET is within the range of the [base + offset]
2589    addressing mode for the memory reference represented by USE.  */
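/* For instance, on a target whose [base + offset] form only accepts,
   say, offsets in [-4096, 4095], this returns false for OFFSET 8192,
   which later causes such uses to be split into separate groups.  */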
2590 
2591 static GTY (()) vec<rtx, va_gc> *addr_list;
2592 
2593 static bool
2594 addr_offset_valid_p (struct iv_use *use, poly_int64 offset)
2595 {
2596   rtx reg, addr;
2597   unsigned list_index;
2598   addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
2599   machine_mode addr_mode, mem_mode = TYPE_MODE (use->mem_type);
2600 
2601   list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
2602   if (list_index >= vec_safe_length (addr_list))
2603     vec_safe_grow_cleared (addr_list, list_index + MAX_MACHINE_MODE);
2604 
2605   addr = (*addr_list)[list_index];
2606   if (!addr)
2607     {
2608       addr_mode = targetm.addr_space.address_mode (as);
2609       reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
2610       addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
2611       (*addr_list)[list_index] = addr;
2612     }
2613   else
2614     addr_mode = GET_MODE (addr);
2615 
2616   XEXP (addr, 1) = gen_int_mode (offset, addr_mode);
2617   return (memory_address_addr_space_p (mem_mode, addr, as));
2618 }
2619 
2620 /* Comparison function to sort a group's uses in ascending order of addr_offset.  */
2621 
2622 static int
2623 group_compare_offset (const void *a, const void *b)
2624 {
2625   const struct iv_use *const *u1 = (const struct iv_use *const *) a;
2626   const struct iv_use *const *u2 = (const struct iv_use *const *) b;
2627 
2628   return compare_sizes_for_sort ((*u1)->addr_offset, (*u2)->addr_offset);
2629 }
2630 
2631 /* Check if small groups should be split.  Return true if no group
2632    contains more than two uses with distinct addr_offsets.  Return
2633    false otherwise.  We want to split such groups because:
2634 
2635      1) Small groups don't have much benefit and may interfere with
2636 	general candidate selection.
2637      2) The problem size with only small groups is usually small, and
2638 	the general algorithm can handle it well.
2639 
2640    TODO -- The above claim may not hold when we want to merge memory
2641    accesses with consecutive addresses.  */
2642 
2643 static bool
2644 split_small_address_groups_p (struct ivopts_data *data)
2645 {
2646   unsigned int i, j, distinct = 1;
2647   struct iv_use *pre;
2648   struct iv_group *group;
2649 
2650   for (i = 0; i < data->vgroups.length (); i++)
2651     {
2652       group = data->vgroups[i];
2653       if (group->vuses.length () == 1)
2654 	continue;
2655 
2656       gcc_assert (address_p (group->type));
2657       if (group->vuses.length () == 2)
2658 	{
2659 	  if (compare_sizes_for_sort (group->vuses[0]->addr_offset,
2660 				      group->vuses[1]->addr_offset) > 0)
2661 	    std::swap (group->vuses[0], group->vuses[1]);
2662 	}
2663       else
2664 	group->vuses.qsort (group_compare_offset);
2665 
2666       if (distinct > 2)
2667 	continue;
2668 
2669       distinct = 1;
2670       for (pre = group->vuses[0], j = 1; j < group->vuses.length (); j++)
2671 	{
2672 	  if (maybe_ne (group->vuses[j]->addr_offset, pre->addr_offset))
2673 	    {
2674 	      pre = group->vuses[j];
2675 	      distinct++;
2676 	    }
2677 
2678 	  if (distinct > 2)
2679 	    break;
2680 	}
2681     }
2682 
2683   return (distinct <= 2);
2684 }
2685 
2686 /* For each group of address type uses, this function further groups
2687    these uses according to the maximum offset supported by the target's
2688    [base + offset] addressing mode.  */
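/* E.g. uses at BASE, BASE + 4 and BASE + 400000 start out in one group;
   if the target cannot encode the offset 400000 relative to the first
   use, the last use is moved into a new group of its own.  */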
2689 
2690 static void
2691 split_address_groups (struct ivopts_data *data)
2692 {
2693   unsigned int i, j;
2694   /* Whether to split all groups, regardless of offset range.  */
2695   bool split_p = split_small_address_groups_p (data);
2696 
2697   for (i = 0; i < data->vgroups.length (); i++)
2698     {
2699       struct iv_group *new_group = NULL;
2700       struct iv_group *group = data->vgroups[i];
2701       struct iv_use *use = group->vuses[0];
2702 
2703       use->id = 0;
2704       use->group_id = group->id;
2705       if (group->vuses.length () == 1)
2706 	continue;
2707 
2708       gcc_assert (address_p (use->type));
2709 
2710       for (j = 1; j < group->vuses.length ();)
2711 	{
2712 	  struct iv_use *next = group->vuses[j];
2713 	  poly_int64 offset = next->addr_offset - use->addr_offset;
2714 
2715 	  /* Split the group if asked to, or if the offset against the first
2716 	     use can't fit in the offset part of the addressing mode.  IV uses
2717 	     having the same offset are still kept in one group.  */
2718 	  if (maybe_ne (offset, 0)
2719 	      && (split_p || !addr_offset_valid_p (use, offset)))
2720 	    {
2721 	      if (!new_group)
2722 		new_group = record_group (data, group->type);
2723 	      group->vuses.ordered_remove (j);
2724 	      new_group->vuses.safe_push (next);
2725 	      continue;
2726 	    }
2727 
2728 	  next->id = j;
2729 	  next->group_id = group->id;
2730 	  j++;
2731 	}
2732     }
2733 }
2734 
2735 /* Finds uses of the induction variables that are interesting.  */
2736 
2737 static void
2738 find_interesting_uses (struct ivopts_data *data)
2739 {
2740   basic_block bb;
2741   gimple_stmt_iterator bsi;
2742   basic_block *body = get_loop_body (data->current_loop);
2743   unsigned i;
2744   edge e;
2745 
2746   for (i = 0; i < data->current_loop->num_nodes; i++)
2747     {
2748       edge_iterator ei;
2749       bb = body[i];
2750 
2751       FOR_EACH_EDGE (e, ei, bb->succs)
2752 	if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2753 	    && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2754 	  find_interesting_uses_outside (data, e);
2755 
2756       for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2757 	find_interesting_uses_stmt (data, gsi_stmt (bsi));
2758       for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2759 	if (!is_gimple_debug (gsi_stmt (bsi)))
2760 	  find_interesting_uses_stmt (data, gsi_stmt (bsi));
2761     }
2762   free (body);
2763 
2764   split_address_groups (data);
2765 
2766   if (dump_file && (dump_flags & TDF_DETAILS))
2767     {
2768       fprintf (dump_file, "\n<IV Groups>:\n");
2769       dump_groups (dump_file, data);
2770       fprintf (dump_file, "\n");
2771     }
2772 }
2773 
2774 /* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
2775    is true, assume we are inside an address.  If TOP_COMPREF is true, assume
2776    we are at the top-level of the processed address.  */
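/* E.g. stripping &a[i + 1] with 4-byte array elements sets *OFFSET to 4
   and returns &a[i], and stripping p + 16 sets *OFFSET to 16 and
   returns p.  */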
2777 
2778 static tree
2779 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2780 		poly_int64 *offset)
2781 {
2782   tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2783   enum tree_code code;
2784   tree type, orig_type = TREE_TYPE (expr);
2785   poly_int64 off0, off1;
2786   HOST_WIDE_INT st;
2787   tree orig_expr = expr;
2788 
2789   STRIP_NOPS (expr);
2790 
2791   type = TREE_TYPE (expr);
2792   code = TREE_CODE (expr);
2793   *offset = 0;
2794 
2795   switch (code)
2796     {
2797     case POINTER_PLUS_EXPR:
2798     case PLUS_EXPR:
2799     case MINUS_EXPR:
2800       op0 = TREE_OPERAND (expr, 0);
2801       op1 = TREE_OPERAND (expr, 1);
2802 
2803       op0 = strip_offset_1 (op0, false, false, &off0);
2804       op1 = strip_offset_1 (op1, false, false, &off1);
2805 
2806       *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2807       if (op0 == TREE_OPERAND (expr, 0)
2808 	  && op1 == TREE_OPERAND (expr, 1))
2809 	return orig_expr;
2810 
2811       if (integer_zerop (op1))
2812 	expr = op0;
2813       else if (integer_zerop (op0))
2814 	{
2815 	  if (code == MINUS_EXPR)
2816 	    expr = fold_build1 (NEGATE_EXPR, type, op1);
2817 	  else
2818 	    expr = op1;
2819 	}
2820       else
2821 	expr = fold_build2 (code, type, op0, op1);
2822 
2823       return fold_convert (orig_type, expr);
2824 
2825     case MULT_EXPR:
2826       op1 = TREE_OPERAND (expr, 1);
2827       if (!cst_and_fits_in_hwi (op1))
2828 	return orig_expr;
2829 
2830       op0 = TREE_OPERAND (expr, 0);
2831       op0 = strip_offset_1 (op0, false, false, &off0);
2832       if (op0 == TREE_OPERAND (expr, 0))
2833 	return orig_expr;
2834 
2835       *offset = off0 * int_cst_value (op1);
2836       if (integer_zerop (op0))
2837 	expr = op0;
2838       else
2839 	expr = fold_build2 (MULT_EXPR, type, op0, op1);
2840 
2841       return fold_convert (orig_type, expr);
2842 
2843     case ARRAY_REF:
2844     case ARRAY_RANGE_REF:
2845       if (!inside_addr)
2846 	return orig_expr;
2847 
2848       step = array_ref_element_size (expr);
2849       if (!cst_and_fits_in_hwi (step))
2850 	break;
2851 
2852       st = int_cst_value (step);
2853       op1 = TREE_OPERAND (expr, 1);
2854       op1 = strip_offset_1 (op1, false, false, &off1);
2855       *offset = off1 * st;
2856 
2857       if (top_compref
2858 	  && integer_zerop (op1))
2859 	{
2860 	  /* Strip the component reference completely.  */
2861 	  op0 = TREE_OPERAND (expr, 0);
2862 	  op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2863 	  *offset += off0;
2864 	  return op0;
2865 	}
2866       break;
2867 
2868     case COMPONENT_REF:
2869       {
2870 	tree field;
2871 
2872 	if (!inside_addr)
2873 	  return orig_expr;
2874 
2875 	tmp = component_ref_field_offset (expr);
2876 	field = TREE_OPERAND (expr, 1);
2877 	if (top_compref
2878 	    && cst_and_fits_in_hwi (tmp)
2879 	    && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2880 	  {
2881 	    HOST_WIDE_INT boffset, abs_off;
2882 
2883 	    /* Strip the component reference completely.  */
2884 	    op0 = TREE_OPERAND (expr, 0);
2885 	    op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2886 	    boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2887 	    abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2888 	    if (boffset < 0)
2889 	      abs_off = -abs_off;
2890 
2891 	    *offset = off0 + int_cst_value (tmp) + abs_off;
2892 	    return op0;
2893 	  }
2894       }
2895       break;
2896 
2897     case ADDR_EXPR:
2898       op0 = TREE_OPERAND (expr, 0);
2899       op0 = strip_offset_1 (op0, true, true, &off0);
2900       *offset += off0;
2901 
2902       if (op0 == TREE_OPERAND (expr, 0))
2903 	return orig_expr;
2904 
2905       expr = build_fold_addr_expr (op0);
2906       return fold_convert (orig_type, expr);
2907 
2908     case MEM_REF:
2909       /* ???  Offset operand?  */
2910       inside_addr = false;
2911       break;
2912 
2913     default:
2914       if (ptrdiff_tree_p (expr, offset) && maybe_ne (*offset, 0))
2915 	return build_int_cst (orig_type, 0);
2916       return orig_expr;
2917     }
2918 
2919   /* Default handling of expressions for which we want to recurse into
2920      the first operand.  */
2921   op0 = TREE_OPERAND (expr, 0);
2922   op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2923   *offset += off0;
2924 
2925   if (op0 == TREE_OPERAND (expr, 0)
2926       && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2927     return orig_expr;
2928 
2929   expr = copy_node (expr);
2930   TREE_OPERAND (expr, 0) = op0;
2931   if (op1)
2932     TREE_OPERAND (expr, 1) = op1;
2933 
2934   /* Inside an address, we might strip the top-level component references,
2935      thus changing the type of the expression.  Handling of ADDR_EXPR
2936      will fix that.  */
2937   expr = fold_convert (orig_type, expr);
2938 
2939   return expr;
2940 }
2941 
2942 /* Strips constant offsets from EXPR and stores them to OFFSET.  */
2943 
2944 tree
2945 strip_offset (tree expr, poly_uint64_pod *offset)
2946 {
2947   poly_int64 off;
2948   tree core = strip_offset_1 (expr, false, false, &off);
2949   *offset = off;
2950   return core;
2951 }
2952 
2953 /* Returns variant of TYPE that can be used as base for different uses.
2954    We return unsigned type with the same precision, which avoids problems
2955    with overflows.  */
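/* E.g. a signed "int" iv is computed in "unsigned int", and a pointer
   iv in the unsigned integer type of the same precision, so that the
   candidate arithmetic wraps instead of invoking undefined overflow.  */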
2956 
2957 static tree
2958 generic_type_for (tree type)
2959 {
2960   if (POINTER_TYPE_P (type))
2961     return unsigned_type_for (type);
2962 
2963   if (TYPE_UNSIGNED (type))
2964     return type;
2965 
2966   return unsigned_type_for (type);
2967 }
2968 
2969 /* Private data for walk_tree.  */
2970 
2971 struct walk_tree_data
2972 {
2973   bitmap *inv_vars;
2974   struct ivopts_data *idata;
2975 };
2976 
2977 /* Callback function for walk_tree; it records invariants and symbol
2978    references in *EXPR_P.  DATA is the structure storing result info.  */
2979 
2980 static tree
2981 find_inv_vars_cb (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2982 {
2983   tree op = *expr_p;
2984   struct version_info *info;
2985   struct walk_tree_data *wdata = (struct walk_tree_data*) data;
2986 
2987   if (TREE_CODE (op) != SSA_NAME)
2988     return NULL_TREE;
2989 
2990   info = name_info (wdata->idata, op);
2991   /* Because we expand simple operations when finding IVs, a loop invariant
2992      variable that isn't referred to by the original loop could be used now.
2993      Record such invariant variables here.  */
2994   if (!info->iv)
2995     {
2996       struct ivopts_data *idata = wdata->idata;
2997       basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (op));
2998 
2999       if (!bb || !flow_bb_inside_loop_p (idata->current_loop, bb))
3000 	{
3001 	  tree steptype = TREE_TYPE (op);
3002 	  if (POINTER_TYPE_P (steptype))
3003 	    steptype = sizetype;
3004 	  set_iv (idata, op, op, build_int_cst (steptype, 0), true);
3005 	  record_invariant (idata, op, false);
3006 	}
3007     }
3008   if (!info->inv_id || info->has_nonlin_use)
3009     return NULL_TREE;
3010 
3011   if (!*wdata->inv_vars)
3012     *wdata->inv_vars = BITMAP_ALLOC (NULL);
3013   bitmap_set_bit (*wdata->inv_vars, info->inv_id);
3014 
3015   return NULL_TREE;
3016 }
3017 
3018 /* Records invariants in *EXPR_P.  INV_VARS is the bitmap in which we should
3019    store them.  */
3020 
3021 static inline void
3022 find_inv_vars (struct ivopts_data *data, tree *expr_p, bitmap *inv_vars)
3023 {
3024   struct walk_tree_data wdata;
3025 
3026   if (!inv_vars)
3027     return;
3028 
3029   wdata.idata = data;
3030   wdata.inv_vars = inv_vars;
3031   walk_tree (expr_p, find_inv_vars_cb, &wdata, NULL);
3032 }
3033 
3034 /* Get the entry from the invariant expr hash table for INV_EXPR.  A new
3035    entry is recorded if it doesn't exist yet.  Given the two exprs
3036      inv_expr + cst1, inv_expr + cst2
3037    it's hard to decide whether the constant part should be stripped or
3038    not.  We choose not to strip it, based on these facts:
3039      1) We need to count the ADD cost for the constant part if it's
3040 	stripped, which isn't always trivial where this function is called.
3041      2) Stripping the constant away may conflict with the subsequent loop
3042 	invariant hoisting pass.
3043      3) Not stripping the constant results in more invariant exprs,
3044 	which usually leads to decisions preferring lower reg pressure.  */
3045 
3046 static iv_inv_expr_ent *
3047 get_loop_invariant_expr (struct ivopts_data *data, tree inv_expr)
3048 {
3049   STRIP_NOPS (inv_expr);
3050 
3051   if (poly_int_tree_p (inv_expr)
3052       || TREE_CODE (inv_expr) == SSA_NAME)
3053     return NULL;
3054 
3055   /* Don't strip constant part away as we used to.  */
3056 
3057   /* Stores EXPR in DATA->inv_expr_tab, return pointer to iv_inv_expr_ent.  */
3058   struct iv_inv_expr_ent ent;
3059   ent.expr = inv_expr;
3060   ent.hash = iterative_hash_expr (inv_expr, 0);
3061   struct iv_inv_expr_ent **slot = data->inv_expr_tab->find_slot (&ent, INSERT);
3062 
3063   if (!*slot)
3064     {
3065       *slot = XNEW (struct iv_inv_expr_ent);
3066       (*slot)->expr = inv_expr;
3067       (*slot)->hash = ent.hash;
3068       (*slot)->id = ++data->max_inv_expr_id;
3069     }
3070 
3071   return *slot;
3072 }
3073 
3074 /* Adds a candidate BASE + STEP * i.  The important field is set to
3075    IMPORTANT and the position to POS.  If USE is not NULL, the candidate
3076    is set as related to it.  Both BASE and STEP must be non-NULL here;
3077    see the assertion below.  */
3078 
3079 static struct iv_cand *
3080 add_candidate_1 (struct ivopts_data *data, tree base, tree step, bool important,
3081 		 enum iv_position pos, struct iv_use *use,
3082 		 gimple *incremented_at, struct iv *orig_iv = NULL,
3083 		 bool doloop = false)
3084 {
3085   unsigned i;
3086   struct iv_cand *cand = NULL;
3087   tree type, orig_type;
3088 
3089   gcc_assert (base && step);
3090 
3091   /* -fkeep-gc-roots-live means that we have to keep a real pointer
3092      live, but the ivopts code may replace a real pointer with one
3093      pointing before or after the memory block that is then adjusted
3094      into the memory block during the loop.  FIXME: It would likely be
3095      better to actually force the pointer live and still use ivopts;
3096      for example, it would be enough to write the pointer into memory
3097      and keep it there until after the loop.  */
3098   if (flag_keep_gc_roots_live && POINTER_TYPE_P (TREE_TYPE (base)))
3099     return NULL;
3100 
3101   /* For non-original variables, make sure their values are computed in a type
3102      that does not invoke undefined behavior on overflows (since in general,
3103      we cannot prove that these induction variables are non-wrapping).  */
3104   if (pos != IP_ORIGINAL)
3105     {
3106       orig_type = TREE_TYPE (base);
3107       type = generic_type_for (orig_type);
3108       if (type != orig_type)
3109 	{
3110 	  base = fold_convert (type, base);
3111 	  step = fold_convert (type, step);
3112 	}
3113     }
3114 
3115   for (i = 0; i < data->vcands.length (); i++)
3116     {
3117       cand = data->vcands[i];
3118 
3119       if (cand->pos != pos)
3120 	continue;
3121 
3122       if (cand->incremented_at != incremented_at
3123 	  || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
3124 	      && cand->ainc_use != use))
3125 	continue;
3126 
3127       if (operand_equal_p (base, cand->iv->base, 0)
3128 	  && operand_equal_p (step, cand->iv->step, 0)
3129 	  && (TYPE_PRECISION (TREE_TYPE (base))
3130 	      == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
3131 	break;
3132     }
3133 
3134   if (i == data->vcands.length ())
3135     {
3136       cand = XCNEW (struct iv_cand);
3137       cand->id = i;
3138       cand->iv = alloc_iv (data, base, step);
3139       cand->pos = pos;
3140       if (pos != IP_ORIGINAL)
3141 	{
3142 	  if (doloop)
3143 	    cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "doloop");
3144 	  else
3145 	    cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
3146 	  cand->var_after = cand->var_before;
3147 	}
3148       cand->important = important;
3149       cand->incremented_at = incremented_at;
3150       cand->doloop_p = doloop;
3151       data->vcands.safe_push (cand);
3152 
3153       if (!poly_int_tree_p (step))
3154 	{
3155 	  find_inv_vars (data, &step, &cand->inv_vars);
3156 
3157 	  iv_inv_expr_ent *inv_expr = get_loop_invariant_expr (data, step);
3158 	  /* Share bitmap between inv_vars and inv_exprs for cand.  */
3159 	  if (inv_expr != NULL)
3160 	    {
3161 	      cand->inv_exprs = cand->inv_vars;
3162 	      cand->inv_vars = NULL;
3163 	      if (cand->inv_exprs)
3164 		bitmap_clear (cand->inv_exprs);
3165 	      else
3166 		cand->inv_exprs = BITMAP_ALLOC (NULL);
3167 
3168 	      bitmap_set_bit (cand->inv_exprs, inv_expr->id);
3169 	    }
3170 	}
3171 
3172       if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
3173 	cand->ainc_use = use;
3174       else
3175 	cand->ainc_use = NULL;
3176 
3177       cand->orig_iv = orig_iv;
3178       if (dump_file && (dump_flags & TDF_DETAILS))
3179 	dump_cand (dump_file, cand);
3180     }
3181 
3182   cand->important |= important;
3183   cand->doloop_p |= doloop;
3184 
3185   /* Relate candidate to the group for which it is added.  */
3186   if (use)
3187     bitmap_set_bit (data->vgroups[use->group_id]->related_cands, i);
3188 
3189   return cand;
3190 }
3191 
3192 /* Returns true if incrementing the induction variable at the end of the LOOP
3193    is allowed.
3194 
3195    The purpose is to avoid splitting latch edge with a biv increment, thus
3196    creating a jump, possibly confusing other optimization passes and leaving
3197    less freedom to the scheduler.  So we allow IP_END only if IP_NORMAL is not
3198    available (so we do not have a better alternative), or if the latch edge
3199    is already nonempty.  */
3200 
3201 static bool
3202 allow_ip_end_pos_p (class loop *loop)
3203 {
3204   if (!ip_normal_pos (loop))
3205     return true;
3206 
3207   if (!empty_block_p (ip_end_pos (loop)))
3208     return true;
3209 
3210   return false;
3211 }
3212 
3213 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
3214    Important field is set to IMPORTANT.  */
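/* E.g. for a use *p where P steps by the size of the access, a target
   with post-increment addressing gets an IP_AFTER_USE candidate so the
   access can be expanded as "*p++", while with pre-increment addressing
   an IP_BEFORE_USE candidate based at BASE - STEP is added instead.  */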
3215 
3216 static void
3217 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
3218 			bool important, struct iv_use *use)
3219 {
3220   basic_block use_bb = gimple_bb (use->stmt);
3221   machine_mode mem_mode;
3222   unsigned HOST_WIDE_INT cstepi;
3223 
3224   /* If we insert the increment in any position other than the standard
3225      ones, we must ensure that it is incremented once per iteration.
3226      It must not be in an inner nested loop, or one side of an if
3227      statement.  */
3228   if (use_bb->loop_father != data->current_loop
3229       || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
3230       || stmt_can_throw_internal (cfun, use->stmt)
3231       || !cst_and_fits_in_hwi (step))
3232     return;
3233 
3234   cstepi = int_cst_value (step);
3235 
3236   mem_mode = TYPE_MODE (use->mem_type);
3237   if (((USE_LOAD_PRE_INCREMENT (mem_mode)
3238 	|| USE_STORE_PRE_INCREMENT (mem_mode))
3239        && known_eq (GET_MODE_SIZE (mem_mode), cstepi))
3240       || ((USE_LOAD_PRE_DECREMENT (mem_mode)
3241 	   || USE_STORE_PRE_DECREMENT (mem_mode))
3242 	  && known_eq (GET_MODE_SIZE (mem_mode), -cstepi)))
3243     {
3244       enum tree_code code = MINUS_EXPR;
3245       tree new_base;
3246       tree new_step = step;
3247 
3248       if (POINTER_TYPE_P (TREE_TYPE (base)))
3249 	{
3250 	  new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
3251 	  code = POINTER_PLUS_EXPR;
3252 	}
3253       else
3254 	new_step = fold_convert (TREE_TYPE (base), new_step);
3255       new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
3256       add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
3257 		       use->stmt);
3258     }
3259   if (((USE_LOAD_POST_INCREMENT (mem_mode)
3260 	|| USE_STORE_POST_INCREMENT (mem_mode))
3261        && known_eq (GET_MODE_SIZE (mem_mode), cstepi))
3262       || ((USE_LOAD_POST_DECREMENT (mem_mode)
3263 	   || USE_STORE_POST_DECREMENT (mem_mode))
3264 	  && known_eq (GET_MODE_SIZE (mem_mode), -cstepi)))
3265     {
3266       add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
3267 		       use->stmt);
3268     }
3269 }
3270 
3271 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
3272    position to POS.  If USE is not NULL, the candidate is set as related to
3273    it.  The candidate computation is scheduled before the exit condition and
3274    at the end of the loop.  */
3275 
3276 static void
3277 add_candidate (struct ivopts_data *data, tree base, tree step, bool important,
3278 	       struct iv_use *use, struct iv *orig_iv = NULL,
3279 	       bool doloop = false)
3280 {
3281   if (ip_normal_pos (data->current_loop))
3282     add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL, orig_iv,
3283 		     doloop);
3284   /* Exclude the doloop candidate here, since it requires decrement then
3285      comparison and jump, which the IP_END position doesn't match.  */
3286   if (!doloop && ip_end_pos (data->current_loop)
3287       && allow_ip_end_pos_p (data->current_loop))
3288     add_candidate_1 (data, base, step, important, IP_END, use, NULL, orig_iv);
3289 }
3290 
3291 /* Adds standard iv candidates.  */
3292 
3293 static void
3294 add_standard_iv_candidates (struct ivopts_data *data)
3295 {
3296   add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
3297 
3298   /* The same for a double-integer type if it is still fast enough.  */
3299   if (TYPE_PRECISION
3300 	(long_integer_type_node) > TYPE_PRECISION (integer_type_node)
3301       && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
3302     add_candidate (data, build_int_cst (long_integer_type_node, 0),
3303 		   build_int_cst (long_integer_type_node, 1), true, NULL);
3304 
3305   /* The same for a double-integer type if it is still fast enough.  */
3306   if (TYPE_PRECISION
3307 	(long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
3308       && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
3309     add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
3310 		   build_int_cst (long_long_integer_type_node, 1), true, NULL);
3311 }
3312 
3313 
3314 /* Adds candidates based on the old induction variable IV.  */
3315 
3316 static void
3317 add_iv_candidate_for_biv (struct ivopts_data *data, struct iv *iv)
3318 {
3319   gimple *phi;
3320   tree def;
3321   struct iv_cand *cand;
3322 
3323   /* Check if this biv is used in an address type use.  */
3324   if (iv->no_overflow  && iv->have_address_use
3325       && INTEGRAL_TYPE_P (TREE_TYPE (iv->base))
3326       && TYPE_PRECISION (TREE_TYPE (iv->base)) < TYPE_PRECISION (sizetype))
3327     {
3328       tree base = fold_convert (sizetype, iv->base);
3329       tree step = fold_convert (sizetype, iv->step);
3330 
3331       /* Add iv cand of same precision as index part in TARGET_MEM_REF.  */
3332       add_candidate (data, base, step, true, NULL, iv);
3333       /* Add iv cand of the original type only if it has nonlinear use.  */
3334       if (iv->nonlin_use)
3335 	add_candidate (data, iv->base, iv->step, true, NULL);
3336     }
3337   else
3338     add_candidate (data, iv->base, iv->step, true, NULL);
3339 
3340   /* The same, but with initial value zero.  */
3341   if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
3342     add_candidate (data, size_int (0), iv->step, true, NULL);
3343   else
3344     add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
3345 		   iv->step, true, NULL);
3346 
3347   phi = SSA_NAME_DEF_STMT (iv->ssa_name);
3348   if (gimple_code (phi) == GIMPLE_PHI)
3349     {
3350       /* Additionally record the possibility of leaving the original iv
3351 	 untouched.  */
3352       def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
3353       /* Don't add candidate if it's from another PHI node because
3354 	 it's an affine iv appearing in the form of PEELED_CHREC.  */
3355       phi = SSA_NAME_DEF_STMT (def);
3356       if (gimple_code (phi) != GIMPLE_PHI)
3357 	{
3358 	  cand = add_candidate_1 (data,
3359 				  iv->base, iv->step, true, IP_ORIGINAL, NULL,
3360 				  SSA_NAME_DEF_STMT (def));
3361 	  if (cand)
3362 	    {
3363 	      cand->var_before = iv->ssa_name;
3364 	      cand->var_after = def;
3365 	    }
3366 	}
3367       else
3368 	gcc_assert (gimple_bb (phi) == data->current_loop->header);
3369     }
3370 }
3371 
3372 /* Adds candidates based on the old induction variables.  */
3373 
3374 static void
3375 add_iv_candidate_for_bivs (struct ivopts_data *data)
3376 {
3377   unsigned i;
3378   struct iv *iv;
3379   bitmap_iterator bi;
3380 
3381   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
3382     {
3383       iv = ver_info (data, i)->iv;
3384       if (iv && iv->biv_p && !integer_zerop (iv->step))
3385 	add_iv_candidate_for_biv (data, iv);
3386     }
3387 }
3388 
3389 /* Record common candidate {BASE, STEP} derived from USE in hashtable.  */
3390 
3391 static void
3392 record_common_cand (struct ivopts_data *data, tree base,
3393 		    tree step, struct iv_use *use)
3394 {
3395   class iv_common_cand ent;
3396   class iv_common_cand **slot;
3397 
3398   ent.base = base;
3399   ent.step = step;
3400   ent.hash = iterative_hash_expr (base, 0);
3401   ent.hash = iterative_hash_expr (step, ent.hash);
3402 
3403   slot = data->iv_common_cand_tab->find_slot (&ent, INSERT);
3404   if (*slot == NULL)
3405     {
3406       *slot = new iv_common_cand ();
3407       (*slot)->base = base;
3408       (*slot)->step = step;
3409       (*slot)->uses.create (8);
3410       (*slot)->hash = ent.hash;
3411       data->iv_common_cands.safe_push ((*slot));
3412     }
3413 
3414   gcc_assert (use != NULL);
3415   (*slot)->uses.safe_push (use);
3416   return;
3417 }
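
/* For example (assuming equal element sizes), the two address uses in

	for (i = 0; i < n; i++)
	  a[i] = b[i];

   both record the zero-based pair {0, step} among their common
   candidates, so they end up in a single iv_common_cand entry whose
   USES vector then holds both uses.  */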
3418 
3419 /* Comparison function used to sort common candidates.  */
3420 
3421 static int
3422 common_cand_cmp (const void *p1, const void *p2)
3423 {
3424   unsigned n1, n2;
3425   const class iv_common_cand *const *const ccand1
3426     = (const class iv_common_cand *const *)p1;
3427   const class iv_common_cand *const *const ccand2
3428     = (const class iv_common_cand *const *)p2;
3429 
3430   n1 = (*ccand1)->uses.length ();
3431   n2 = (*ccand2)->uses.length ();
3432   return n2 - n1;
3433 }
3434 
3435 /* Adds IV candidates based on the common candidates recorded.  */
3436 
3437 static void
3438 add_iv_candidate_derived_from_uses (struct ivopts_data *data)
3439 {
3440   unsigned i, j;
3441   struct iv_cand *cand_1, *cand_2;
3442 
3443   data->iv_common_cands.qsort (common_cand_cmp);
3444   for (i = 0; i < data->iv_common_cands.length (); i++)
3445     {
3446       class iv_common_cand *ptr = data->iv_common_cands[i];
3447 
3448       /* Only add IV candidate if it's derived from multiple uses.  */
3449       if (ptr->uses.length () <= 1)
3450 	break;
3451 
3452       cand_1 = NULL;
3453       cand_2 = NULL;
3454       if (ip_normal_pos (data->current_loop))
3455 	cand_1 = add_candidate_1 (data, ptr->base, ptr->step,
3456 				  false, IP_NORMAL, NULL, NULL);
3457 
3458       if (ip_end_pos (data->current_loop)
3459 	  && allow_ip_end_pos_p (data->current_loop))
3460 	cand_2 = add_candidate_1 (data, ptr->base, ptr->step,
3461 				  false, IP_END, NULL, NULL);
3462 
3463       /* Bind deriving uses and the new candidates.  */
3464       for (j = 0; j < ptr->uses.length (); j++)
3465 	{
3466 	  struct iv_group *group = data->vgroups[ptr->uses[j]->group_id];
3467 	  if (cand_1)
3468 	    bitmap_set_bit (group->related_cands, cand_1->id);
3469 	  if (cand_2)
3470 	    bitmap_set_bit (group->related_cands, cand_2->id);
3471 	}
3472     }
3473 
3474   /* Release the data since it is no longer needed from this point.  */
3475   data->iv_common_cand_tab->empty ();
3476   data->iv_common_cands.truncate (0);
3477 }
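
/* For instance, if the recorded use counts are {3, 2, 1, 1}, the qsort
   above yields exactly that order and the loop stops at the first count
   of 1: only candidates derived from at least two uses are added, at
   the normal and/or end increment position.  */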
3478 
3479 /* Adds candidates based on the value of USE's iv.  */
3480 
3481 static void
3482 add_iv_candidate_for_use (struct ivopts_data *data, struct iv_use *use)
3483 {
3484   poly_uint64 offset;
3485   tree base;
3486   struct iv *iv = use->iv;
3487   tree basetype = TREE_TYPE (iv->base);
3488 
3489   /* Don't add a candidate for an iv_use whose type is neither integer nor
3490      pointer, or lacks mode precision; instead add a candidate for the
3491      corresponding scev in an unsigned type of equal precision.  See PR93674.  */
3492   if ((TREE_CODE (basetype) != INTEGER_TYPE && !POINTER_TYPE_P (basetype))
3493       || !type_has_mode_precision_p (basetype))
3494     {
3495       basetype = lang_hooks.types.type_for_mode (TYPE_MODE (basetype),
3496 						 TYPE_UNSIGNED (basetype));
3497       add_candidate (data, fold_convert (basetype, iv->base),
3498 		     fold_convert (basetype, iv->step), false, NULL);
3499       return;
3500     }
3501 
3502   add_candidate (data, iv->base, iv->step, false, use);
3503 
3504   /* Record common candidate for use in case it can be shared by others.  */
3505   record_common_cand (data, iv->base, iv->step, use);
3506 
3507   /* Record common candidate with initial value zero.  */
3508   basetype = TREE_TYPE (iv->base);
3509   if (POINTER_TYPE_P (basetype))
3510     basetype = sizetype;
3511   record_common_cand (data, build_int_cst (basetype, 0), iv->step, use);
3512 
3513   /* Compare the cost of an address with an unscaled index to the cost
3514      of an address with a scaled index, and add a candidate if useful.  */
3515   poly_int64 step;
3516   if (use != NULL
3517       && poly_int_tree_p (iv->step, &step)
3518       && address_p (use->type))
3519     {
3520       poly_int64 new_step;
3521       unsigned int fact = preferred_mem_scale_factor
3522 	(use->iv->base,
3523 	 TYPE_MODE (use->mem_type),
3524 	 optimize_loop_for_speed_p (data->current_loop));
3525 
3526       if (fact != 1
3527 	  && multiple_p (step, fact, &new_step))
3528 	add_candidate (data, size_int (0),
3529 		       wide_int_to_tree (sizetype, new_step),
3530 		       true, NULL);
3531     }
3532 
3533   /* Record common candidate with constant offset stripped in base.
3534      As for the use itself, we also add a candidate directly for it.  */
3535   base = strip_offset (iv->base, &offset);
3536   if (maybe_ne (offset, 0U) || base != iv->base)
3537     {
3538       record_common_cand (data, base, iv->step, use);
3539       add_candidate (data, base, iv->step, false, use);
3540     }
3541 
3542   /* Record common candidate with base_object removed in base.  */
3543   base = iv->base;
3544   STRIP_NOPS (base);
3545   if (iv->base_object != NULL && TREE_CODE (base) == POINTER_PLUS_EXPR)
3546     {
3547       tree step = iv->step;
3548 
3549       STRIP_NOPS (step);
3550       base = TREE_OPERAND (base, 1);
3551       step = fold_convert (sizetype, step);
3552       record_common_cand (data, base, step, use);
3553       /* Also record common candidate with offset stripped.  */
3554       base = strip_offset (base, &offset);
3555       if (maybe_ne (offset, 0U))
3556 	record_common_cand (data, base, step, use);
3557     }
3558 
3559   /* Finally, add auto-increment candidates.  Make such variables
3560      important since other iv uses with the same base object may be
3561      based on them.  */
3562   if (use != NULL && address_p (use->type))
3563     add_autoinc_candidates (data, iv->base, iv->step, true, use);
3564 }
3565 
3566 /* Adds candidates based on the uses.  */
3567 
3568 static void
3569 add_iv_candidate_for_groups (struct ivopts_data *data)
3570 {
3571   unsigned i;
3572 
3573   /* Only add a candidate for the first use in each group.  */
3574   for (i = 0; i < data->vgroups.length (); i++)
3575     {
3576       struct iv_group *group = data->vgroups[i];
3577 
3578       gcc_assert (group->vuses[0] != NULL);
3579       add_iv_candidate_for_use (data, group->vuses[0]);
3580     }
3581   add_iv_candidate_derived_from_uses (data);
3582 }
3583 
3584 /* Record important candidates and add them to related_cands bitmaps.  */
3585 
3586 static void
3587 record_important_candidates (struct ivopts_data *data)
3588 {
3589   unsigned i;
3590   struct iv_group *group;
3591 
3592   for (i = 0; i < data->vcands.length (); i++)
3593     {
3594       struct iv_cand *cand = data->vcands[i];
3595 
3596       if (cand->important)
3597 	bitmap_set_bit (data->important_candidates, i);
3598     }
3599 
3600   data->consider_all_candidates = (data->vcands.length ()
3601 				   <= CONSIDER_ALL_CANDIDATES_BOUND);
3602 
3603   /* Add important candidates to groups' related_cands bitmaps.  */
3604   for (i = 0; i < data->vgroups.length (); i++)
3605     {
3606       group = data->vgroups[i];
3607       bitmap_ior_into (group->related_cands, data->important_candidates);
3608     }
3609 }
3610 
3611 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
3612    If consider_all_candidates is true, we use a two-dimensional array;
3613    otherwise we allocate a simple list for every use.  */
3614 
3615 static void
3616 alloc_use_cost_map (struct ivopts_data *data)
3617 {
3618   unsigned i, size, s;
3619 
3620   for (i = 0; i < data->vgroups.length (); i++)
3621     {
3622       struct iv_group *group = data->vgroups[i];
3623 
3624       if (data->consider_all_candidates)
3625 	size = data->vcands.length ();
3626       else
3627 	{
3628 	  s = bitmap_count_bits (group->related_cands);
3629 
3630 	  /* Round up to the nearest power of two, so the modulo is fast.  */
3631 	  size = s ? (1 << ceil_log2 (s)) : 1;
3632 	}
3633 
3634       group->n_map_members = size;
3635       group->cost_map = XCNEWVEC (class cost_pair, size);
3636     }
3637 }
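
/* For example, a group related to five candidates gets
   size = 1 << ceil_log2 (5) = 8 cost_map members; the lookup code can
   then reduce a candidate id modulo the size with the cheap mask
   "id & 7" instead of a division.  */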
3638 
3639 /* Sets the cost of the (GROUP, CAND) pair to COST, records that it
3640    depends on the invariants INV_VARS and expressions INV_EXPRS, that the
3641    value used is VALUE, and, for iv elimination, the comparison COMP.  */
3642 
3643 static void
3644 set_group_iv_cost (struct ivopts_data *data,
3645 		   struct iv_group *group, struct iv_cand *cand,
3646 		   comp_cost cost, bitmap inv_vars, tree value,
3647 		   enum tree_code comp, bitmap inv_exprs)
3648 {
3649   unsigned i, s;
3650 
3651   if (cost.infinite_cost_p ())
3652     {
3653       BITMAP_FREE (inv_vars);
3654       BITMAP_FREE (inv_exprs);
3655       return;
3656     }
3657 
3658   if (data->consider_all_candidates)
3659     {
3660       group->cost_map[cand->id].cand = cand;
3661       group->cost_map[cand->id].cost = cost;
3662       group->cost_map[cand->id].inv_vars = inv_vars;
3663       group->cost_map[cand->id].inv_exprs = inv_exprs;
3664       group->cost_map[cand->id].value = value;
3665       group->cost_map[cand->id].comp = comp;
3666       return;
3667     }
3668 
3669   /* n_map_members is a power of two, so this computes modulo.  */
3670   s = cand->id & (group->n_map_members - 1);
3671   for (i = s; i < group->n_map_members; i++)
3672     if (!group->cost_map[i].cand)
3673       goto found;
3674   for (i = 0; i < s; i++)
3675     if (!group->cost_map[i].cand)
3676       goto found;
3677 
3678   gcc_unreachable ();
3679 
3680 found:
3681   group->cost_map[i].cand = cand;
3682   group->cost_map[i].cost = cost;
3683   group->cost_map[i].inv_vars = inv_vars;
3684   group->cost_map[i].inv_exprs = inv_exprs;
3685   group->cost_map[i].value = value;
3686   group->cost_map[i].comp = comp;
3687 }
3688 
3689 /* Gets cost of (GROUP, CAND) pair.  */
3690 
3691 static class cost_pair *
3692 get_group_iv_cost (struct ivopts_data *data, struct iv_group *group,
3693 		   struct iv_cand *cand)
3694 {
3695   unsigned i, s;
3696   class cost_pair *ret;
3697 
3698   if (!cand)
3699     return NULL;
3700 
3701   if (data->consider_all_candidates)
3702     {
3703       ret = group->cost_map + cand->id;
3704       if (!ret->cand)
3705 	return NULL;
3706 
3707       return ret;
3708     }
3709 
3710   /* n_map_members is a power of two, so this computes modulo.  */
3711   s = cand->id & (group->n_map_members - 1);
3712   for (i = s; i < group->n_map_members; i++)
3713     if (group->cost_map[i].cand == cand)
3714       return group->cost_map + i;
3715     else if (group->cost_map[i].cand == NULL)
3716       return NULL;
3717   for (i = 0; i < s; i++)
3718     if (group->cost_map[i].cand == cand)
3719       return group->cost_map + i;
3720     else if (group->cost_map[i].cand == NULL)
3721       return NULL;
3722 
3723   return NULL;
3724 }
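
/* Together, set_group_iv_cost and get_group_iv_cost implement a small
   open-addressing hash table: e.g. with n_map_members == 8, candidate
   id 13 starts probing at slot 13 & 7 == 5 and walks forward, wrapping
   around once, until it finds its own entry or an empty slot.  */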
3725 
3726 /* Produce DECL_RTL for object OBJ so it looks like it is stored in memory.  */
3727 static rtx
3728 produce_memory_decl_rtl (tree obj, int *regno)
3729 {
3730   addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
3731   machine_mode address_mode = targetm.addr_space.address_mode (as);
3732   rtx x;
3733 
3734   gcc_assert (obj);
3735   if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
3736     {
3737       const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
3738       x = gen_rtx_SYMBOL_REF (address_mode, name);
3739       SET_SYMBOL_REF_DECL (x, obj);
3740       x = gen_rtx_MEM (DECL_MODE (obj), x);
3741       set_mem_addr_space (x, as);
3742       targetm.encode_section_info (obj, x, true);
3743     }
3744   else
3745     {
3746       x = gen_raw_REG (address_mode, (*regno)++);
3747       x = gen_rtx_MEM (DECL_MODE (obj), x);
3748       set_mem_addr_space (x, as);
3749     }
3750 
3751   return x;
3752 }
3753 
3754 /* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
3755    walk_tree.  DATA contains the actual fake register number.  */
3756 
3757 static tree
3758 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
3759 {
3760   tree obj = NULL_TREE;
3761   rtx x = NULL_RTX;
3762   int *regno = (int *) data;
3763 
3764   switch (TREE_CODE (*expr_p))
3765     {
3766     case ADDR_EXPR:
3767       for (expr_p = &TREE_OPERAND (*expr_p, 0);
3768 	   handled_component_p (*expr_p);
3769 	   expr_p = &TREE_OPERAND (*expr_p, 0))
3770 	continue;
3771       obj = *expr_p;
3772       if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
3773 	x = produce_memory_decl_rtl (obj, regno);
3774       break;
3775 
3776     case SSA_NAME:
3777       *ws = 0;
3778       obj = SSA_NAME_VAR (*expr_p);
3779       /* Defer handling of anonymous SSA_NAMEs to the expander.  */
3780       if (!obj)
3781 	return NULL_TREE;
3782       if (!DECL_RTL_SET_P (obj))
3783 	x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3784       break;
3785 
3786     case VAR_DECL:
3787     case PARM_DECL:
3788     case RESULT_DECL:
3789       *ws = 0;
3790       obj = *expr_p;
3791 
3792       if (DECL_RTL_SET_P (obj))
3793 	break;
3794 
3795       if (DECL_MODE (obj) == BLKmode)
3796 	x = produce_memory_decl_rtl (obj, regno);
3797       else
3798 	x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3799 
3800       break;
3801 
3802     default:
3803       break;
3804     }
3805 
3806   if (x)
3807     {
3808       decl_rtl_to_reset.safe_push (obj);
3809       SET_DECL_RTL (obj, x);
3810     }
3811 
3812   return NULL_TREE;
3813 }
3814 
3815 /* Predict whether the given loop will be transformed in the RTL
3816    doloop_optimize pass.  Attempt to duplicate some doloop_optimize checks.
3817    This covers only the target-independent checks; see
3818    targetm.predict_doloop_p for the target-dependent ones.
3819 
3820    Note that according to some initial investigation, checks such as the
3821    costly niter check and invalid stmt scanning don't gain much in the
3822    general case, so keep this as simple as possible for now.
3823 
3824    Some RTL-specific checks seem impossible to check in gimple; if any
3825    new or easy checks _are_ missing here, please add them.  */
3826 
3827 static bool
3828 generic_predict_doloop_p (struct ivopts_data *data)
3829 {
3830   class loop *loop = data->current_loop;
3831 
3832   /* Call target hook for target dependent checks.  */
3833   if (!targetm.predict_doloop_p (loop))
3834     {
3835       if (dump_file && (dump_flags & TDF_DETAILS))
3836 	fprintf (dump_file, "Predict doloop failure due to"
3837 			    " target specific checks.\n");
3838       return false;
3839     }
3840 
3841   /* Similar to doloop_optimize, check the iteration description to see
3842      whether it's suitable.  Keep it as simple as possible; feel free to
3843      extend it if you find any multiple-exit cases that matter.  */
3844   edge exit = single_dom_exit (loop);
3845   class tree_niter_desc *niter_desc;
3846   if (!exit || !(niter_desc = niter_for_exit (data, exit)))
3847     {
3848       if (dump_file && (dump_flags & TDF_DETAILS))
3849 	fprintf (dump_file, "Predict doloop failure due to"
3850 			    " unexpected niters.\n");
3851       return false;
3852     }
3853 
3854   /* Similar to doloop_optimize, check whether the iteration count is
3855      too small to be profitable.  */
3856   HOST_WIDE_INT est_niter = get_estimated_loop_iterations_int (loop);
3857   if (est_niter == -1)
3858     est_niter = get_likely_max_loop_iterations_int (loop);
3859   if (est_niter >= 0 && est_niter < 3)
3860     {
3861       if (dump_file && (dump_flags & TDF_DETAILS))
3862 	fprintf (dump_file,
3863 		 "Predict doloop failure due to"
3864 		 " too few iterations (%u).\n",
3865 		 (unsigned int) est_niter);
3866       return false;
3867     }
3868 
3869   return true;
3870 }
3871 
3872 /* Determines cost of the computation of EXPR.  */
3873 
3874 static unsigned
3875 computation_cost (tree expr, bool speed)
3876 {
3877   rtx_insn *seq;
3878   rtx rslt;
3879   tree type = TREE_TYPE (expr);
3880   unsigned cost;
3881   /* Avoid using hard regs in ways which may be unsupported.  */
3882   int regno = LAST_VIRTUAL_REGISTER + 1;
3883   struct cgraph_node *node = cgraph_node::get (current_function_decl);
3884   enum node_frequency real_frequency = node->frequency;
3885 
3886   node->frequency = NODE_FREQUENCY_NORMAL;
3887   crtl->maybe_hot_insn_p = speed;
3888   walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
3889   start_sequence ();
3890   rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
3891   seq = get_insns ();
3892   end_sequence ();
3893   default_rtl_profile ();
3894   node->frequency = real_frequency;
3895 
3896   cost = seq_cost (seq, speed);
3897   if (MEM_P (rslt))
3898     cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
3899 			  TYPE_ADDR_SPACE (type), speed);
3900   else if (!REG_P (rslt))
3901     cost += set_src_cost (rslt, TYPE_MODE (type), speed);
3902 
3903   return cost;
3904 }
3905 
3906 /* Returns the variable containing the value of candidate CAND at STMT.  */
3907 
3908 static tree
3909 var_at_stmt (class loop *loop, struct iv_cand *cand, gimple *stmt)
3910 {
3911   if (stmt_after_increment (loop, cand, stmt))
3912     return cand->var_after;
3913   else
3914     return cand->var_before;
3915 }
3916 
3917 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3918    same precision that is at least as wide as the precision of TYPE, stores
3919    BA to A and BB to B, and returns the type of BA.  Otherwise, returns the
3920    type of A and B.  */
3921 
3922 static tree
3923 determine_common_wider_type (tree *a, tree *b)
3924 {
3925   tree wider_type = NULL;
3926   tree suba, subb;
3927   tree atype = TREE_TYPE (*a);
3928 
3929   if (CONVERT_EXPR_P (*a))
3930     {
3931       suba = TREE_OPERAND (*a, 0);
3932       wider_type = TREE_TYPE (suba);
3933       if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3934 	return atype;
3935     }
3936   else
3937     return atype;
3938 
3939   if (CONVERT_EXPR_P (*b))
3940     {
3941       subb = TREE_OPERAND (*b, 0);
3942       if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3943 	return atype;
3944     }
3945   else
3946     return atype;
3947 
3948   *a = suba;
3949   *b = subb;
3950   return wider_type;
3951 }
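
/* A small illustration: given "long la, lb" and

	a = (int) la;  b = (int) lb;

   both operands are conversions from the wider type "long" with equal
   precision, so *A and *B are replaced by LA and LB and "long" is
   returned; the caller can then fold their difference in the wider
   type.  */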
3952 
3953 /* Determines the expression by which USE is expressed from induction
3954    variable CAND at statement AT in LOOP.  The expression is stored in two
3955    parts in a decomposed form: the invariant part in AFF_INV, the variant
3956    part in AFF_VAR.  Store the ratio of USE.step over CAND.step in PRAT if
3957    it's non-null.  Returns false if USE cannot be expressed using CAND.  */
3958 
3959 static bool
3960 get_computation_aff_1 (class loop *loop, gimple *at, struct iv_use *use,
3961 		       struct iv_cand *cand, class aff_tree *aff_inv,
3962 		       class aff_tree *aff_var, widest_int *prat = NULL)
3963 {
3964   tree ubase = use->iv->base, ustep = use->iv->step;
3965   tree cbase = cand->iv->base, cstep = cand->iv->step;
3966   tree common_type, uutype, var, cstep_common;
3967   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3968   aff_tree aff_cbase;
3969   widest_int rat;
3970 
3971   /* We must have enough precision to express the values of the use.  */
3972   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3973     return false;
3974 
3975   var = var_at_stmt (loop, cand, at);
3976   uutype = unsigned_type_for (utype);
3977 
3978   /* If the conversion is not a no-op, perform it.  */
3979   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3980     {
3981       if (cand->orig_iv != NULL && CONVERT_EXPR_P (cbase)
3982 	  && (CONVERT_EXPR_P (cstep) || poly_int_tree_p (cstep)))
3983 	{
3984 	  tree inner_base, inner_step, inner_type;
3985 	  inner_base = TREE_OPERAND (cbase, 0);
3986 	  if (CONVERT_EXPR_P (cstep))
3987 	    inner_step = TREE_OPERAND (cstep, 0);
3988 	  else
3989 	    inner_step = cstep;
3990 
3991 	  inner_type = TREE_TYPE (inner_base);
3992 	  /* If the candidate is added from a biv whose type is smaller
3993 	     than ctype, we know that neither the candidate nor the biv
3994 	     overflows.  In this case, it's safe to skip the conversion in
3995 	     the candidate.  As an example, (unsigned short)((unsigned long)A)
3996 	     equals (unsigned short)A, if A has a type no larger than short.  */
3997 	  if (TYPE_PRECISION (inner_type) <= TYPE_PRECISION (uutype))
3998 	    {
3999 	      cbase = inner_base;
4000 	      cstep = inner_step;
4001 	    }
4002 	}
4003       cbase = fold_convert (uutype, cbase);
4004       cstep = fold_convert (uutype, cstep);
4005       var = fold_convert (uutype, var);
4006     }
4007 
4008   /* Ratio is 1 when computing the value of biv cand by itself.
4009      We can't rely on constant_multiple_of in this case because the
4010      use is created after the original biv is selected.  The call
4011      could fail because of inconsistent fold behavior.  See PR68021
4012      for more information.  */
4013   if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
4014     {
4015       gcc_assert (is_gimple_assign (use->stmt));
4016       gcc_assert (use->iv->ssa_name == cand->var_after);
4017       gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
4018       rat = 1;
4019     }
4020   else if (!constant_multiple_of (ustep, cstep, &rat))
4021     return false;
4022 
4023   if (prat)
4024     *prat = rat;
4025 
4026   /* In case both UBASE and CBASE are shortened to UUTYPE from some common
4027      type, we achieve better folding by computing their difference in this
4028      wider type, and casting the result to UUTYPE.  We do not need to worry
4029      about overflows, as all the arithmetic will in the end be performed in
4030      UUTYPE anyway.  */
4031   common_type = determine_common_wider_type (&ubase, &cbase);
4032 
4033   /* use = ubase - ratio * cbase + ratio * var.  */
4034   tree_to_aff_combination (ubase, common_type, aff_inv);
4035   tree_to_aff_combination (cbase, common_type, &aff_cbase);
4036   tree_to_aff_combination (var, uutype, aff_var);
4037 
4038   /* We need to shift the value if we are after the increment.  */
4039   if (stmt_after_increment (loop, cand, at))
4040     {
4041       aff_tree cstep_aff;
4042 
4043       if (common_type != uutype)
4044 	cstep_common = fold_convert (common_type, cstep);
4045       else
4046 	cstep_common = cstep;
4047 
4048       tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
4049       aff_combination_add (&aff_cbase, &cstep_aff);
4050     }
4051 
4052   aff_combination_scale (&aff_cbase, -rat);
4053   aff_combination_add (aff_inv, &aff_cbase);
4054   if (common_type != uutype)
4055     aff_combination_convert (aff_inv, uutype);
4056 
4057   aff_combination_scale (aff_var, rat);
4058   return true;
4059 }
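
/* A worked instance of the decomposition above, with illustrative
   numbers: for a use {base 16, step 4} expressed from a candidate
   {base 0, step 1}, we get rat == 4 and

	use = 16 - 4 * 0 + 4 * cand

   i.e. AFF_INV holds the constant 16 and AFF_VAR holds 4 * cand.  */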
4060 
4061 /* Determines the expression by which USE is expressed from induction
4062    variable CAND at statement AT in LOOP.  The expression is stored in
4063    decomposed form in AFF.  Returns false if USE cannot be expressed using CAND.  */
4064 
4065 static bool
4066 get_computation_aff (class loop *loop, gimple *at, struct iv_use *use,
4067 		     struct iv_cand *cand, class aff_tree *aff)
4068 {
4069   aff_tree aff_var;
4070 
4071   if (!get_computation_aff_1 (loop, at, use, cand, aff, &aff_var))
4072     return false;
4073 
4074   aff_combination_add (aff, &aff_var);
4075   return true;
4076 }
4077 
4078 /* Return the type of USE.  */
4079 
4080 static tree
4081 get_use_type (struct iv_use *use)
4082 {
4083   tree base_type = TREE_TYPE (use->iv->base);
4084   tree type;
4085 
4086   if (use->type == USE_REF_ADDRESS)
4087     {
4088       /* The base_type may be a void pointer.  Create a pointer type based on
4089 	 the mem_ref instead.  */
4090       type = build_pointer_type (TREE_TYPE (*use->op_p));
4091       gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
4092 		  == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
4093     }
4094   else
4095     type = base_type;
4096 
4097   return type;
4098 }
4099 
4100 /* Determines the expression by which USE is expressed from induction
4101    variable CAND at statement AT in LOOP.  The computation is unshared.  */
4102 
4103 static tree
4104 get_computation_at (class loop *loop, gimple *at,
4105 		    struct iv_use *use, struct iv_cand *cand)
4106 {
4107   aff_tree aff;
4108   tree type = get_use_type (use);
4109 
4110   if (!get_computation_aff (loop, at, use, cand, &aff))
4111     return NULL_TREE;
4112   unshare_aff_combination (&aff);
4113   return fold_convert (type, aff_combination_to_tree (&aff));
4114 }
4115 
4116 /* Like get_computation_at, but try harder, even if the computation
4117    is more expensive.  Intended for debug stmts.  */
4118 
4119 static tree
4120 get_debug_computation_at (class loop *loop, gimple *at,
4121 			  struct iv_use *use, struct iv_cand *cand)
4122 {
4123   if (tree ret = get_computation_at (loop, at, use, cand))
4124     return ret;
4125 
4126   tree ubase = use->iv->base, ustep = use->iv->step;
4127   tree cbase = cand->iv->base, cstep = cand->iv->step;
4128   tree var;
4129   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
4130   widest_int rat;
4131 
4132   /* We must have enough precision to express the values of the use.  */
4133   if (TYPE_PRECISION (utype) >= TYPE_PRECISION (ctype))
4134     return NULL_TREE;
4135 
4136   /* Try to handle the case that get_computation_at doesn't:
4137      express
4138      use = ubase + (var - cbase) / ratio.  */
4139   if (!constant_multiple_of (cstep, fold_convert (TREE_TYPE (cstep), ustep),
4140 			     &rat))
4141     return NULL_TREE;
4142 
4143   bool neg_p = false;
4144   if (wi::neg_p (rat))
4145     {
4146       if (TYPE_UNSIGNED (ctype))
4147 	return NULL_TREE;
4148       neg_p = true;
4149       rat = wi::neg (rat);
4150     }
4151 
4152   /* If both IVs can wrap around and CAND doesn't have a power of two step,
4153      it is unsafe.  Consider uint16_t CAND with step 9, when wrapping around,
4154      the values will be ... 0xfff0, 0xfff9, 2, 11 ... and when use is say
4155      uint8_t with step 3, those values divided by 3 cast to uint8_t will be
4156      ... 0x50, 0x53, 0, 3 ... rather than expected 0x50, 0x53, 0x56, 0x59.  */
4157   if (!use->iv->no_overflow
4158       && !cand->iv->no_overflow
4159       && !integer_pow2p (cstep))
4160     return NULL_TREE;
4161 
4162   int bits = wi::exact_log2 (rat);
4163   if (bits == -1)
4164     bits = wi::floor_log2 (rat) + 1;
4165   if (!cand->iv->no_overflow
4166       && TYPE_PRECISION (utype) + bits > TYPE_PRECISION (ctype))
4167     return NULL_TREE;
4168 
4169   var = var_at_stmt (loop, cand, at);
4170 
4171   if (POINTER_TYPE_P (ctype))
4172     {
4173       ctype = unsigned_type_for (ctype);
4174       cbase = fold_convert (ctype, cbase);
4175       cstep = fold_convert (ctype, cstep);
4176       var = fold_convert (ctype, var);
4177     }
4178 
4179   if (stmt_after_increment (loop, cand, at))
4180     var = fold_build2 (MINUS_EXPR, TREE_TYPE (var), var,
4181 		       unshare_expr (cstep));
4182 
4183   var = fold_build2 (MINUS_EXPR, TREE_TYPE (var), var, cbase);
4184   var = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (var), var,
4185 		     wide_int_to_tree (TREE_TYPE (var), rat));
4186   if (POINTER_TYPE_P (utype))
4187     {
4188       var = fold_convert (sizetype, var);
4189       if (neg_p)
4190 	var = fold_build1 (NEGATE_EXPR, sizetype, var);
4191       var = fold_build2 (POINTER_PLUS_EXPR, utype, ubase, var);
4192     }
4193   else
4194     {
4195       var = fold_convert (utype, var);
4196       var = fold_build2 (neg_p ? MINUS_EXPR : PLUS_EXPR, utype,
4197 			 ubase, var);
4198     }
4199   return var;
4200 }
4201 
4202 /* Adjust the cost COST for being in loop setup rather than loop body.
4203    If we're optimizing for space, the loop setup overhead is constant;
4204    if we're optimizing for speed, amortize it over the average number
4205    of iterations.  If ROUND_UP_P is true, the result is rounded up
4206    rather than truncated toward zero when optimizing for speed.  */
4207 static int64_t
4208 adjust_setup_cost (struct ivopts_data *data, int64_t cost,
4209 		   bool round_up_p = false)
4210 {
4211   if (cost == INFTY)
4212     return cost;
4213   else if (optimize_loop_for_speed_p (data->current_loop))
4214     {
4215       int64_t niters = (int64_t) avg_loop_niter (data->current_loop);
4216       return (cost + (round_up_p ? niters - 1 : 0)) / niters;
4217     }
4218   else
4219     return cost;
4220 }
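
/* E.g. a setup cost of 10 in a loop averaging 4 iterations is amortized
   to 10 / 4 = 2 when optimizing for speed, or to (10 + 3) / 4 = 3 with
   ROUND_UP_P; when optimizing for size it stays at 10, since the setup
   code executes only once.  */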
4221 
4222 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE.  MULT is
4223    the operand of EXPR holding the multiplication turned into the shift.
4224    COST0 and COST1 are the costs for calculating the operands of EXPR.
4225    Returns true if successful, and returns the cost in COST.  */
4226 
4227 static bool
4228 get_shiftadd_cost (tree expr, scalar_int_mode mode, comp_cost cost0,
4229 		   comp_cost cost1, tree mult, bool speed, comp_cost *cost)
4230 {
4231   comp_cost res;
4232   tree op1 = TREE_OPERAND (expr, 1);
4233   tree cst = TREE_OPERAND (mult, 1);
4234   tree multop = TREE_OPERAND (mult, 0);
4235   int m = exact_log2 (int_cst_value (cst));
4236   int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
4237   int as_cost, sa_cost;
4238   bool mult_in_op1;
4239 
4240   if (!(m >= 0 && m < maxm))
4241     return false;
4242 
4243   STRIP_NOPS (op1);
4244   mult_in_op1 = operand_equal_p (op1, mult, 0);
4245 
4246   as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
4247 
4248   /* If the target has a cheap shift-and-add or shift-and-sub instruction,
4249      use that in preference to a shift insn followed by an add insn.  */
4250   sa_cost = (TREE_CODE (expr) != MINUS_EXPR
4251 	     ? shiftadd_cost (speed, mode, m)
4252 	     : (mult_in_op1
4253 		? shiftsub1_cost (speed, mode, m)
4254 		: shiftsub0_cost (speed, mode, m)));
4255 
4256   res = comp_cost (MIN (as_cost, sa_cost), 0);
4257   res += (mult_in_op1 ? cost0 : cost1);
4258 
4259   STRIP_NOPS (multop);
4260   if (!is_gimple_val (multop))
4261     res += force_expr_to_var_cost (multop, speed);
4262 
4263   *cost = res;
4264   return true;
4265 }
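
/* For instance, for "x + y * 8" we get m == 3; the cost of a shift
   "y << 3" followed by an add is compared against the target's
   shift-and-add cost, the cheaper one is chosen, and the cost of the
   operand outside the multiplication is added on top.  */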
4266 
4267 /* Estimates cost of forcing expression EXPR into a variable.  */
4268 
4269 static comp_cost
4270 force_expr_to_var_cost (tree expr, bool speed)
4271 {
4272   static bool costs_initialized = false;
4273   static unsigned integer_cost [2];
4274   static unsigned symbol_cost [2];
4275   static unsigned address_cost [2];
4276   tree op0, op1;
4277   comp_cost cost0, cost1, cost;
4278   machine_mode mode;
4279   scalar_int_mode int_mode;
4280 
4281   if (!costs_initialized)
4282     {
4283       tree type = build_pointer_type (integer_type_node);
4284       tree var, addr;
4285       rtx x;
4286       int i;
4287 
4288       var = create_tmp_var_raw (integer_type_node, "test_var");
4289       TREE_STATIC (var) = 1;
4290       x = produce_memory_decl_rtl (var, NULL);
4291       SET_DECL_RTL (var, x);
4292 
4293       addr = build1 (ADDR_EXPR, type, var);
4294 
4295 
4296       for (i = 0; i < 2; i++)
4297 	{
4298 	  integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
4299 							     2000), i);
4300 
4301 	  symbol_cost[i] = computation_cost (addr, i) + 1;
4302 
4303 	  address_cost[i]
4304 	    = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
4305 	  if (dump_file && (dump_flags & TDF_DETAILS))
4306 	    {
4307 	      fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
4308 	      fprintf (dump_file, "  integer %d\n", (int) integer_cost[i]);
4309 	      fprintf (dump_file, "  symbol %d\n", (int) symbol_cost[i]);
4310 	      fprintf (dump_file, "  address %d\n", (int) address_cost[i]);
4311 	      fprintf (dump_file, "  other %d\n", (int) target_spill_cost[i]);
4312 	      fprintf (dump_file, "\n");
4313 	    }
4314 	}
4315 
4316       costs_initialized = true;
4317     }
4318 
4319   STRIP_NOPS (expr);
4320 
4321   if (SSA_VAR_P (expr))
4322     return no_cost;
4323 
4324   if (is_gimple_min_invariant (expr))
4325     {
4326       if (poly_int_tree_p (expr))
4327 	return comp_cost (integer_cost [speed], 0);
4328 
4329       if (TREE_CODE (expr) == ADDR_EXPR)
4330 	{
4331 	  tree obj = TREE_OPERAND (expr, 0);
4332 
4333 	  if (VAR_P (obj)
4334 	      || TREE_CODE (obj) == PARM_DECL
4335 	      || TREE_CODE (obj) == RESULT_DECL)
4336 	    return comp_cost (symbol_cost [speed], 0);
4337 	}
4338 
4339       return comp_cost (address_cost [speed], 0);
4340     }
4341 
4342   switch (TREE_CODE (expr))
4343     {
4344     case POINTER_PLUS_EXPR:
4345     case PLUS_EXPR:
4346     case MINUS_EXPR:
4347     case MULT_EXPR:
4348     case TRUNC_DIV_EXPR:
4349     case BIT_AND_EXPR:
4350     case BIT_IOR_EXPR:
4351     case LSHIFT_EXPR:
4352     case RSHIFT_EXPR:
4353       op0 = TREE_OPERAND (expr, 0);
4354       op1 = TREE_OPERAND (expr, 1);
4355       STRIP_NOPS (op0);
4356       STRIP_NOPS (op1);
4357       break;
4358 
4359     CASE_CONVERT:
4360     case NEGATE_EXPR:
4361     case BIT_NOT_EXPR:
4362       op0 = TREE_OPERAND (expr, 0);
4363       STRIP_NOPS (op0);
4364       op1 = NULL_TREE;
4365       break;
4366     /* See add_iv_candidate_for_doloop: for the doloop may_be_zero case we
4367        introduce a COND_EXPR for the IV base, so we need to support better
4368        cost estimation for this COND_EXPR and for tcc_comparison.  */
4369     case COND_EXPR:
4370       op0 = TREE_OPERAND (expr, 1);
4371       STRIP_NOPS (op0);
4372       op1 = TREE_OPERAND (expr, 2);
4373       STRIP_NOPS (op1);
4374       break;
4375     case LT_EXPR:
4376     case LE_EXPR:
4377     case GT_EXPR:
4378     case GE_EXPR:
4379     case EQ_EXPR:
4380     case NE_EXPR:
4381     case UNORDERED_EXPR:
4382     case ORDERED_EXPR:
4383     case UNLT_EXPR:
4384     case UNLE_EXPR:
4385     case UNGT_EXPR:
4386     case UNGE_EXPR:
4387     case UNEQ_EXPR:
4388     case LTGT_EXPR:
4389     case MAX_EXPR:
4390     case MIN_EXPR:
4391       op0 = TREE_OPERAND (expr, 0);
4392       STRIP_NOPS (op0);
4393       op1 = TREE_OPERAND (expr, 1);
4394       STRIP_NOPS (op1);
4395       break;
4396 
4397     default:
4398       /* Just an arbitrary value, FIXME.  */
4399       return comp_cost (target_spill_cost[speed], 0);
4400     }
4401 
4402   if (op0 == NULL_TREE
4403       || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
4404     cost0 = no_cost;
4405   else
4406     cost0 = force_expr_to_var_cost (op0, speed);
4407 
4408   if (op1 == NULL_TREE
4409       || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
4410     cost1 = no_cost;
4411   else
4412     cost1 = force_expr_to_var_cost (op1, speed);
4413 
4414   mode = TYPE_MODE (TREE_TYPE (expr));
4415   switch (TREE_CODE (expr))
4416     {
4417     case POINTER_PLUS_EXPR:
4418     case PLUS_EXPR:
4419     case MINUS_EXPR:
4420     case NEGATE_EXPR:
4421       cost = comp_cost (add_cost (speed, mode), 0);
4422       if (TREE_CODE (expr) != NEGATE_EXPR)
4423 	{
4424 	  tree mult = NULL_TREE;
4425 	  comp_cost sa_cost;
4426 	  if (TREE_CODE (op1) == MULT_EXPR)
4427 	    mult = op1;
4428 	  else if (TREE_CODE (op0) == MULT_EXPR)
4429 	    mult = op0;
4430 
4431 	  if (mult != NULL_TREE
4432 	      && is_a <scalar_int_mode> (mode, &int_mode)
4433 	      && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
4434 	      && get_shiftadd_cost (expr, int_mode, cost0, cost1, mult,
4435 				    speed, &sa_cost))
4436 	    return sa_cost;
4437 	}
4438       break;
4439 
4440     CASE_CONVERT:
4441       {
4442 	tree inner_type, outer_type;
4443 	outer_type = TREE_TYPE (expr);
4444 	inner_type = TREE_TYPE (op0);
4445 	cost = comp_cost (convert_cost (TYPE_MODE (outer_type),
4446 				       TYPE_MODE (inner_type), speed), 0);
4447       }
4448       break;
4449 
4450     case MULT_EXPR:
4451       if (cst_and_fits_in_hwi (op0))
4452 	cost = comp_cost (mult_by_coeff_cost (int_cst_value (op0),
4453 					     mode, speed), 0);
4454       else if (cst_and_fits_in_hwi (op1))
4455 	cost = comp_cost (mult_by_coeff_cost (int_cst_value (op1),
4456 					     mode, speed), 0);
4457       else
4458 	return comp_cost (target_spill_cost [speed], 0);
4459       break;
4460 
4461     case TRUNC_DIV_EXPR:
4462       /* Division by power of two is usually cheap, so we allow it.  Forbid
4463 	 anything else.  */
4464       if (integer_pow2p (TREE_OPERAND (expr, 1)))
4465 	cost = comp_cost (add_cost (speed, mode), 0);
4466       else
4467 	cost = comp_cost (target_spill_cost[speed], 0);
4468       break;
4469 
4470     case BIT_AND_EXPR:
4471     case BIT_IOR_EXPR:
4472     case BIT_NOT_EXPR:
4473     case LSHIFT_EXPR:
4474     case RSHIFT_EXPR:
4475       cost = comp_cost (add_cost (speed, mode), 0);
4476       break;
4477     case COND_EXPR:
4478       op0 = TREE_OPERAND (expr, 0);
4479       STRIP_NOPS (op0);
4480       if (op0 == NULL_TREE || TREE_CODE (op0) == SSA_NAME
4481 	  || CONSTANT_CLASS_P (op0))
4482 	cost = no_cost;
4483       else
4484 	cost = force_expr_to_var_cost (op0, speed);
4485       break;
4486     case LT_EXPR:
4487     case LE_EXPR:
4488     case GT_EXPR:
4489     case GE_EXPR:
4490     case EQ_EXPR:
4491     case NE_EXPR:
4492     case UNORDERED_EXPR:
4493     case ORDERED_EXPR:
4494     case UNLT_EXPR:
4495     case UNLE_EXPR:
4496     case UNGT_EXPR:
4497     case UNGE_EXPR:
4498     case UNEQ_EXPR:
4499     case LTGT_EXPR:
4500     case MAX_EXPR:
4501     case MIN_EXPR:
4502       /* Simply use the add cost for now; FIXME if there is a more
4503 	 accurate way to evaluate this cost.  */
4504       cost = comp_cost (add_cost (speed, mode), 0);
4505       break;
4506 
4507     default:
4508       gcc_unreachable ();
4509     }
4510 
4511   cost += cost0;
4512   cost += cost1;
4513   return cost;
4514 }
4515 
4516 /* Estimates cost of forcing EXPR into a variable.  INV_VARS is a set of the
4517    invariants the computation depends on.  */
4518 
4519 static comp_cost
4520 force_var_cost (struct ivopts_data *data, tree expr, bitmap *inv_vars)
4521 {
4522   if (!expr)
4523     return no_cost;
4524 
4525   find_inv_vars (data, &expr, inv_vars);
4526   return force_expr_to_var_cost (expr, data->speed);
4527 }
4528 
4529 /* Returns the cost of an auto-modifying address expression of the form
4530    base + offset.  AINC_STEP is the step size of the address IV;
4531    AINC_OFFSET is the offset of the address expression.  The address
4532    expression has ADDR_MODE in addr space AS.  The memory access has
4533    MEM_MODE.  SPEED indicates whether we optimize for speed or size.  */
4534 
4535 enum ainc_type
4536 {
4537   AINC_PRE_INC,		/* Pre increment.  */
4538   AINC_PRE_DEC,		/* Pre decrement.  */
4539   AINC_POST_INC,	/* Post increment.  */
4540   AINC_POST_DEC,	/* Post decrement.  */
4541   AINC_NONE		/* Also the number of auto increment types.  */
4542 };
4543 
4544 struct ainc_cost_data
4545 {
4546   int64_t costs[AINC_NONE];
4547 };
4548 
4549 static comp_cost
4550 get_address_cost_ainc (poly_int64 ainc_step, poly_int64 ainc_offset,
4551 		       machine_mode addr_mode, machine_mode mem_mode,
4552 		       addr_space_t as, bool speed)
4553 {
4554   if (!USE_LOAD_PRE_DECREMENT (mem_mode)
4555       && !USE_STORE_PRE_DECREMENT (mem_mode)
4556       && !USE_LOAD_POST_DECREMENT (mem_mode)
4557       && !USE_STORE_POST_DECREMENT (mem_mode)
4558       && !USE_LOAD_PRE_INCREMENT (mem_mode)
4559       && !USE_STORE_PRE_INCREMENT (mem_mode)
4560       && !USE_LOAD_POST_INCREMENT (mem_mode)
4561       && !USE_STORE_POST_INCREMENT (mem_mode))
4562     return infinite_cost;
4563 
4564   static vec<ainc_cost_data *> ainc_cost_data_list;
4565   unsigned idx = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
4566   if (idx >= ainc_cost_data_list.length ())
4567     {
4568       unsigned nsize = ((unsigned) as + 1) * MAX_MACHINE_MODE;
4569 
4570       gcc_assert (nsize > idx);
4571       ainc_cost_data_list.safe_grow_cleared (nsize);
4572     }
4573 
4574   ainc_cost_data *data = ainc_cost_data_list[idx];
4575   if (data == NULL)
4576     {
4577       rtx reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
4578 
4579       data = (ainc_cost_data *) xcalloc (1, sizeof (*data));
4580       data->costs[AINC_PRE_DEC] = INFTY;
4581       data->costs[AINC_POST_DEC] = INFTY;
4582       data->costs[AINC_PRE_INC] = INFTY;
4583       data->costs[AINC_POST_INC] = INFTY;
4584       if (USE_LOAD_PRE_DECREMENT (mem_mode)
4585 	  || USE_STORE_PRE_DECREMENT (mem_mode))
4586 	{
4587 	  rtx addr = gen_rtx_PRE_DEC (addr_mode, reg);
4588 
4589 	  if (memory_address_addr_space_p (mem_mode, addr, as))
4590 	    data->costs[AINC_PRE_DEC]
4591 	      = address_cost (addr, mem_mode, as, speed);
4592 	}
4593       if (USE_LOAD_POST_DECREMENT (mem_mode)
4594 	  || USE_STORE_POST_DECREMENT (mem_mode))
4595 	{
4596 	  rtx addr = gen_rtx_POST_DEC (addr_mode, reg);
4597 
4598 	  if (memory_address_addr_space_p (mem_mode, addr, as))
4599 	    data->costs[AINC_POST_DEC]
4600 	      = address_cost (addr, mem_mode, as, speed);
4601 	}
4602       if (USE_LOAD_PRE_INCREMENT (mem_mode)
4603 	  || USE_STORE_PRE_INCREMENT (mem_mode))
4604 	{
4605 	  rtx addr = gen_rtx_PRE_INC (addr_mode, reg);
4606 
4607 	  if (memory_address_addr_space_p (mem_mode, addr, as))
4608 	    data->costs[AINC_PRE_INC]
4609 	      = address_cost (addr, mem_mode, as, speed);
4610 	}
4611       if (USE_LOAD_POST_INCREMENT (mem_mode)
4612 	  || USE_STORE_POST_INCREMENT (mem_mode))
4613 	{
4614 	  rtx addr = gen_rtx_POST_INC (addr_mode, reg);
4615 
4616 	  if (memory_address_addr_space_p (mem_mode, addr, as))
4617 	    data->costs[AINC_POST_INC]
4618 	      = address_cost (addr, mem_mode, as, speed);
4619 	}
4620       ainc_cost_data_list[idx] = data;
4621     }
4622 
4623   poly_int64 msize = GET_MODE_SIZE (mem_mode);
4624   if (known_eq (ainc_offset, 0) && known_eq (msize, ainc_step))
4625     return comp_cost (data->costs[AINC_POST_INC], 0);
4626   if (known_eq (ainc_offset, 0) && known_eq (msize, -ainc_step))
4627     return comp_cost (data->costs[AINC_POST_DEC], 0);
4628   if (known_eq (ainc_offset, msize) && known_eq (msize, ainc_step))
4629     return comp_cost (data->costs[AINC_PRE_INC], 0);
4630   if (known_eq (ainc_offset, -msize) && known_eq (msize, -ainc_step))
4631     return comp_cost (data->costs[AINC_PRE_DEC], 0);
4632 
4633   return infinite_cost;
4634 }
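
/* For example, an access whose address iv steps by exactly the mode
   size, with a zero constant offset, matches the post-increment pattern
   "*p++" and returns the cached AINC_POST_INC cost; an offset equal to
   the step matches the pre-increment pattern "*++p" instead.  */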
4635 
4636 /* Return the cost of computing USE's address expression by using CAND.
4637    AFF_INV and AFF_VAR represent the invariant and variant parts of the
4638    address expression, respectively.  If AFF_INV is simple, store the
4639    loop invariant variables it depends on in INV_VARS; if AFF_INV is
4640    complicated, handle it as a new invariant expression and record it
4641    in INV_EXPR.  RATIO is the multiple between the steps of USE and
4642    CAND.  If CAN_AUTOINC is non-NULL, store in it a boolean value
4643    indicating whether this is an auto-increment address.  */
4644 
4645 static comp_cost
4646 get_address_cost (struct ivopts_data *data, struct iv_use *use,
4647 		  struct iv_cand *cand, aff_tree *aff_inv,
4648 		  aff_tree *aff_var, HOST_WIDE_INT ratio,
4649 		  bitmap *inv_vars, iv_inv_expr_ent **inv_expr,
4650 		  bool *can_autoinc, bool speed)
4651 {
4652   rtx addr;
4653   bool simple_inv = true;
4654   tree comp_inv = NULL_TREE, type = aff_var->type;
4655   comp_cost var_cost = no_cost, cost = no_cost;
4656   struct mem_address parts = {NULL_TREE, integer_one_node,
4657 			      NULL_TREE, NULL_TREE, NULL_TREE};
4658   machine_mode addr_mode = TYPE_MODE (type);
4659   machine_mode mem_mode = TYPE_MODE (use->mem_type);
4660   addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
4661   /* Only true if ratio != 1.  */
4662   bool ok_with_ratio_p = false;
4663   bool ok_without_ratio_p = false;
4664 
4665   if (!aff_combination_const_p (aff_inv))
4666     {
4667       parts.index = integer_one_node;
4668       /* Addressing mode "base + index".  */
4669       ok_without_ratio_p = valid_mem_ref_p (mem_mode, as, &parts);
4670       if (ratio != 1)
4671 	{
4672 	  parts.step = wide_int_to_tree (type, ratio);
4673 	  /* Addressing mode "base + index << scale".  */
4674 	  ok_with_ratio_p = valid_mem_ref_p (mem_mode, as, &parts);
4675 	  if (!ok_with_ratio_p)
4676 	    parts.step = NULL_TREE;
4677 	}
4678       if (ok_with_ratio_p || ok_without_ratio_p)
4679 	{
4680 	  if (maybe_ne (aff_inv->offset, 0))
4681 	    {
4682 	      parts.offset = wide_int_to_tree (sizetype, aff_inv->offset);
4683 	      /* Addressing mode "base + index [<< scale] + offset".  */
4684 	      if (!valid_mem_ref_p (mem_mode, as, &parts))
4685 		parts.offset = NULL_TREE;
4686 	      else
4687 		aff_inv->offset = 0;
4688 	    }
4689 
4690 	  move_fixed_address_to_symbol (&parts, aff_inv);
4691 	  /* Base is fixed address and is moved to symbol part.  */
4692 	  if (parts.symbol != NULL_TREE && aff_combination_zero_p (aff_inv))
4693 	    parts.base = NULL_TREE;
4694 
4695 	  /* Addressing mode "symbol + base + index [<< scale] [+ offset]".  */
4696 	  if (parts.symbol != NULL_TREE
4697 	      && !valid_mem_ref_p (mem_mode, as, &parts))
4698 	    {
4699 	      aff_combination_add_elt (aff_inv, parts.symbol, 1);
4700 	      parts.symbol = NULL_TREE;
4701 	      /* Reset SIMPLE_INV since symbol address needs to be computed
4702 		 outside of address expression in this case.  */
4703 	      simple_inv = false;
4704 	      /* Symbol part is moved back to base part, it can't be NULL.  */
4705 	      parts.base = integer_one_node;
4706 	    }
4707 	}
4708       else
4709 	parts.index = NULL_TREE;
4710     }
4711   else
4712     {
4713       poly_int64 ainc_step;
4714       if (can_autoinc
4715 	  && ratio == 1
4716 	  && ptrdiff_tree_p (cand->iv->step, &ainc_step))
4717 	{
4718 	  poly_int64 ainc_offset = (aff_inv->offset).force_shwi ();
4719 
4720 	  if (stmt_after_increment (data->current_loop, cand, use->stmt))
4721 	    ainc_offset += ainc_step;
4722 	  cost = get_address_cost_ainc (ainc_step, ainc_offset,
4723 					addr_mode, mem_mode, as, speed);
4724 	  if (!cost.infinite_cost_p ())
4725 	    {
4726 	      *can_autoinc = true;
4727 	      return cost;
4728 	    }
4729 	  cost = no_cost;
4730 	}
4731       if (!aff_combination_zero_p (aff_inv))
4732 	{
4733 	  parts.offset = wide_int_to_tree (sizetype, aff_inv->offset);
4734 	  /* Addressing mode "base + offset".  */
4735 	  if (!valid_mem_ref_p (mem_mode, as, &parts))
4736 	    parts.offset = NULL_TREE;
4737 	  else
4738 	    aff_inv->offset = 0;
4739 	}
4740     }
4741 
4742   if (simple_inv)
4743     simple_inv = (aff_inv == NULL
4744 		  || aff_combination_const_p (aff_inv)
4745 		  || aff_combination_singleton_var_p (aff_inv));
4746   if (!aff_combination_zero_p (aff_inv))
4747     comp_inv = aff_combination_to_tree (aff_inv);
4748   if (comp_inv != NULL_TREE)
4749     cost = force_var_cost (data, comp_inv, inv_vars);
4750   if (ratio != 1 && parts.step == NULL_TREE)
4751     var_cost += mult_by_coeff_cost (ratio, addr_mode, speed);
4752   if (comp_inv != NULL_TREE && parts.index == NULL_TREE)
4753     var_cost += add_cost (speed, addr_mode);
4754 
4755   if (comp_inv && inv_expr && !simple_inv)
4756     {
4757       *inv_expr = get_loop_invariant_expr (data, comp_inv);
4758       /* Clear depends on.  */
4759       if (*inv_expr != NULL && inv_vars && *inv_vars)
4760 	bitmap_clear (*inv_vars);
4761 
4762       /* The cost of a small invariant expression adjusted against loop
4763 	 niters is usually zero, which makes it hard to distinguish from a
4764 	 candidate based on loop invariant variables.  Moreover, the
4765 	 generated invariant expression may not be hoisted out of the loop
4766 	 by a following pass.  We penalize the cost by rounding up in order
4767 	 to neutralize such effects.  */
4768       cost.cost = adjust_setup_cost (data, cost.cost, true);
4769       cost.scratch = cost.cost;
4770     }
4771 
4772   cost += var_cost;
4773   addr = addr_for_mem_ref (&parts, as, false);
4774   gcc_assert (memory_address_addr_space_p (mem_mode, addr, as));
4775   cost += address_cost (addr, mem_mode, as, speed);
4776 
4777   if (parts.symbol != NULL_TREE)
4778     cost.complexity += 1;
4779   /* Don't increase the complexity of adding a scaled index if it's
4780      the only kind of index that the target allows.  */
4781   if (parts.step != NULL_TREE && ok_without_ratio_p)
4782     cost.complexity += 1;
4783   if (parts.base != NULL_TREE && parts.index != NULL_TREE)
4784     cost.complexity += 1;
4785   if (parts.offset != NULL_TREE && !integer_zerop (parts.offset))
4786     cost.complexity += 1;
4787 
4788   return cost;
4789 }
4790 
4791 /* Scale (multiply) the computed COST (except the scratch part, which
4792    should be hoisted out of the loop) by header->frequency / AT->frequency,
4793    which makes the expected cost more accurate.  */
4794 
4795 static comp_cost
4796 get_scaled_computation_cost_at (ivopts_data *data, gimple *at, comp_cost cost)
4797 {
4798   if (data->speed
4799       && data->current_loop->header->count.to_frequency (cfun) > 0)
4800     {
4801       basic_block bb = gimple_bb (at);
4802       gcc_assert (cost.scratch <= cost.cost);
4803       int scale_factor = (int)(intptr_t) bb->aux;
4804       if (scale_factor == 1)
4805 	return cost;
4806 
4807       int64_t scaled_cost
4808 	= cost.scratch + (cost.cost - cost.scratch) * scale_factor;
4809 
4810       if (dump_file && (dump_flags & TDF_DETAILS))
4811 	fprintf (dump_file, "Scaling cost based on bb prob by %2.2f: "
4812 		 "%" PRId64 " (scratch: %" PRId64 ") -> %" PRId64 "\n",
4813 		 1.0f * scale_factor, cost.cost, cost.scratch, scaled_cost);
4814 
4815       cost.cost = scaled_cost;
4816     }
4817 
4818   return cost;
4819 }
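
/* E.g. with cost.cost == 10, cost.scratch == 2 and a scale factor of 4
   recorded in bb->aux, the result is 2 + (10 - 2) * 4 == 34: only the
   per-iteration part is scaled, while the setup (scratch) part is
   hoisted and paid once.  */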
4820 
4821 /* Determines the cost of the computation by which USE is expressed
4822    from induction variable CAND.  If ADDRESS_P is true, we just need
4823    to create an address from it, otherwise we want to get it into a
4824    register.  A set of invariants we depend on is stored in INV_VARS.
4825    If CAN_AUTOINC is nonnull, use it to record whether autoinc
4826    addressing is likely.  If INV_EXPR is nonnull, record the invariant
4827    expr entry in it.  */
4828 
4829 static comp_cost
4830 get_computation_cost (struct ivopts_data *data, struct iv_use *use,
4831 		      struct iv_cand *cand, bool address_p, bitmap *inv_vars,
4832 		      bool *can_autoinc, iv_inv_expr_ent **inv_expr)
4833 {
4834   gimple *at = use->stmt;
4835   tree ubase = use->iv->base, cbase = cand->iv->base;
4836   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
4837   tree comp_inv = NULL_TREE;
4838   HOST_WIDE_INT ratio, aratio;
4839   comp_cost cost;
4840   widest_int rat;
4841   aff_tree aff_inv, aff_var;
4842   bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4843 
4844   if (inv_vars)
4845     *inv_vars = NULL;
4846   if (can_autoinc)
4847     *can_autoinc = false;
4848   if (inv_expr)
4849     *inv_expr = NULL;
4850 
4851   /* Check if we have enough precision to express the values of use.  */
4852   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4853     return infinite_cost;
4854 
4855   if (address_p
4856       || (use->iv->base_object
4857 	  && cand->iv->base_object
4858 	  && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4859 	  && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4860     {
4861       /* Do not try to express address of an object with computation based
4862 	 on address of a different object.  This may cause problems in rtl
4863 	 level alias analysis (that does not expect this to be happening,
4864 	 as this is illegal in C), and would be unlikely to be useful
4865 	 anyway.  */
4866       if (use->iv->base_object
4867 	  && cand->iv->base_object
4868 	  && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4869 	return infinite_cost;
4870     }
4871 
4872   if (!get_computation_aff_1 (data->current_loop, at, use,
4873 			      cand, &aff_inv, &aff_var, &rat)
4874       || !wi::fits_shwi_p (rat))
4875     return infinite_cost;
4876 
4877   ratio = rat.to_shwi ();
4878   if (address_p)
4879     {
4880       cost = get_address_cost (data, use, cand, &aff_inv, &aff_var, ratio,
4881 			       inv_vars, inv_expr, can_autoinc, speed);
4882       cost = get_scaled_computation_cost_at (data, at, cost);
4883       /* For doloop IV cand, add on the extra cost.  */
4884       cost += cand->doloop_p ? targetm.doloop_cost_for_address : 0;
4885       return cost;
4886     }
4887 
4888   bool simple_inv = (aff_combination_const_p (&aff_inv)
4889 		     || aff_combination_singleton_var_p (&aff_inv));
4890   tree signed_type = signed_type_for (aff_combination_type (&aff_inv));
4891   aff_combination_convert (&aff_inv, signed_type);
4892   if (!aff_combination_zero_p (&aff_inv))
4893     comp_inv = aff_combination_to_tree (&aff_inv);
4894 
4895   cost = force_var_cost (data, comp_inv, inv_vars);
4896   if (comp_inv && inv_expr && !simple_inv)
4897     {
4898       *inv_expr = get_loop_invariant_expr (data, comp_inv);
4899       /* Clear depends on.  */
4900       if (*inv_expr != NULL && inv_vars && *inv_vars)
4901 	bitmap_clear (*inv_vars);
4902 
4903       cost.cost = adjust_setup_cost (data, cost.cost);
4904       /* Record setup cost in scratch field.  */
4905       cost.scratch = cost.cost;
4906     }
4907   /* The cost of a constant integer can be covered when adding the
4908      invariant part to the variant part.  */
4909   else if (comp_inv && CONSTANT_CLASS_P (comp_inv))
4910     cost = no_cost;
4911 
4912   /* Need type narrowing to represent the use with the cand.  */
4913   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4914     {
4915       machine_mode outer_mode = TYPE_MODE (utype);
4916       machine_mode inner_mode = TYPE_MODE (ctype);
4917       cost += comp_cost (convert_cost (outer_mode, inner_mode, speed), 0);
4918     }
4919 
4920   /* Turn a + i * (-c) into a - i * c.  */
4921   if (ratio < 0 && comp_inv && !integer_zerop (comp_inv))
4922     aratio = -ratio;
4923   else
4924     aratio = ratio;
4925 
4926   if (ratio != 1)
4927     cost += mult_by_coeff_cost (aratio, TYPE_MODE (utype), speed);
4928 
4929   /* TODO: We may also need to check if we can compute a + i * 4 in one
4930      instruction.  */
4931   /* Need to add up the invariant and variant parts.  */
4932   if (comp_inv && !integer_zerop (comp_inv))
4933     cost += add_cost (speed, TYPE_MODE (utype));
4934 
4935   cost = get_scaled_computation_cost_at (data, at, cost);
4936 
4937   /* For doloop IV cand, add on the extra cost.  */
4938   if (cand->doloop_p && use->type == USE_NONLINEAR_EXPR)
4939     cost += targetm.doloop_cost_for_generic;
4940 
4941   return cost;
4942 }
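
/* Illustrative sketch (not from the original sources): to express a use
   a + 2*i with a candidate iv stepping by 1, get_computation_aff_1 above
   yields ratio == 2 and invariant part 'a', so the generic cost adds up
   force_var_cost for 'a', mult_by_coeff_cost for the scaling by 2, and one
   add_cost for combining the invariant and variant parts.  */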
4943 
4944 /* Determines cost of computing the use in GROUP with CAND in a generic
4945    expression.  */
4946 
4947 static bool
4948 determine_group_iv_cost_generic (struct ivopts_data *data,
4949 				 struct iv_group *group, struct iv_cand *cand)
4950 {
4951   comp_cost cost;
4952   iv_inv_expr_ent *inv_expr = NULL;
4953   bitmap inv_vars = NULL, inv_exprs = NULL;
4954   struct iv_use *use = group->vuses[0];
4955 
4956   /* The simple case first -- if we need to express value of the preserved
4957      original biv, the cost is 0.  This also prevents us from counting the
4958      cost of increment twice -- once at this use and once in the cost of
4959      the candidate.  */
4960   if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
4961     cost = no_cost;
4962   else
4963     cost = get_computation_cost (data, use, cand, false,
4964 				 &inv_vars, NULL, &inv_expr);
4965 
4966   if (inv_expr)
4967     {
4968       inv_exprs = BITMAP_ALLOC (NULL);
4969       bitmap_set_bit (inv_exprs, inv_expr->id);
4970     }
4971   set_group_iv_cost (data, group, cand, cost, inv_vars,
4972 		     NULL_TREE, ERROR_MARK, inv_exprs);
4973   return !cost.infinite_cost_p ();
4974 }
4975 
4976 /* Determines cost of computing uses in GROUP with CAND in addresses.  */
4977 
4978 static bool
4979 determine_group_iv_cost_address (struct ivopts_data *data,
4980 				 struct iv_group *group, struct iv_cand *cand)
4981 {
4982   unsigned i;
4983   bitmap inv_vars = NULL, inv_exprs = NULL;
4984   bool can_autoinc;
4985   iv_inv_expr_ent *inv_expr = NULL;
4986   struct iv_use *use = group->vuses[0];
4987   comp_cost sum_cost = no_cost, cost;
4988 
4989   cost = get_computation_cost (data, use, cand, true,
4990 			       &inv_vars, &can_autoinc, &inv_expr);
4991 
4992   if (inv_expr)
4993     {
4994       inv_exprs = BITMAP_ALLOC (NULL);
4995       bitmap_set_bit (inv_exprs, inv_expr->id);
4996     }
4997   sum_cost = cost;
4998   if (!sum_cost.infinite_cost_p () && cand->ainc_use == use)
4999     {
5000       if (can_autoinc)
5001 	sum_cost -= cand->cost_step;
5002       /* If we generated the candidate solely for exploiting autoincrement
5003 	 opportunities, and it turns out it can't be used, set the cost to
5004 	 infinity to make sure we ignore it.  */
5005       else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
5006 	sum_cost = infinite_cost;
5007     }
5008 
5009   /* Uses in a group can share setup code, so only add setup cost once.  */
5010   cost -= cost.scratch;
5011   /* Compute and add the costs for the remaining uses of this group.  */
5012   for (i = 1; i < group->vuses.length () && !sum_cost.infinite_cost_p (); i++)
5013     {
5014       struct iv_use *next = group->vuses[i];
5015 
5016       /* TODO: We could skip computing cost for sub iv_use when it has the
5017 	 same cost as the first iv_use, but the cost really depends on the
5018 	 offset and where the iv_use is.  */
5019       cost = get_computation_cost (data, next, cand, true,
5020 				   NULL, &can_autoinc, &inv_expr);
5021       if (inv_expr)
5022 	{
5023 	  if (!inv_exprs)
5024 	    inv_exprs = BITMAP_ALLOC (NULL);
5025 
5026 	  bitmap_set_bit (inv_exprs, inv_expr->id);
5027 	}
5028       sum_cost += cost;
5029     }
5030   set_group_iv_cost (data, group, cand, sum_cost, inv_vars,
5031 		     NULL_TREE, ERROR_MARK, inv_exprs);
5032 
5033   return !sum_cost.infinite_cost_p ();
5034 }
5035 
5036 /* Computes value of candidate CAND at position AT in iteration NITER, and
5037    stores it to VAL.  */
5038 
5039 static void
5040 cand_value_at (class loop *loop, struct iv_cand *cand, gimple *at, tree niter,
5041 	       aff_tree *val)
5042 {
5043   aff_tree step, delta, nit;
5044   struct iv *iv = cand->iv;
5045   tree type = TREE_TYPE (iv->base);
5046   tree steptype;
5047   if (POINTER_TYPE_P (type))
5048     steptype = sizetype;
5049   else
5050     steptype = unsigned_type_for (type);
5051 
5052   tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
5053   aff_combination_convert (&step, steptype);
5054   tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
5055   aff_combination_convert (&nit, steptype);
5056   aff_combination_mult (&nit, &step, &delta);
5057   if (stmt_after_increment (loop, cand, at))
5058     aff_combination_add (&delta, &step);
5059 
5060   tree_to_aff_combination (iv->base, type, val);
5061   if (!POINTER_TYPE_P (type))
5062     aff_combination_convert (val, steptype);
5063   aff_combination_add (val, &delta);
5064 }
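
/* Worked example (a sketch, not from the original sources): for a candidate
   with base 0 and step 4, the value computed above is 0 + 4 * NITER, or
   0 + 4 * (NITER + 1) when AT follows the increment, since one extra step
   has then already been applied.  */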
5065 
5066 /* Returns the period of the induction variable IV.  */
5067 
5068 static tree
5069 iv_period (struct iv *iv)
5070 {
5071   tree step = iv->step, period, type;
5072   tree pow2div;
5073 
5074   gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
5075 
5076   type = unsigned_type_for (TREE_TYPE (step));
5077   /* The period of the iv is lcm (step, type_range)/step - 1,
5078      i.e., N*type_range/step - 1.  Since the type range is a power
5079      of two, N == step >> num_of_ending_zeros_binary (step),
5080      so the final result is
5081 
5082        (type_range >> num_of_ending_zeros_binary (step)) - 1.  */
5085   pow2div = num_ending_zeros (step);
5086 
5087   period = build_low_bits_mask (type,
5088 				(TYPE_PRECISION (type)
5089 				 - tree_to_uhwi (pow2div)));
5090 
5091   return period;
5092 }
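
/* Worked example (a sketch, not from the original sources): for an 8-bit
   unsigned iv with step 12, num_ending_zeros (12) == 2, so the period is
   (256 >> 2) - 1 == 63, built here as a mask of the low 8 - 2 == 6 bits.  */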
5093 
5094 /* Returns the comparison operator used when eliminating the iv USE.  */
5095 
5096 static enum tree_code
5097 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
5098 {
5099   class loop *loop = data->current_loop;
5100   basic_block ex_bb;
5101   edge exit;
5102 
5103   ex_bb = gimple_bb (use->stmt);
5104   exit = EDGE_SUCC (ex_bb, 0);
5105   if (flow_bb_inside_loop_p (loop, exit->dest))
5106     exit = EDGE_SUCC (ex_bb, 1);
5107 
5108   return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
5109 }
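
/* For instance (an illustration, not from the original sources): when the
   exit edge is the true edge of the GIMPLE_COND, the eliminated test exits
   on IV == BOUND; otherwise the loop keeps iterating while IV != BOUND.  */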
5110 
5111 /* Returns true if we can prove that BASE - OFFSET does not overflow.  For now,
5112    we only detect the situation that BASE = SOMETHING + OFFSET, where the
5113    calculation is performed in a non-wrapping type.
5114 
5115    TODO: More generally, we could test for the situation that
5116 	 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
5117 	 This would require knowing the sign of OFFSET.  */
5118 
5119 static bool
5120 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
5121 {
5122   enum tree_code code;
5123   tree e1, e2;
5124   aff_tree aff_e1, aff_e2, aff_offset;
5125 
5126   if (!nowrap_type_p (TREE_TYPE (base)))
5127     return false;
5128 
5129   base = expand_simple_operations (base);
5130 
5131   if (TREE_CODE (base) == SSA_NAME)
5132     {
5133       gimple *stmt = SSA_NAME_DEF_STMT (base);
5134 
5135       if (gimple_code (stmt) != GIMPLE_ASSIGN)
5136 	return false;
5137 
5138       code = gimple_assign_rhs_code (stmt);
5139       if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
5140 	return false;
5141 
5142       e1 = gimple_assign_rhs1 (stmt);
5143       e2 = gimple_assign_rhs2 (stmt);
5144     }
5145   else
5146     {
5147       code = TREE_CODE (base);
5148       if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
5149 	return false;
5150       e1 = TREE_OPERAND (base, 0);
5151       e2 = TREE_OPERAND (base, 1);
5152     }
5153 
5154   /* Use affine expansion as deeper inspection to prove the equality.  */
5155   tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
5156 				  &aff_e2, &data->name_expansion_cache);
5157   tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
5158 				  &aff_offset, &data->name_expansion_cache);
5159   aff_combination_scale (&aff_offset, -1);
5160   switch (code)
5161     {
5162     case PLUS_EXPR:
5163       aff_combination_add (&aff_e2, &aff_offset);
5164       if (aff_combination_zero_p (&aff_e2))
5165 	return true;
5166 
5167       tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
5168 				      &aff_e1, &data->name_expansion_cache);
5169       aff_combination_add (&aff_e1, &aff_offset);
5170       return aff_combination_zero_p (&aff_e1);
5171 
5172     case POINTER_PLUS_EXPR:
5173       aff_combination_add (&aff_e2, &aff_offset);
5174       return aff_combination_zero_p (&aff_e2);
5175 
5176     default:
5177       return false;
5178     }
5179 }
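
/* Example (a sketch, not from the original sources): for BASE defined as
   'p_0 = base_ptr + n' and OFFSET == n, the POINTER_PLUS_EXPR case above
   adds the negated offset to the affine form of 'n' and obtains zero,
   proving that BASE - OFFSET == base_ptr cannot overflow.  */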
5180 
5181 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
5182    comparison with CAND.  NITER describes the number of iterations of
5183    the loops.  If successful, the comparison in COMP_P is altered accordingly.
5184 
5185    We aim to handle the following situation:
5186 
5187    sometype *base, *p;
5188    int a, b, i;
5189 
5190    i = a;
5191    p = p_0 = base + a;
5192 
5193    do
5194      {
5195        bla (*p);
5196        p++;
5197        i++;
5198      }
5199    while (i < b);
5200 
5201    Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
5202    We aim to optimize this to
5203 
5204    p = p_0 = base + a;
5205    do
5206      {
5207        bla (*p);
5208        p++;
5209      }
5210    while (p < p_0 - a + b);
5211 
5212    This preserves correctness, since the pointer arithmetic does not
5213    overflow.  More precisely:
5214 
5215    1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
5216       overflow in computing it or the values of p.
5217    2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
5218       overflow.  To prove this, we use the fact that p_0 = base + a.  */
5219 
5220 static bool
5221 iv_elimination_compare_lt (struct ivopts_data *data,
5222 			   struct iv_cand *cand, enum tree_code *comp_p,
5223 			   class tree_niter_desc *niter)
5224 {
5225   tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
5226   class aff_tree nit, tmpa, tmpb;
5227   enum tree_code comp;
5228   HOST_WIDE_INT step;
5229 
5230   /* We need to know that the candidate induction variable does not overflow.
5231      While more complex analysis may be used to prove this, for now just
5232      check that the variable appears in the original program and that it
5233      is computed in a type that guarantees no overflows.  */
5234   cand_type = TREE_TYPE (cand->iv->base);
5235   if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
5236     return false;
5237 
5238   /* Make sure that the loop iterates till the loop bound is hit, as otherwise
5239      the calculation of the BOUND could overflow, making the comparison
5240      invalid.  */
5241   if (!data->loop_single_exit_p)
5242     return false;
5243 
5244   /* We need to be able to decide whether candidate is increasing or decreasing
5245      in order to choose the right comparison operator.  */
5246   if (!cst_and_fits_in_hwi (cand->iv->step))
5247     return false;
5248   step = int_cst_value (cand->iv->step);
5249 
5250   /* Check that the number of iterations matches the expected pattern:
5251      a + 1 > b ? 0 : b - a - 1.  */
5252   mbz = niter->may_be_zero;
5253   if (TREE_CODE (mbz) == GT_EXPR)
5254     {
5255       /* Handle a + 1 > b.  */
5256       tree op0 = TREE_OPERAND (mbz, 0);
5257       if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
5258 	{
5259 	  a = TREE_OPERAND (op0, 0);
5260 	  b = TREE_OPERAND (mbz, 1);
5261 	}
5262       else
5263 	return false;
5264     }
5265   else if (TREE_CODE (mbz) == LT_EXPR)
5266     {
5267       tree op1 = TREE_OPERAND (mbz, 1);
5268 
5269       /* Handle b < a + 1.  */
5270       if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
5271 	{
5272 	  a = TREE_OPERAND (op1, 0);
5273 	  b = TREE_OPERAND (mbz, 0);
5274 	}
5275       else
5276 	return false;
5277     }
5278   else
5279     return false;
5280 
5281   /* Expected number of iterations is B - A - 1.  Check that it matches
5282      the actual number, i.e., that B - A - NITER = 1.  */
5283   tree_to_aff_combination (niter->niter, nit_type, &nit);
5284   tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
5285   tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
5286   aff_combination_scale (&nit, -1);
5287   aff_combination_scale (&tmpa, -1);
5288   aff_combination_add (&tmpb, &tmpa);
5289   aff_combination_add (&tmpb, &nit);
5290   if (tmpb.n != 0 || maybe_ne (tmpb.offset, 1))
5291     return false;
5292 
5293   /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
5294      overflow.  */
5295   offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
5296 			cand->iv->step,
5297 			fold_convert (TREE_TYPE (cand->iv->step), a));
5298   if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
5299     return false;
5300 
5301   /* Determine the new comparison operator.  */
5302   comp = step < 0 ? GT_EXPR : LT_EXPR;
5303   if (*comp_p == NE_EXPR)
5304     *comp_p = comp;
5305   else if (*comp_p == EQ_EXPR)
5306     *comp_p = invert_tree_comparison (comp, false);
5307   else
5308     gcc_unreachable ();
5309 
5310   return true;
5311 }
5312 
5313 /* Check whether it is possible to express the condition in USE by comparison
5314    of candidate CAND.  If so, store the value compared with to BOUND, and the
5315    comparison operator to COMP.  */
5316 
5317 static bool
5318 may_eliminate_iv (struct ivopts_data *data,
5319 		  struct iv_use *use, struct iv_cand *cand, tree *bound,
5320 		  enum tree_code *comp)
5321 {
5322   basic_block ex_bb;
5323   edge exit;
5324   tree period;
5325   class loop *loop = data->current_loop;
5326   aff_tree bnd;
5327   class tree_niter_desc *desc = NULL;
5328 
5329   if (TREE_CODE (cand->iv->step) != INTEGER_CST)
5330     return false;
5331 
5332   /* For now works only for exits that dominate the loop latch.
5333      TODO: extend to other conditions inside loop body.  */
5334   ex_bb = gimple_bb (use->stmt);
5335   if (use->stmt != last_stmt (ex_bb)
5336       || gimple_code (use->stmt) != GIMPLE_COND
5337       || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
5338     return false;
5339 
5340   exit = EDGE_SUCC (ex_bb, 0);
5341   if (flow_bb_inside_loop_p (loop, exit->dest))
5342     exit = EDGE_SUCC (ex_bb, 1);
5343   if (flow_bb_inside_loop_p (loop, exit->dest))
5344     return false;
5345 
5346   desc = niter_for_exit (data, exit);
5347   if (!desc)
5348     return false;
5349 
5350   /* Determine whether we can use the variable to test the exit condition.
5351      This is the case iff the period of the induction variable is greater
5352      than the number of iterations for which the exit condition is true.  */
5353   period = iv_period (cand->iv);
5354 
5355   /* If the number of iterations is constant, compare against it directly.  */
5356   if (TREE_CODE (desc->niter) == INTEGER_CST)
5357     {
5358       /* See cand_value_at.  */
5359       if (stmt_after_increment (loop, cand, use->stmt))
5360 	{
5361 	  if (!tree_int_cst_lt (desc->niter, period))
5362 	    return false;
5363 	}
5364       else
5365 	{
5366 	  if (tree_int_cst_lt (period, desc->niter))
5367 	    return false;
5368 	}
5369     }
5370 
5371   /* If not, and if this is the only possible exit of the loop, see whether
5372      we can get a conservative estimate on the number of iterations of the
5373      entire loop and compare against that instead.  */
5374   else
5375     {
5376       widest_int period_value, max_niter;
5377 
5378       max_niter = desc->max;
5379       if (stmt_after_increment (loop, cand, use->stmt))
5380 	max_niter += 1;
5381       period_value = wi::to_widest (period);
5382       if (wi::gtu_p (max_niter, period_value))
5383 	{
5384 	  /* See if we can take advantage of inferred loop bound
5385 	     information.  */
5386 	  if (data->loop_single_exit_p)
5387 	    {
5388 	      if (!max_loop_iterations (loop, &max_niter))
5389 		return false;
5390 	      /* The loop bound is already adjusted by adding 1.  */
5391 	      if (wi::gtu_p (max_niter, period_value))
5392 		return false;
5393 	    }
5394 	  else
5395 	    return false;
5396 	}
5397     }
5398 
5399   /* For doloop IV cand, the bound would be zero.  It's safe whether
5400      may_be_zero is set or not.  */
5401   if (cand->doloop_p)
5402     {
5403       *bound = build_int_cst (TREE_TYPE (cand->iv->base), 0);
5404       *comp = iv_elimination_compare (data, use);
5405       return true;
5406     }
5407 
5408   cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
5409 
5410   *bound = fold_convert (TREE_TYPE (cand->iv->base),
5411 			 aff_combination_to_tree (&bnd));
5412   *comp = iv_elimination_compare (data, use);
5413 
5414   /* It is unlikely that computing the number of iterations using division
5415      would be more profitable than keeping the original induction variable.  */
5416   if (expression_expensive_p (*bound))
5417     return false;
5418 
5419   /* Sometimes it is possible to handle the situation that the number of
5420      iterations may be zero unless additional assumptions hold, by using <
5421      instead of != in the exit condition.
5422 
5423      TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
5424 	   base the exit condition on it.  However, that is often too
5425 	   expensive.  */
5426   if (!integer_zerop (desc->may_be_zero))
5427     return iv_elimination_compare_lt (data, cand, comp, desc);
5428 
5429   return true;
5430 }
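
/* As an illustration of the period test above (a sketch, not from the
   original sources): a 16-bit unsigned iv with step 1 has period 0xffff, so
   elimination is only valid if the exit is known to be taken within at most
   0xffff iterations; otherwise the candidate could wrap and reach the bound
   a second time.  */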
5431 
5432 /* Calculates the cost of BOUND, if it is a PARM_DECL.  A PARM_DECL must
5433    be copied if it is used in the loop body and DATA->body_includes_call.  */
5434 
5435 static int
5436 parm_decl_cost (struct ivopts_data *data, tree bound)
5437 {
5438   tree sbound = bound;
5439   STRIP_NOPS (sbound);
5440 
5441   if (TREE_CODE (sbound) == SSA_NAME
5442       && SSA_NAME_IS_DEFAULT_DEF (sbound)
5443       && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
5444       && data->body_includes_call)
5445     return COSTS_N_INSNS (1);
5446 
5447   return 0;
5448 }
5449 
5450 /* Determines cost of computing the use in GROUP with CAND in a condition.  */
5451 
5452 static bool
5453 determine_group_iv_cost_cond (struct ivopts_data *data,
5454 			      struct iv_group *group, struct iv_cand *cand)
5455 {
5456   tree bound = NULL_TREE;
5457   struct iv *cmp_iv;
5458   bitmap inv_exprs = NULL;
5459   bitmap inv_vars_elim = NULL, inv_vars_express = NULL, inv_vars;
5460   comp_cost elim_cost = infinite_cost, express_cost, cost, bound_cost;
5461   enum comp_iv_rewrite rewrite_type;
5462   iv_inv_expr_ent *inv_expr_elim = NULL, *inv_expr_express = NULL, *inv_expr;
5463   tree *control_var, *bound_cst;
5464   enum tree_code comp = ERROR_MARK;
5465   struct iv_use *use = group->vuses[0];
5466 
5467   /* Extract condition operands.  */
5468   rewrite_type = extract_cond_operands (data, use->stmt, &control_var,
5469 					&bound_cst, NULL, &cmp_iv);
5470   gcc_assert (rewrite_type != COMP_IV_NA);
5471 
5472   /* Try iv elimination.  */
5473   if (rewrite_type == COMP_IV_ELIM
5474       && may_eliminate_iv (data, use, cand, &bound, &comp))
5475     {
5476       elim_cost = force_var_cost (data, bound, &inv_vars_elim);
5477       if (elim_cost.cost == 0)
5478 	elim_cost.cost = parm_decl_cost (data, bound);
5479       else if (TREE_CODE (bound) == INTEGER_CST)
5480 	elim_cost.cost = 0;
5481       /* If we replace a loop condition 'i < n' with 'p < base + n',
5482 	 inv_vars_elim will have 'base' and 'n' set, which implies that both
5483 	 'base' and 'n' will be live during the loop.  More likely,
5484 	 'base + n' will be loop invariant, resulting in only one live value
5485 	 during the loop.  So in that case we clear inv_vars_elim and set
5486 	 inv_expr_elim instead.  */
5487       if (inv_vars_elim && bitmap_count_bits (inv_vars_elim) > 1)
5488 	{
5489 	  inv_expr_elim = get_loop_invariant_expr (data, bound);
5490 	  bitmap_clear (inv_vars_elim);
5491 	}
5492       /* The bound is a loop invariant, so it will be only computed
5493 	 once.  */
5494       elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
5495     }
5496 
5497   /* When the condition is a comparison of the candidate IV against
5498      zero, prefer this IV.
5499 
5500      TODO: The constant that we're subtracting from the cost should
5501      be target-dependent.  This information should be added to the
5502      target costs for each backend.  */
5503   if (!elim_cost.infinite_cost_p () /* Do not try to decrease infinite! */
5504       && integer_zerop (*bound_cst)
5505       && (operand_equal_p (*control_var, cand->var_after, 0)
5506 	  || operand_equal_p (*control_var, cand->var_before, 0)))
5507     elim_cost -= 1;
5508 
5509   express_cost = get_computation_cost (data, use, cand, false,
5510 				       &inv_vars_express, NULL,
5511 				       &inv_expr_express);
5512   if (cmp_iv != NULL)
5513     find_inv_vars (data, &cmp_iv->base, &inv_vars_express);
5514 
5515   /* Count the cost of the original bound as well.  */
5516   bound_cost = force_var_cost (data, *bound_cst, NULL);
5517   if (bound_cost.cost == 0)
5518     bound_cost.cost = parm_decl_cost (data, *bound_cst);
5519   else if (TREE_CODE (*bound_cst) == INTEGER_CST)
5520     bound_cost.cost = 0;
5521   express_cost += bound_cost;
5522 
5523   /* Choose the better approach, preferring the eliminated IV. */
5524   if (elim_cost <= express_cost)
5525     {
5526       cost = elim_cost;
5527       inv_vars = inv_vars_elim;
5528       inv_vars_elim = NULL;
5529       inv_expr = inv_expr_elim;
5530       /* For doloop candidate/use pair, adjust to zero cost.  */
5531       if (group->doloop_p && cand->doloop_p && elim_cost.cost > no_cost.cost)
5532 	cost = no_cost;
5533     }
5534   else
5535     {
5536       cost = express_cost;
5537       inv_vars = inv_vars_express;
5538       inv_vars_express = NULL;
5539       bound = NULL_TREE;
5540       comp = ERROR_MARK;
5541       inv_expr = inv_expr_express;
5542     }
5543 
5544   if (inv_expr)
5545     {
5546       inv_exprs = BITMAP_ALLOC (NULL);
5547       bitmap_set_bit (inv_exprs, inv_expr->id);
5548     }
5549   set_group_iv_cost (data, group, cand, cost,
5550 		     inv_vars, bound, comp, inv_exprs);
5551 
5552   if (inv_vars_elim)
5553     BITMAP_FREE (inv_vars_elim);
5554   if (inv_vars_express)
5555     BITMAP_FREE (inv_vars_express);
5556 
5557   return !cost.infinite_cost_p ();
5558 }
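
/* Sketch of the choice above (not from the original sources): for a test
   'i < n' and a pointer candidate p, the elimination path prices the new
   bound 'base + n' (setup cost only, as it is loop invariant), while the
   expression path prices recomputing i from p on every iteration; the
   cheaper alternative decides whether a BOUND and comparison operator are
   recorded for the rewrite.  */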
5559 
5560 /* Determines cost of computing uses in GROUP with CAND.  Returns false
5561    if USE cannot be represented with CAND.  */
5562 
5563 static bool
5564 determine_group_iv_cost (struct ivopts_data *data,
5565 			 struct iv_group *group, struct iv_cand *cand)
5566 {
5567   switch (group->type)
5568     {
5569     case USE_NONLINEAR_EXPR:
5570       return determine_group_iv_cost_generic (data, group, cand);
5571 
5572     case USE_REF_ADDRESS:
5573     case USE_PTR_ADDRESS:
5574       return determine_group_iv_cost_address (data, group, cand);
5575 
5576     case USE_COMPARE:
5577       return determine_group_iv_cost_cond (data, group, cand);
5578 
5579     default:
5580       gcc_unreachable ();
5581     }
5582 }
5583 
5584 /* Return true if get_computation_cost indicates that autoincrement is
5585    a possibility for the pair of USE and CAND, false otherwise.  */
5586 
5587 static bool
5588 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
5589 			   struct iv_cand *cand)
5590 {
5591   if (!address_p (use->type))
5592     return false;
5593 
5594   bool can_autoinc = false;
5595   get_computation_cost (data, use, cand, true, NULL, &can_autoinc, NULL);
5596   return can_autoinc;
5597 }
5598 
5599 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5600    use that allows autoincrement, and set their AINC_USE if possible.  */
5601 
5602 static void
5603 set_autoinc_for_original_candidates (struct ivopts_data *data)
5604 {
5605   unsigned i, j;
5606 
5607   for (i = 0; i < data->vcands.length (); i++)
5608     {
5609       struct iv_cand *cand = data->vcands[i];
5610       struct iv_use *closest_before = NULL;
5611       struct iv_use *closest_after = NULL;
5612       if (cand->pos != IP_ORIGINAL)
5613 	continue;
5614 
5615       for (j = 0; j < data->vgroups.length (); j++)
5616 	{
5617 	  struct iv_group *group = data->vgroups[j];
5618 	  struct iv_use *use = group->vuses[0];
5619 	  unsigned uid = gimple_uid (use->stmt);
5620 
5621 	  if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
5622 	    continue;
5623 
5624 	  if (uid < gimple_uid (cand->incremented_at)
5625 	      && (closest_before == NULL
5626 		  || uid > gimple_uid (closest_before->stmt)))
5627 	    closest_before = use;
5628 
5629 	  if (uid > gimple_uid (cand->incremented_at)
5630 	      && (closest_after == NULL
5631 		  || uid < gimple_uid (closest_after->stmt)))
5632 	    closest_after = use;
5633 	}
5634 
5635       if (closest_before != NULL
5636 	  && autoinc_possible_for_pair (data, closest_before, cand))
5637 	cand->ainc_use = closest_before;
5638       else if (closest_after != NULL
5639 	       && autoinc_possible_for_pair (data, closest_after, cand))
5640 	cand->ainc_use = closest_after;
5641     }
5642 }
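
/* Example (a sketch, not from the original sources): given
   'MEM[p]; p = p + 4;', the load is the closest use before the increment of
   p; if get_computation_cost reports can_autoinc for that pair, the
   candidate records it as AINC_USE so a post-increment address can later
   absorb the step (see the cost_step adjustment in
   determine_group_iv_cost_address).  */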
5643 
5644 /* Relate compare use with all candidates.  */
5645 
5646 static void
5647 relate_compare_use_with_all_cands (struct ivopts_data *data)
5648 {
5649   unsigned i, count = data->vcands.length ();
5650   for (i = 0; i < data->vgroups.length (); i++)
5651     {
5652       struct iv_group *group = data->vgroups[i];
5653 
5654       if (group->type == USE_COMPARE)
5655 	bitmap_set_range (group->related_cands, 0, count);
5656     }
5657 }
5658 
5659 /* Add one doloop dedicated IV candidate:
5660      - Base is (may_be_zero ? 1 : (niter + 1)).
5661      - Step is -1.  */
5662 
5663 static void
5664 add_iv_candidate_for_doloop (struct ivopts_data *data)
5665 {
5666   tree_niter_desc *niter_desc = niter_for_single_dom_exit (data);
5667   gcc_assert (niter_desc && niter_desc->assumptions);
5668 
5669   tree niter = niter_desc->niter;
5670   tree ntype = TREE_TYPE (niter);
5671   gcc_assert (TREE_CODE (ntype) == INTEGER_TYPE);
5672 
5673   tree may_be_zero = niter_desc->may_be_zero;
5674   if (may_be_zero && integer_zerop (may_be_zero))
5675     may_be_zero = NULL_TREE;
5676   if (may_be_zero)
5677     {
5678       if (COMPARISON_CLASS_P (may_be_zero))
5679 	{
5680 	  niter = fold_build3 (COND_EXPR, ntype, may_be_zero,
5681 			       build_int_cst (ntype, 0),
5682 			       rewrite_to_non_trapping_overflow (niter));
5683 	}
5684       /* Don't try to obtain the iteration count expression when may_be_zero is
5685 	 integer_nonzerop (iteration count is then one) or anything else.  */
5686       else
5687 	return;
5688     }
5689 
5690   tree base = fold_build2 (PLUS_EXPR, ntype, unshare_expr (niter),
5691 			   build_int_cst (ntype, 1));
5692   add_candidate (data, base, build_int_cst (ntype, -1), true, NULL, NULL, true);
5693 }
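
/* Example (a sketch, not from the original sources): for an iteration count
   expression NITER, the candidate added above starts at NITER + 1 and
   decrements by 1, reaching zero exactly at the final exit test;
   may_eliminate_iv can therefore use the constant bound 0 for doloop
   candidates.  */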
5694 
5695 /* Finds the candidates for the induction variables.  */
5696 
5697 static void
5698 find_iv_candidates (struct ivopts_data *data)
5699 {
5700   /* Add commonly used ivs.  */
5701   add_standard_iv_candidates (data);
5702 
5703   /* Add doloop dedicated ivs.  */
5704   if (data->doloop_use_p)
5705     add_iv_candidate_for_doloop (data);
5706 
5707   /* Add old induction variables.  */
5708   add_iv_candidate_for_bivs (data);
5709 
5710   /* Add induction variables derived from uses.  */
5711   add_iv_candidate_for_groups (data);
5712 
5713   set_autoinc_for_original_candidates (data);
5714 
5715   /* Record the important candidates.  */
5716   record_important_candidates (data);
5717 
5718   /* Relate compare iv_use with all candidates.  */
5719   if (!data->consider_all_candidates)
5720     relate_compare_use_with_all_cands (data);
5721 
5722   if (dump_file && (dump_flags & TDF_DETAILS))
5723     {
5724       unsigned i;
5725 
5726       fprintf (dump_file, "\n<Important Candidates>:\t");
5727       for (i = 0; i < data->vcands.length (); i++)
5728 	if (data->vcands[i]->important)
5729 	  fprintf (dump_file, " %d,", data->vcands[i]->id);
5730       fprintf (dump_file, "\n");
5731 
5732       fprintf (dump_file, "\n<Group, Cand> Related:\n");
5733       for (i = 0; i < data->vgroups.length (); i++)
5734 	{
5735 	  struct iv_group *group = data->vgroups[i];
5736 
5737 	  if (group->related_cands)
5738 	    {
5739 	      fprintf (dump_file, "  Group %d:\t", group->id);
5740 	      dump_bitmap (dump_file, group->related_cands);
5741 	    }
5742 	}
5743       fprintf (dump_file, "\n");
5744     }
5745 }
5746 
5747 /* Determines costs of computing use of iv with an iv candidate.  */
5748 
5749 static void
5750 determine_group_iv_costs (struct ivopts_data *data)
5751 {
5752   unsigned i, j;
5753   struct iv_cand *cand;
5754   struct iv_group *group;
5755   bitmap to_clear = BITMAP_ALLOC (NULL);
5756 
5757   alloc_use_cost_map (data);
5758 
5759   for (i = 0; i < data->vgroups.length (); i++)
5760     {
5761       group = data->vgroups[i];
5762 
5763       if (data->consider_all_candidates)
5764 	{
5765 	  for (j = 0; j < data->vcands.length (); j++)
5766 	    {
5767 	      cand = data->vcands[j];
5768 	      determine_group_iv_cost (data, group, cand);
5769 	    }
5770 	}
5771       else
5772 	{
5773 	  bitmap_iterator bi;
5774 
5775 	  EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, j, bi)
5776 	    {
5777 	      cand = data->vcands[j];
5778 	      if (!determine_group_iv_cost (data, group, cand))
5779 		bitmap_set_bit (to_clear, j);
5780 	    }
5781 
5782 	  /* Remove the candidates for which the cost is infinite from
5783 	     the list of related candidates.  */
5784 	  bitmap_and_compl_into (group->related_cands, to_clear);
5785 	  bitmap_clear (to_clear);
5786 	}
5787     }
5788 
5789   BITMAP_FREE (to_clear);
5790 
5791   if (dump_file && (dump_flags & TDF_DETAILS))
5792     {
5793       bitmap_iterator bi;
5794 
5795       /* Dump invariant variables.  */
5796       fprintf (dump_file, "\n<Invariant Vars>:\n");
5797       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5798 	{
5799 	  struct version_info *info = ver_info (data, i);
5800 	  if (info->inv_id)
5801 	    {
5802 	      fprintf (dump_file, "Inv %d:\t", info->inv_id);
5803 	      print_generic_expr (dump_file, info->name, TDF_SLIM);
5804 	      fprintf (dump_file, "%s\n",
5805 		       info->has_nonlin_use ? "" : "\t(eliminable)");
5806 	    }
5807 	}
5808 
5809       /* Dump invariant expressions.  */
5810       fprintf (dump_file, "\n<Invariant Expressions>:\n");
5811       auto_vec <iv_inv_expr_ent *> list (data->inv_expr_tab->elements ());
5812 
5813       for (hash_table<iv_inv_expr_hasher>::iterator it
5814 	   = data->inv_expr_tab->begin (); it != data->inv_expr_tab->end ();
5815 	   ++it)
5816 	list.safe_push (*it);
5817 
5818       list.qsort (sort_iv_inv_expr_ent);
5819 
5820       for (i = 0; i < list.length (); ++i)
5821 	{
5822 	  fprintf (dump_file, "inv_expr %d: \t", list[i]->id);
5823 	  print_generic_expr (dump_file, list[i]->expr, TDF_SLIM);
5824 	  fprintf (dump_file, "\n");
5825 	}
5826 
5827       fprintf (dump_file, "\n<Group-candidate Costs>:\n");
5828 
5829       for (i = 0; i < data->vgroups.length (); i++)
5830 	{
5831 	  group = data->vgroups[i];
5832 
5833 	  fprintf (dump_file, "Group %d:\n", i);
5834 	  fprintf (dump_file, "  cand\tcost\tcompl.\tinv.expr.\tinv.vars\n");
5835 	  for (j = 0; j < group->n_map_members; j++)
5836 	    {
5837 	      if (!group->cost_map[j].cand
5838 		  || group->cost_map[j].cost.infinite_cost_p ())
5839 		continue;
5840 
5841 	      fprintf (dump_file, "  %d\t%" PRId64 "\t%d\t",
5842 		       group->cost_map[j].cand->id,
5843 		       group->cost_map[j].cost.cost,
5844 		       group->cost_map[j].cost.complexity);
5845 	      if (!group->cost_map[j].inv_exprs
5846 		  || bitmap_empty_p (group->cost_map[j].inv_exprs))
5847 		fprintf (dump_file, "NIL;\t");
5848 	      else
5849 		bitmap_print (dump_file,
5850 			      group->cost_map[j].inv_exprs, "", ";\t");
5851 	      if (!group->cost_map[j].inv_vars
5852 		  || bitmap_empty_p (group->cost_map[j].inv_vars))
5853 		fprintf (dump_file, "NIL;\n");
5854 	      else
5855 		bitmap_print (dump_file,
5856 			      group->cost_map[j].inv_vars, "", "\n");
5857 	    }
5858 
5859 	  fprintf (dump_file, "\n");
5860 	}
5861       fprintf (dump_file, "\n");
5862     }
5863 }
5864 
5865 /* Determines cost of the candidate CAND.  */
5866 
5867 static void
5868 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5869 {
5870   comp_cost cost_base;
5871   int64_t cost, cost_step;
5872   tree base;
5873 
5874   gcc_assert (cand->iv != NULL);
5875 
5876   /* There are two costs associated with the candidate -- its increment
5877      and its initialization.  The second is almost negligible for any loop
5878      that rolls enough, so we give it only very little weight.  */
5879 
5880   base = cand->iv->base;
5881   cost_base = force_var_cost (data, base, NULL);
5882   /* It is exceptional for the iv register to happen to be initialized with
5883      the proper value at no cost.  In general, there will be at least a regcopy
5884      or a const set.  */
5885   if (cost_base.cost == 0)
5886     cost_base.cost = COSTS_N_INSNS (1);
5887   /* Doloop decrement should be considered as zero cost.  */
5888   if (cand->doloop_p)
5889     cost_step = 0;
5890   else
5891     cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5892   cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5893 
5894   /* Prefer the original ivs unless we may gain something by replacing it.
5895      The reason is to make debugging simpler; so this is not relevant for
5896      artificial ivs created by other optimization passes.  */
5897   if ((cand->pos != IP_ORIGINAL
5898        || !SSA_NAME_VAR (cand->var_before)
5899        || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5900       /* Prefer doloop as well.  */
5901       && !cand->doloop_p)
5902     cost++;
5903 
5904   /* Prefer not to insert statements into latch unless there are some
5905      already (so that we do not create unnecessary jumps).  */
5906   if (cand->pos == IP_END
5907       && empty_block_p (ip_end_pos (data->current_loop)))
5908     cost++;
5909 
5910   cand->cost = cost;
5911   cand->cost_step = cost_step;
5912 }
5913 
5914 /* Determines costs of computation of the candidates.  */
5915 
5916 static void
determine_iv_costs(struct ivopts_data * data)5917 determine_iv_costs (struct ivopts_data *data)
5918 {
5919   unsigned i;
5920 
5921   if (dump_file && (dump_flags & TDF_DETAILS))
5922     {
5923       fprintf (dump_file, "<Candidate Costs>:\n");
5924       fprintf (dump_file, "  cand\tcost\n");
5925     }
5926 
5927   for (i = 0; i < data->vcands.length (); i++)
5928     {
5929       struct iv_cand *cand = data->vcands[i];
5930 
5931       determine_iv_cost (data, cand);
5932 
5933       if (dump_file && (dump_flags & TDF_DETAILS))
5934 	fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
5935     }
5936 
5937   if (dump_file && (dump_flags & TDF_DETAILS))
5938     fprintf (dump_file, "\n");
5939 }
5940 
5941 /* Estimate register pressure for loop having N_INVS invariants and N_CANDS
5942    induction variables.  Note N_INVS includes both invariant variables and
5943    invariant expressions.  */
5944 
5945 static unsigned
5946 ivopts_estimate_reg_pressure (struct ivopts_data *data, unsigned n_invs,
5947 			      unsigned n_cands)
5948 {
5949   unsigned cost;
5950   unsigned n_old = data->regs_used, n_new = n_invs + n_cands;
5951   unsigned regs_needed = n_new + n_old, available_regs = target_avail_regs;
5952   bool speed = data->speed;
5953 
5954   /* If there is a call in the loop body, the call-clobbered registers
5955      are not available for loop invariants.  */
5956   if (data->body_includes_call)
5957     available_regs = available_regs - target_clobbered_regs;
5958 
5959   /* If we have enough registers.  */
5960   if (regs_needed + target_res_regs < available_regs)
5961     cost = n_new;
5962   /* If close to running out of registers, try to preserve them.  */
5963   else if (regs_needed <= available_regs)
5964     cost = target_reg_cost [speed] * regs_needed;
5965   /* If we run out of available registers but the number of candidates
5966      does not, we penalize extra registers using target_spill_cost.  */
5967   else if (n_cands <= available_regs)
5968     cost = target_reg_cost [speed] * available_regs
5969 	   + target_spill_cost [speed] * (regs_needed - available_regs);
5970   /* If even the number of candidates exceeds the available registers, we
5971      penalize extra candidate registers using target_spill_cost * 2, because
5972      it is more expensive to spill an induction variable than an invariant.  */
5973   else
5974     cost = target_reg_cost [speed] * available_regs
5975 	   + target_spill_cost [speed] * (n_cands - available_regs) * 2
5976 	   + target_spill_cost [speed] * (regs_needed - n_cands);
5977 
5978   /* Finally, add the number of candidates, so that we prefer eliminating
5979      induction variables if possible.  */
5980   return cost + n_cands;
5981 }
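
/* Worked example (a sketch, not from the original sources): with
   target_avail_regs == 16, regs_used == 4, n_invs == 3 and n_cands == 2,
   regs_needed == 9; if target_res_regs == 3, then 9 + 3 < 16 and the
   estimate is simply n_new + n_cands == 5 + 2 == 7.  Larger sets start
   paying target_reg_cost and eventually target_spill_cost as the register
   budget tightens.  */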
5982 
5983 /* For each size of the induction variable set determine the penalty.  */
5984 
5985 static void
determine_set_costs(struct ivopts_data * data)5986 determine_set_costs (struct ivopts_data *data)
5987 {
5988   unsigned j, n;
5989   gphi *phi;
5990   gphi_iterator psi;
5991   tree op;
5992   class loop *loop = data->current_loop;
5993   bitmap_iterator bi;
5994 
5995   if (dump_file && (dump_flags & TDF_DETAILS))
5996     {
5997       fprintf (dump_file, "<Global Costs>:\n");
5998       fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
5999       fprintf (dump_file, "  target_clobbered_regs %d\n", target_clobbered_regs);
6000       fprintf (dump_file, "  target_reg_cost %d\n", target_reg_cost[data->speed]);
6001       fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost[data->speed]);
6002     }
6003 
6004   n = 0;
6005   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
6006     {
6007       phi = psi.phi ();
6008       op = PHI_RESULT (phi);
6009 
6010       if (virtual_operand_p (op))
6011 	continue;
6012 
6013       if (get_iv (data, op))
6014 	continue;
6015 
6016       if (!POINTER_TYPE_P (TREE_TYPE (op))
6017 	  && !INTEGRAL_TYPE_P (TREE_TYPE (op)))
6018 	continue;
6019 
6020       n++;
6021     }
6022 
6023   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
6024     {
6025       struct version_info *info = ver_info (data, j);
6026 
6027       if (info->inv_id && info->has_nonlin_use)
6028 	n++;
6029     }
6030 
6031   data->regs_used = n;
6032   if (dump_file && (dump_flags & TDF_DETAILS))
6033     fprintf (dump_file, "  regs_used %d\n", n);
6034 
6035   if (dump_file && (dump_flags & TDF_DETAILS))
6036     {
6037       fprintf (dump_file, "  cost for size:\n");
6038       fprintf (dump_file, "  ivs\tcost\n");
6039       for (j = 0; j <= 2 * target_avail_regs; j++)
6040 	fprintf (dump_file, "  %d\t%d\n", j,
6041 		 ivopts_estimate_reg_pressure (data, 0, j));
6042       fprintf (dump_file, "\n");
6043     }
6044 }
6045 
6046 /* Returns true if A is a cheaper cost pair than B.  */
6047 
6048 static bool
6049 cheaper_cost_pair (class cost_pair *a, class cost_pair *b)
6050 {
6051   if (!a)
6052     return false;
6053 
6054   if (!b)
6055     return true;
6056 
6057   if (a->cost < b->cost)
6058     return true;
6059 
6060   if (b->cost < a->cost)
6061     return false;
6062 
6063   /* In case the costs are the same, prefer the cheaper candidate.  */
6064   if (a->cand->cost < b->cand->cost)
6065     return true;
6066 
6067   return false;
6068 }
6069 
6070 /* Compare if A is a more expensive cost pair than B.  Return 1, 0 and -1
6071    for more expensive, equal and cheaper respectively.  */
6072 
6073 static int
6074 compare_cost_pair (class cost_pair *a, class cost_pair *b)
6075 {
6076   if (cheaper_cost_pair (a, b))
6077     return -1;
6078   if (cheaper_cost_pair (b, a))
6079     return 1;
6080 
6081   return 0;
6082 }
6083 
6084 /* Returns the candidate by which USE is expressed in IVS.  */
6085 
6086 static class cost_pair *
6087 iv_ca_cand_for_group (class iv_ca *ivs, struct iv_group *group)
6088 {
6089   return ivs->cand_for_group[group->id];
6090 }
6091 
6092 /* Computes the cost field of IVS structure.  */
6093 
6094 static void
6095 iv_ca_recount_cost (struct ivopts_data *data, class iv_ca *ivs)
6096 {
6097   comp_cost cost = ivs->cand_use_cost;
6098 
6099   cost += ivs->cand_cost;
6100   cost += ivopts_estimate_reg_pressure (data, ivs->n_invs, ivs->n_cands);
6101   ivs->cost = cost;
6102 }
6103 
6104 /* Remove use of invariants in set INVS by decreasing counter in N_INV_USES
6105    and IVS.  */
6106 
6107 static void
6108 iv_ca_set_remove_invs (class iv_ca *ivs, bitmap invs, unsigned *n_inv_uses)
6109 {
6110   bitmap_iterator bi;
6111   unsigned iid;
6112 
6113   if (!invs)
6114     return;
6115 
6116   gcc_assert (n_inv_uses != NULL);
6117   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
6118     {
6119       n_inv_uses[iid]--;
6120       if (n_inv_uses[iid] == 0)
6121 	ivs->n_invs--;
6122     }
6123 }
6124 
6125 /* Set USE not to be expressed by any candidate in IVS.  */
6126 
6127 static void
6128 iv_ca_set_no_cp (struct ivopts_data *data, class iv_ca *ivs,
6129 		 struct iv_group *group)
6130 {
6131   unsigned gid = group->id, cid;
6132   class cost_pair *cp;
6133 
6134   cp = ivs->cand_for_group[gid];
6135   if (!cp)
6136     return;
6137   cid = cp->cand->id;
6138 
6139   ivs->bad_groups++;
6140   ivs->cand_for_group[gid] = NULL;
6141   ivs->n_cand_uses[cid]--;
6142 
6143   if (ivs->n_cand_uses[cid] == 0)
6144     {
6145       bitmap_clear_bit (ivs->cands, cid);
6146       if (!cp->cand->doloop_p || !targetm.have_count_reg_decr_p)
6147 	ivs->n_cands--;
6148       ivs->cand_cost -= cp->cand->cost;
6149       iv_ca_set_remove_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses);
6150       iv_ca_set_remove_invs (ivs, cp->cand->inv_exprs, ivs->n_inv_expr_uses);
6151     }
6152 
6153   ivs->cand_use_cost -= cp->cost;
6154   iv_ca_set_remove_invs (ivs, cp->inv_vars, ivs->n_inv_var_uses);
6155   iv_ca_set_remove_invs (ivs, cp->inv_exprs, ivs->n_inv_expr_uses);
6156   iv_ca_recount_cost (data, ivs);
6157 }
6158 
6159 /* Add use of invariants in set INVS by increasing counter in N_INV_USES and
6160    IVS.  */
6161 
6162 static void
6163 iv_ca_set_add_invs (class iv_ca *ivs, bitmap invs, unsigned *n_inv_uses)
6164 {
6165   bitmap_iterator bi;
6166   unsigned iid;
6167 
6168   if (!invs)
6169     return;
6170 
6171   gcc_assert (n_inv_uses != NULL);
6172   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
6173     {
6174       n_inv_uses[iid]++;
6175       if (n_inv_uses[iid] == 1)
6176 	ivs->n_invs++;
6177     }
6178 }
6179 
6180 /* Set cost pair for GROUP in set IVS to CP.  */
6181 
6182 static void
6183 iv_ca_set_cp (struct ivopts_data *data, class iv_ca *ivs,
6184 	      struct iv_group *group, class cost_pair *cp)
6185 {
6186   unsigned gid = group->id, cid;
6187 
6188   if (ivs->cand_for_group[gid] == cp)
6189     return;
6190 
6191   if (ivs->cand_for_group[gid])
6192     iv_ca_set_no_cp (data, ivs, group);
6193 
6194   if (cp)
6195     {
6196       cid = cp->cand->id;
6197 
6198       ivs->bad_groups--;
6199       ivs->cand_for_group[gid] = cp;
6200       ivs->n_cand_uses[cid]++;
6201       if (ivs->n_cand_uses[cid] == 1)
6202 	{
6203 	  bitmap_set_bit (ivs->cands, cid);
6204 	  if (!cp->cand->doloop_p || !targetm.have_count_reg_decr_p)
6205 	    ivs->n_cands++;
6206 	  ivs->cand_cost += cp->cand->cost;
6207 	  iv_ca_set_add_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses);
6208 	  iv_ca_set_add_invs (ivs, cp->cand->inv_exprs, ivs->n_inv_expr_uses);
6209 	}
6210 
6211       ivs->cand_use_cost += cp->cost;
6212       iv_ca_set_add_invs (ivs, cp->inv_vars, ivs->n_inv_var_uses);
6213       iv_ca_set_add_invs (ivs, cp->inv_exprs, ivs->n_inv_expr_uses);
6214       iv_ca_recount_cost (data, ivs);
6215     }
6216 }
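
/* Note (an observation, not from the original sources): n_cand_uses and the
   n_inv_*_uses arrays act as reference counts -- a candidate's own cost and
   invariants are charged only when its first group is assigned to it
   (count 0 -> 1) and released when the last group leaves it (1 -> 0),
   mirroring iv_ca_set_no_cp above.  */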
6217 
6218 /* Extend set IVS by expressing USE by some of the candidates in it
6219    if possible.  Consider all important candidates if candidates in
6220    set IVS don't give any result.  */
6221 
6222 static void
6223 iv_ca_add_group (struct ivopts_data *data, class iv_ca *ivs,
6224 	       struct iv_group *group)
6225 {
6226   class cost_pair *best_cp = NULL, *cp;
6227   bitmap_iterator bi;
6228   unsigned i;
6229   struct iv_cand *cand;
6230 
6231   gcc_assert (ivs->upto >= group->id);
6232   ivs->upto++;
6233   ivs->bad_groups++;
6234 
6235   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6236     {
6237       cand = data->vcands[i];
6238       cp = get_group_iv_cost (data, group, cand);
6239       if (cheaper_cost_pair (cp, best_cp))
6240 	best_cp = cp;
6241     }
6242 
6243   if (best_cp == NULL)
6244     {
6245       EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
6246 	{
6247 	  cand = data->vcands[i];
6248 	  cp = get_group_iv_cost (data, group, cand);
6249 	  if (cheaper_cost_pair (cp, best_cp))
6250 	    best_cp = cp;
6251 	}
6252     }
6253 
6254   iv_ca_set_cp (data, ivs, group, best_cp);
6255 }
6256 
6257 /* Get cost for assignment IVS.  */
6258 
6259 static comp_cost
6260 iv_ca_cost (class iv_ca *ivs)
6261 {
6262   /* This was a conditional expression but it triggered a bug in
6263      Sun C 5.5.  */
6264   if (ivs->bad_groups)
6265     return infinite_cost;
6266   else
6267     return ivs->cost;
6268 }
6269 
6270 /* Compare if applying NEW_CP to GROUP for IVS introduces more invariants
6271    than OLD_CP.  Return 1, 0 and -1 for more, equal and fewer invariants
6272    respectively.  */
6273 
6274 static int
6275 iv_ca_compare_deps (struct ivopts_data *data, class iv_ca *ivs,
6276 		    struct iv_group *group, class cost_pair *old_cp,
6277 		    class cost_pair *new_cp)
6278 {
6279   gcc_assert (old_cp && new_cp && old_cp != new_cp);
6280   unsigned old_n_invs = ivs->n_invs;
6281   iv_ca_set_cp (data, ivs, group, new_cp);
6282   unsigned new_n_invs = ivs->n_invs;
6283   iv_ca_set_cp (data, ivs, group, old_cp);
6284 
6285   return new_n_invs > old_n_invs ? 1 : (new_n_invs < old_n_invs ? -1 : 0);
6286 }
6287 
6288 /* Creates change of expressing GROUP by NEW_CP instead of OLD_CP and chains
6289    it before NEXT.  */
6290 
6291 static struct iv_ca_delta *
6292 iv_ca_delta_add (struct iv_group *group, class cost_pair *old_cp,
6293 		 class cost_pair *new_cp, struct iv_ca_delta *next)
6294 {
6295   struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
6296 
6297   change->group = group;
6298   change->old_cp = old_cp;
6299   change->new_cp = new_cp;
6300   change->next = next;
6301 
6302   return change;
6303 }
6304 
6305 /* Joins two lists of changes L1 and L2.  Destructive -- old lists
6306    are rewritten.  */
6307 
6308 static struct iv_ca_delta *
6309 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
6310 {
6311   struct iv_ca_delta *last;
6312 
6313   if (!l2)
6314     return l1;
6315 
6316   if (!l1)
6317     return l2;
6318 
6319   for (last = l1; last->next; last = last->next)
6320     continue;
6321   last->next = l2;
6322 
6323   return l1;
6324 }
6325 
6326 /* Reverse the list of changes DELTA, forming the inverse to it.  */
6327 
6328 static struct iv_ca_delta *
6329 iv_ca_delta_reverse (struct iv_ca_delta *delta)
6330 {
6331   struct iv_ca_delta *act, *next, *prev = NULL;
6332 
6333   for (act = delta; act; act = next)
6334     {
6335       next = act->next;
6336       act->next = prev;
6337       prev = act;
6338 
6339       std::swap (act->old_cp, act->new_cp);
6340     }
6341 
6342   return prev;
6343 }
6344 
6345 /* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
6346    reverted instead.  */
6347 
6348 static void
6349 iv_ca_delta_commit (struct ivopts_data *data, class iv_ca *ivs,
6350 		    struct iv_ca_delta *delta, bool forward)
6351 {
6352   class cost_pair *from, *to;
6353   struct iv_ca_delta *act;
6354 
6355   if (!forward)
6356     delta = iv_ca_delta_reverse (delta);
6357 
6358   for (act = delta; act; act = act->next)
6359     {
6360       from = act->old_cp;
6361       to = act->new_cp;
6362       gcc_assert (iv_ca_cand_for_group (ivs, act->group) == from);
6363       iv_ca_set_cp (data, ivs, act->group, to);
6364     }
6365 
6366   if (!forward)
6367     iv_ca_delta_reverse (delta);
6368 }
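
/* Usage note (an observation, not from the original sources): callers
   typically commit a delta forward, read iv_ca_cost of the trial set, and
   then commit the same delta with FORWARD == false to restore the original
   assignment; see iv_ca_extend and iv_ca_narrow below for this
   try-and-revert pattern.  */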
6369 
6370 /* Returns true if CAND is used in IVS.  */
6371 
6372 static bool
6373 iv_ca_cand_used_p (class iv_ca *ivs, struct iv_cand *cand)
6374 {
6375   return ivs->n_cand_uses[cand->id] > 0;
6376 }
6377 
6378 /* Returns number of induction variable candidates in the set IVS.  */
6379 
6380 static unsigned
6381 iv_ca_n_cands (class iv_ca *ivs)
6382 {
6383   return ivs->n_cands;
6384 }
6385 
6386 /* Free the list of changes DELTA.  */
6387 
6388 static void
6389 iv_ca_delta_free (struct iv_ca_delta **delta)
6390 {
6391   struct iv_ca_delta *act, *next;
6392 
6393   for (act = *delta; act; act = next)
6394     {
6395       next = act->next;
6396       free (act);
6397     }
6398 
6399   *delta = NULL;
6400 }
6401 
6402 /* Allocates new iv candidates assignment.  */
6403 
6404 static class iv_ca *
6405 iv_ca_new (struct ivopts_data *data)
6406 {
6407   class iv_ca *nw = XNEW (class iv_ca);
6408 
6409   nw->upto = 0;
6410   nw->bad_groups = 0;
6411   nw->cand_for_group = XCNEWVEC (class cost_pair *,
6412 				 data->vgroups.length ());
6413   nw->n_cand_uses = XCNEWVEC (unsigned, data->vcands.length ());
6414   nw->cands = BITMAP_ALLOC (NULL);
6415   nw->n_cands = 0;
6416   nw->n_invs = 0;
6417   nw->cand_use_cost = no_cost;
6418   nw->cand_cost = 0;
6419   nw->n_inv_var_uses = XCNEWVEC (unsigned, data->max_inv_var_id + 1);
6420   nw->n_inv_expr_uses = XCNEWVEC (unsigned, data->max_inv_expr_id + 1);
6421   nw->cost = no_cost;
6422 
6423   return nw;
6424 }
6425 
6426 /* Free memory occupied by the set IVS.  */
6427 
6428 static void
6429 iv_ca_free (class iv_ca **ivs)
6430 {
6431   free ((*ivs)->cand_for_group);
6432   free ((*ivs)->n_cand_uses);
6433   BITMAP_FREE ((*ivs)->cands);
6434   free ((*ivs)->n_inv_var_uses);
6435   free ((*ivs)->n_inv_expr_uses);
6436   free (*ivs);
6437   *ivs = NULL;
6438 }
6439 
6440 /* Dumps IVS to FILE.  */
6441 
6442 static void
6443 iv_ca_dump (struct ivopts_data *data, FILE *file, class iv_ca *ivs)
6444 {
6445   unsigned i;
6446   comp_cost cost = iv_ca_cost (ivs);
6447 
6448   fprintf (file, "  cost: %" PRId64 " (complexity %d)\n", cost.cost,
6449 	   cost.complexity);
6450   fprintf (file, "  reg_cost: %d\n",
6451 	   ivopts_estimate_reg_pressure (data, ivs->n_invs, ivs->n_cands));
6452   fprintf (file, "  cand_cost: %" PRId64 "\n  cand_group_cost: "
6453 	   "%" PRId64 " (complexity %d)\n", ivs->cand_cost,
6454 	   ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
6455   bitmap_print (file, ivs->cands, "  candidates: ","\n");
6456 
6457   for (i = 0; i < ivs->upto; i++)
6458     {
6459       struct iv_group *group = data->vgroups[i];
6460       class cost_pair *cp = iv_ca_cand_for_group (ivs, group);
6461       if (cp)
6462         fprintf (file, "   group:%d --> iv_cand:%d, cost=("
6463 		 "%" PRId64 ",%d)\n", group->id, cp->cand->id,
6464 		 cp->cost.cost, cp->cost.complexity);
6465       else
6466 	fprintf (file, "   group:%d --> ??\n", group->id);
6467     }
6468 
6469   const char *pref = "";
6470   fprintf (file, "  invariant variables: ");
6471   for (i = 1; i <= data->max_inv_var_id; i++)
6472     if (ivs->n_inv_var_uses[i])
6473       {
6474 	fprintf (file, "%s%d", pref, i);
6475 	pref = ", ";
6476       }
6477 
6478   pref = "";
6479   fprintf (file, "\n  invariant expressions: ");
6480   for (i = 1; i <= data->max_inv_expr_id; i++)
6481     if (ivs->n_inv_expr_uses[i])
6482       {
6483 	fprintf (file, "%s%d", pref, i);
6484 	pref = ", ";
6485       }
6486 
6487   fprintf (file, "\n\n");
6488 }
6489 
6490 /* Try changing candidate in IVS to CAND for each use.  Return cost of the
6491    new set, and store differences in DELTA.  Number of induction variables
6492    in the new set is stored to N_IVS.  MIN_NCAND is a flag; when it is true,
6493    the function will try to find a solution with minimal iv candidates.  */
6494 
6495 static comp_cost
6496 iv_ca_extend (struct ivopts_data *data, class iv_ca *ivs,
6497 	      struct iv_cand *cand, struct iv_ca_delta **delta,
6498 	      unsigned *n_ivs, bool min_ncand)
6499 {
6500   unsigned i;
6501   comp_cost cost;
6502   struct iv_group *group;
6503   class cost_pair *old_cp, *new_cp;
6504 
6505   *delta = NULL;
6506   for (i = 0; i < ivs->upto; i++)
6507     {
6508       group = data->vgroups[i];
6509       old_cp = iv_ca_cand_for_group (ivs, group);
6510 
6511       if (old_cp
6512 	  && old_cp->cand == cand)
6513 	continue;
6514 
6515       new_cp = get_group_iv_cost (data, group, cand);
6516       if (!new_cp)
6517 	continue;
6518 
6519       if (!min_ncand)
6520 	{
6521 	  int cmp_invs = iv_ca_compare_deps (data, ivs, group, old_cp, new_cp);
6522 	  /* Skip if new_cp depends on more invariants.  */
6523 	  if (cmp_invs > 0)
6524 	    continue;
6525 
6526 	  int cmp_cost = compare_cost_pair (new_cp, old_cp);
6527 	  /* Skip if new_cp is not cheaper.  */
6528 	  if (cmp_cost > 0 || (cmp_cost == 0 && cmp_invs == 0))
6529 	    continue;
6530 	}
6531 
6532       *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
6533     }
6534 
6535   iv_ca_delta_commit (data, ivs, *delta, true);
6536   cost = iv_ca_cost (ivs);
6537   if (n_ivs)
6538     *n_ivs = iv_ca_n_cands (ivs);
6539   iv_ca_delta_commit (data, ivs, *delta, false);
6540 
6541   return cost;
6542 }
6543 
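/* The temporary commit/rollback pairing above is the probing idiom used
   throughout this file: apply a delta, measure the cost, then undo it so
   the caller can decide later whether to keep it.  A minimal sketch of a
   hypothetical caller (illustrative only, not actual pass code):

     struct iv_ca_delta *delta;
     comp_cost new_cost = iv_ca_extend (data, ivs, cand, &delta, NULL, false);
     if (new_cost < iv_ca_cost (ivs))
       iv_ca_delta_commit (data, ivs, delta, true);   /+ keep the change +/
     iv_ca_delta_free (&delta);

   (Comment markers inside the sketch are written as /+ +/ to keep this
   block a valid C comment.)  */
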
6544 /* Try narrowing set IVS by removing CAND.  Return the cost of
6545    the new set and store the differences in DELTA.  START is
6546    the candidate with which we start narrowing.  */
6547 
6548 static comp_cost
6549 iv_ca_narrow (struct ivopts_data *data, class iv_ca *ivs,
6550 	      struct iv_cand *cand, struct iv_cand *start,
6551 	      struct iv_ca_delta **delta)
6552 {
6553   unsigned i, ci;
6554   struct iv_group *group;
6555   class cost_pair *old_cp, *new_cp, *cp;
6556   bitmap_iterator bi;
6557   struct iv_cand *cnd;
6558   comp_cost cost, best_cost, acost;
6559 
6560   *delta = NULL;
6561   for (i = 0; i < data->vgroups.length (); i++)
6562     {
6563       group = data->vgroups[i];
6564 
6565       old_cp = iv_ca_cand_for_group (ivs, group);
6566       if (old_cp->cand != cand)
6567 	continue;
6568 
6569       best_cost = iv_ca_cost (ivs);
6570       /* Start narrowing with START.  */
6571       new_cp = get_group_iv_cost (data, group, start);
6572 
6573       if (data->consider_all_candidates)
6574 	{
6575 	  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
6576 	    {
6577 	      if (ci == cand->id || (start && ci == start->id))
6578 		continue;
6579 
6580 	      cnd = data->vcands[ci];
6581 
6582 	      cp = get_group_iv_cost (data, group, cnd);
6583 	      if (!cp)
6584 		continue;
6585 
6586 	      iv_ca_set_cp (data, ivs, group, cp);
6587 	      acost = iv_ca_cost (ivs);
6588 
6589 	      if (acost < best_cost)
6590 		{
6591 		  best_cost = acost;
6592 		  new_cp = cp;
6593 		}
6594 	    }
6595 	}
6596       else
6597 	{
6598 	  EXECUTE_IF_AND_IN_BITMAP (group->related_cands, ivs->cands, 0, ci, bi)
6599 	    {
6600 	      if (ci == cand->id || (start && ci == start->id))
6601 		continue;
6602 
6603 	      cnd = data->vcands[ci];
6604 
6605 	      cp = get_group_iv_cost (data, group, cnd);
6606 	      if (!cp)
6607 		continue;
6608 
6609 	      iv_ca_set_cp (data, ivs, group, cp);
6610 	      acost = iv_ca_cost (ivs);
6611 
6612 	      if (acost < best_cost)
6613 		{
6614 		  best_cost = acost;
6615 		  new_cp = cp;
6616 		}
6617 	    }
6618 	}
6619       /* Restore the old cp for this group.  */
6620       iv_ca_set_cp (data, ivs, group, old_cp);
6621 
6622       if (!new_cp)
6623 	{
6624 	  iv_ca_delta_free (delta);
6625 	  return infinite_cost;
6626 	}
6627 
6628       *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
6629     }
6630 
6631   iv_ca_delta_commit (data, ivs, *delta, true);
6632   cost = iv_ca_cost (ivs);
6633   iv_ca_delta_commit (data, ivs, *delta, false);
6634 
6635   return cost;
6636 }
6637 
6638 /* Try optimizing the set of candidates IVS by removing candidates other
6639    than EXCEPT_CAND from it.  Return cost of the new set, and store
6640    differences in DELTA.  */
6641 
6642 static comp_cost
6643 iv_ca_prune (struct ivopts_data *data, class iv_ca *ivs,
6644 	     struct iv_cand *except_cand, struct iv_ca_delta **delta)
6645 {
6646   bitmap_iterator bi;
6647   struct iv_ca_delta *act_delta, *best_delta;
6648   unsigned i;
6649   comp_cost best_cost, acost;
6650   struct iv_cand *cand;
6651 
6652   best_delta = NULL;
6653   best_cost = iv_ca_cost (ivs);
6654 
6655   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6656     {
6657       cand = data->vcands[i];
6658 
6659       if (cand == except_cand)
6660 	continue;
6661 
6662       acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
6663 
6664       if (acost < best_cost)
6665 	{
6666 	  best_cost = acost;
6667 	  iv_ca_delta_free (&best_delta);
6668 	  best_delta = act_delta;
6669 	}
6670       else
6671 	iv_ca_delta_free (&act_delta);
6672     }
6673 
6674   if (!best_delta)
6675     {
6676       *delta = NULL;
6677       return best_cost;
6678     }
6679 
6680   /* Recurse to possibly remove other unnecessary ivs.  */
6681   iv_ca_delta_commit (data, ivs, best_delta, true);
6682   best_cost = iv_ca_prune (data, ivs, except_cand, delta);
6683   iv_ca_delta_commit (data, ivs, best_delta, false);
6684   *delta = iv_ca_delta_join (best_delta, *delta);
6685   return best_cost;
6686 }
6687 
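/* Illustration of why the recursion above matters (hypothetical trace):
   with IVS = {c1, c2, c3}, narrowing away c3 may move its groups onto c1;
   only after that removal is committed can a second pass discover that c2
   now serves no group cheaply and remove it as well.  The joined delta
   returned to the caller describes both removals.  */
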
6688 /* Check if CAND_IDX is a candidate other than OLD_CAND with a cheaper
6689    local cost for GROUP than BEST_CP.  If so, return a pointer to its
6690    cost_pair; otherwise just return BEST_CP.  */
6691 
6692 static class cost_pair*
6693 cheaper_cost_with_cand (struct ivopts_data *data, struct iv_group *group,
6694 			unsigned int cand_idx, struct iv_cand *old_cand,
6695 			class cost_pair *best_cp)
6696 {
6697   struct iv_cand *cand;
6698   class cost_pair *cp;
6699 
6700   gcc_assert (old_cand != NULL && best_cp != NULL);
6701   if (cand_idx == old_cand->id)
6702     return best_cp;
6703 
6704   cand = data->vcands[cand_idx];
6705   cp = get_group_iv_cost (data, group, cand);
6706   if (cp != NULL && cheaper_cost_pair (cp, best_cp))
6707     return cp;
6708 
6709   return best_cp;
6710 }
6711 
6712 /* Try breaking the locally optimal fixed point for IVS by replacing
6713    candidates that are used by more than one iv use.  For each such
6714    candidate, this function tries to represent the iv uses under that
6715    candidate using other candidates with lower local cost, then tries to
6716    prune the new set.  If the new set has lower cost, it returns the new
6717    cost after recording the candidate replacement in list DELTA.  */
6718 
6719 static comp_cost
6720 iv_ca_replace (struct ivopts_data *data, class iv_ca *ivs,
6721 	       struct iv_ca_delta **delta)
6722 {
6723   bitmap_iterator bi, bj;
6724   unsigned int i, j, k;
6725   struct iv_cand *cand;
6726   comp_cost orig_cost, acost;
6727   struct iv_ca_delta *act_delta, *tmp_delta;
6728   class cost_pair *old_cp, *best_cp = NULL;
6729 
6730   *delta = NULL;
6731   orig_cost = iv_ca_cost (ivs);
6732 
6733   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6734     {
6735       if (ivs->n_cand_uses[i] == 1
6736 	  || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
6737 	continue;
6738 
6739       cand = data->vcands[i];
6740 
6741       act_delta = NULL;
6742       /*  Represent uses under current candidate using other ones with
6743 	  lower local cost.  */
6744       for (j = 0; j < ivs->upto; j++)
6745 	{
6746 	  struct iv_group *group = data->vgroups[j];
6747 	  old_cp = iv_ca_cand_for_group (ivs, group);
6748 
6749 	  if (old_cp->cand != cand)
6750 	    continue;
6751 
6752 	  best_cp = old_cp;
6753 	  if (data->consider_all_candidates)
6754 	    for (k = 0; k < data->vcands.length (); k++)
6755 	      best_cp = cheaper_cost_with_cand (data, group, k,
6756 						old_cp->cand, best_cp);
6757 	  else
6758 	    EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, k, bj)
6759 	      best_cp = cheaper_cost_with_cand (data, group, k,
6760 						old_cp->cand, best_cp);
6761 
6762 	  if (best_cp == old_cp)
6763 	    continue;
6764 
6765 	  act_delta = iv_ca_delta_add (group, old_cp, best_cp, act_delta);
6766 	}
6767       /* No need for further pruning.  */
6768       if (!act_delta)
6769 	continue;
6770 
6771       /* Prune the new candidate set.  */
6772       iv_ca_delta_commit (data, ivs, act_delta, true);
6773       acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
6774       iv_ca_delta_commit (data, ivs, act_delta, false);
6775       act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6776 
6777       if (acost < orig_cost)
6778 	{
6779 	  *delta = act_delta;
6780 	  return acost;
6781 	}
6782       else
6783 	iv_ca_delta_free (&act_delta);
6784     }
6785 
6786   return orig_cost;
6787 }
6788 
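/* Hypothetical trace of one replacement step above: candidate C serves
   groups G0 and G1, but G0 is locally cheaper under C0 and G1 under C1.
   Moving both groups off C and then pruning can drop C entirely; the new
   set {C0, C1} is kept only if its total cost beats ORIG_COST, otherwise
   the delta is discarded and the next shared candidate is tried.  */
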
6789 /* Tries to extend the set IVS in the best possible way in order to
6790    express the GROUP.  If ORIGINALP is true, prefer candidates from
6791    the original set of IVs, otherwise favor important candidates not
6792    based on any memory object.  */
6793 
6794 static bool
6795 try_add_cand_for (struct ivopts_data *data, class iv_ca *ivs,
6796 		  struct iv_group *group, bool originalp)
6797 {
6798   comp_cost best_cost, act_cost;
6799   unsigned i;
6800   bitmap_iterator bi;
6801   struct iv_cand *cand;
6802   struct iv_ca_delta *best_delta = NULL, *act_delta;
6803   class cost_pair *cp;
6804 
6805   iv_ca_add_group (data, ivs, group);
6806   best_cost = iv_ca_cost (ivs);
6807   cp = iv_ca_cand_for_group (ivs, group);
6808   if (cp)
6809     {
6810       best_delta = iv_ca_delta_add (group, NULL, cp, NULL);
6811       iv_ca_set_no_cp (data, ivs, group);
6812     }
6813 
6814   /* If ORIGINALP is true, try to find the original IV for the use.  Otherwise
6815      first try important candidates not based on any memory object.  Only if
6816      this fails, try the specific ones.  Rationale -- in loops with many
6817      variables the best choice often is to use just one generic biv.  If we
6818      added here many ivs specific to the uses, the optimization algorithm later
6819      would be likely to get stuck in a local minimum, thus causing us to create
6820      too many ivs.  The approach from few ivs to more seems more likely to be
6821      successful -- starting from few ivs, replacing an expensive use by a
6822      specific iv should always be a win.  */
6823   EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, i, bi)
6824     {
6825       cand = data->vcands[i];
6826 
6827       if (originalp && cand->pos != IP_ORIGINAL)
6828 	continue;
6829 
6830       if (!originalp && cand->iv->base_object != NULL_TREE)
6831 	continue;
6832 
6833       if (iv_ca_cand_used_p (ivs, cand))
6834 	continue;
6835 
6836       cp = get_group_iv_cost (data, group, cand);
6837       if (!cp)
6838 	continue;
6839 
6840       iv_ca_set_cp (data, ivs, group, cp);
6841       act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
6842 			       true);
6843       iv_ca_set_no_cp (data, ivs, group);
6844       act_delta = iv_ca_delta_add (group, NULL, cp, act_delta);
6845 
6846       if (act_cost < best_cost)
6847 	{
6848 	  best_cost = act_cost;
6849 
6850 	  iv_ca_delta_free (&best_delta);
6851 	  best_delta = act_delta;
6852 	}
6853       else
6854 	iv_ca_delta_free (&act_delta);
6855     }
6856 
6857   if (best_cost.infinite_cost_p ())
6858     {
6859       for (i = 0; i < group->n_map_members; i++)
6860 	{
6861 	  cp = group->cost_map + i;
6862 	  cand = cp->cand;
6863 	  if (!cand)
6864 	    continue;
6865 
6866 	  /* Already tried this.  */
6867 	  if (cand->important)
6868 	    {
6869 	      if (originalp && cand->pos == IP_ORIGINAL)
6870 		continue;
6871 	      if (!originalp && cand->iv->base_object == NULL_TREE)
6872 		continue;
6873 	    }
6874 
6875 	  if (iv_ca_cand_used_p (ivs, cand))
6876 	    continue;
6877 
6878 	  act_delta = NULL;
6879 	  iv_ca_set_cp (data, ivs, group, cp);
6880 	  act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
6881 	  iv_ca_set_no_cp (data, ivs, group);
6882 	  act_delta = iv_ca_delta_add (group,
6883 				       iv_ca_cand_for_group (ivs, group),
6884 				       cp, act_delta);
6885 
6886 	  if (act_cost < best_cost)
6887 	    {
6888 	      best_cost = act_cost;
6889 
6890 	      if (best_delta)
6891 		iv_ca_delta_free (&best_delta);
6892 	      best_delta = act_delta;
6893 	    }
6894 	  else
6895 	    iv_ca_delta_free (&act_delta);
6896 	}
6897     }
6898 
6899   iv_ca_delta_commit (data, ivs, best_delta, true);
6900   iv_ca_delta_free (&best_delta);
6901 
6902   return !best_cost.infinite_cost_p ();
6903 }
6904 
6905 /* Finds an initial assignment of candidates to uses.  */
6906 
6907 static class iv_ca *
6908 get_initial_solution (struct ivopts_data *data, bool originalp)
6909 {
6910   unsigned i;
6911   class iv_ca *ivs = iv_ca_new (data);
6912 
6913   for (i = 0; i < data->vgroups.length (); i++)
6914     if (!try_add_cand_for (data, ivs, data->vgroups[i], originalp))
6915       {
6916 	iv_ca_free (&ivs);
6917 	return NULL;
6918       }
6919 
6920   return ivs;
6921 }
6922 
6923 /* Tries to improve the set of induction variables IVS.  TRY_REPLACE_P
6924    points to a bool variable; if it is true, this function tries to break
6925    the locally optimal fixed point by replacing candidates in IVS.  */
6926 
6927 static bool
6928 try_improve_iv_set (struct ivopts_data *data,
6929 		    class iv_ca *ivs, bool *try_replace_p)
6930 {
6931   unsigned i, n_ivs;
6932   comp_cost acost, best_cost = iv_ca_cost (ivs);
6933   struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
6934   struct iv_cand *cand;
6935 
6936   /* Try extending the set of induction variables by one.  */
6937   for (i = 0; i < data->vcands.length (); i++)
6938     {
6939       cand = data->vcands[i];
6940 
6941       if (iv_ca_cand_used_p (ivs, cand))
6942 	continue;
6943 
6944       acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6945       if (!act_delta)
6946 	continue;
6947 
6948       /* If we successfully added the candidate and the set is small enough,
6949 	 try optimizing it by removing other candidates.  */
6950       if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6951       	{
6952 	  iv_ca_delta_commit (data, ivs, act_delta, true);
6953 	  acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6954 	  iv_ca_delta_commit (data, ivs, act_delta, false);
6955 	  act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6956 	}
6957 
6958       if (acost < best_cost)
6959 	{
6960 	  best_cost = acost;
6961 	  iv_ca_delta_free (&best_delta);
6962 	  best_delta = act_delta;
6963 	}
6964       else
6965 	iv_ca_delta_free (&act_delta);
6966     }
6967 
6968   if (!best_delta)
6969     {
6970       /* Try removing the candidates from the set instead.  */
6971       best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6972 
6973       if (!best_delta && *try_replace_p)
6974 	{
6975 	  *try_replace_p = false;
6976 	  /* So far the candidate-selecting algorithm tends to choose fewer IVs
6977 	     so that it can handle cases in which loops have many variables
6978 	     but the best choice is often to use only one general biv.  One
6979 	     weakness is that it can't handle the opposite case, in which
6980 	     different candidates should be chosen with respect to each use.
6981 	     To solve the problem, we replace candidates in the manner described
6982 	     in the comments of iv_ca_replace, thus giving the general algorithm
6983 	     a chance to break the locally optimal fixed point in these cases.  */
6984 	  best_cost = iv_ca_replace (data, ivs, &best_delta);
6985 	}
6986 
6987       if (!best_delta)
6988 	return false;
6989     }
6990 
6991   iv_ca_delta_commit (data, ivs, best_delta, true);
6992   iv_ca_delta_free (&best_delta);
6993   return best_cost == iv_ca_cost (ivs);
6994 }
6995 
6996 /* Attempts to find the optimal set of induction variables.  We use a simple
6997    greedy heuristic: we try to replace at most one candidate in the selected
6998    solution and remove the unused ivs while this improves the cost.  */
6999 
7000 static class iv_ca *
7001 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
7002 {
7003   class iv_ca *set;
7004   bool try_replace_p = true;
7005 
7006   /* Get the initial solution.  */
7007   set = get_initial_solution (data, originalp);
7008   if (!set)
7009     {
7010       if (dump_file && (dump_flags & TDF_DETAILS))
7011 	fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
7012       return NULL;
7013     }
7014 
7015   if (dump_file && (dump_flags & TDF_DETAILS))
7016     {
7017       fprintf (dump_file, "Initial set of candidates:\n");
7018       iv_ca_dump (data, dump_file, set);
7019     }
7020 
7021   while (try_improve_iv_set (data, set, &try_replace_p))
7022     {
7023       if (dump_file && (dump_flags & TDF_DETAILS))
7024 	{
7025 	  fprintf (dump_file, "Improved to:\n");
7026 	  iv_ca_dump (data, dump_file, set);
7027 	}
7028     }
7029 
7030   /* If the set has infinite_cost, it can't be optimal.  */
7031   if (iv_ca_cost (set).infinite_cost_p ())
7032     {
7033       if (dump_file && (dump_flags & TDF_DETAILS))
7034 	fprintf (dump_file,
7035 		 "Overflow to infinite cost in try_improve_iv_set.\n");
7036       iv_ca_free (&set);
7037     }
7038   return set;
7039 }
7040 
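/* Schematically, the driver above is plain hill climbing (sketch only,
   not actual pass code):

     set = get_initial_solution (data, originalp);
     while (try_improve_iv_set (data, set, &try_replace_p))
       ;   extend by one candidate, prune, or (once) replace --
           whichever single step lowers the cost
     return set;

   Every tentative step is costed with the probe idiom of iv_ca_extend and
   iv_ca_prune, so a rejected step leaves the set unchanged.  */
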
7041 static class iv_ca *
7042 find_optimal_iv_set (struct ivopts_data *data)
7043 {
7044   unsigned i;
7045   comp_cost cost, origcost;
7046   class iv_ca *set, *origset;
7047 
7048   /* Determine the cost based on a strategy that starts with original IVs,
7049      then try again using a strategy that prefers candidates not based
7050      on any memory object.  */
7051   origset = find_optimal_iv_set_1 (data, true);
7052   set = find_optimal_iv_set_1 (data, false);
7053 
7054   if (!origset && !set)
7055     return NULL;
7056 
7057   origcost = origset ? iv_ca_cost (origset) : infinite_cost;
7058   cost = set ? iv_ca_cost (set) : infinite_cost;
7059 
7060   if (dump_file && (dump_flags & TDF_DETAILS))
7061     {
7062       fprintf (dump_file, "Original cost %" PRId64 " (complexity %d)\n\n",
7063 	       origcost.cost, origcost.complexity);
7064       fprintf (dump_file, "Final cost %" PRId64 " (complexity %d)\n\n",
7065 	       cost.cost, cost.complexity);
7066     }
7067 
7068   /* Choose the one with the best cost.  */
7069   if (origcost <= cost)
7070     {
7071       if (set)
7072 	iv_ca_free (&set);
7073       set = origset;
7074     }
7075   else if (origset)
7076     iv_ca_free (&origset);
7077 
7078   for (i = 0; i < data->vgroups.length (); i++)
7079     {
7080       struct iv_group *group = data->vgroups[i];
7081       group->selected = iv_ca_cand_for_group (set, group)->cand;
7082     }
7083 
7084   return set;
7085 }
7086 
7087 /* Creates a new induction variable corresponding to CAND.  */
7088 
7089 static void
7090 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
7091 {
7092   gimple_stmt_iterator incr_pos;
7093   tree base;
7094   struct iv_use *use;
7095   struct iv_group *group;
7096   bool after = false;
7097 
7098   gcc_assert (cand->iv != NULL);
7099 
7100   switch (cand->pos)
7101     {
7102     case IP_NORMAL:
7103       incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
7104       break;
7105 
7106     case IP_END:
7107       incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
7108       after = true;
7109       if (!gsi_end_p (incr_pos) && stmt_ends_bb_p (gsi_stmt (incr_pos)))
7110 	{
7111 	  edge e = find_edge (gsi_bb (incr_pos), data->current_loop->header);
7112 	  incr_pos = gsi_after_labels (split_edge (e));
7113 	  after = false;
7114 	}
7115       break;
7116 
7117     case IP_AFTER_USE:
7118       after = true;
7119       /* fall through */
7120     case IP_BEFORE_USE:
7121       incr_pos = gsi_for_stmt (cand->incremented_at);
7122       break;
7123 
7124     case IP_ORIGINAL:
7125       /* Mark that the iv is preserved.  */
7126       name_info (data, cand->var_before)->preserve_biv = true;
7127       name_info (data, cand->var_after)->preserve_biv = true;
7128 
7129       /* Rewrite the increment so that it uses var_before directly.  */
7130       use = find_interesting_uses_op (data, cand->var_after);
7131       group = data->vgroups[use->group_id];
7132       group->selected = cand;
7133       return;
7134     }
7135 
7136   gimple_add_tmp_var (cand->var_before);
7137 
7138   base = unshare_expr (cand->iv->base);
7139 
7140   create_iv (base, unshare_expr (cand->iv->step),
7141 	     cand->var_before, data->current_loop,
7142 	     &incr_pos, after, &cand->var_before, &cand->var_after);
7143 }
7144 
7145 /* Creates new induction variables described in SET.  */
7146 
7147 static void
7148 create_new_ivs (struct ivopts_data *data, class iv_ca *set)
7149 {
7150   unsigned i;
7151   struct iv_cand *cand;
7152   bitmap_iterator bi;
7153 
7154   EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
7155     {
7156       cand = data->vcands[i];
7157       create_new_iv (data, cand);
7158     }
7159 
7160   if (dump_file && (dump_flags & TDF_DETAILS))
7161     {
7162       fprintf (dump_file, "Selected IV set for loop %d",
7163 	       data->current_loop->num);
7164       if (data->loop_loc != UNKNOWN_LOCATION)
7165 	fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7166 		 LOCATION_LINE (data->loop_loc));
7167       fprintf (dump_file, ", " HOST_WIDE_INT_PRINT_DEC " avg niters",
7168 	       avg_loop_niter (data->current_loop));
7169       fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
7170       EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
7171 	{
7172 	  cand = data->vcands[i];
7173 	  dump_cand (dump_file, cand);
7174 	}
7175       fprintf (dump_file, "\n");
7176     }
7177 }
7178 
7179 /* Rewrites USE (definition of iv used in a nonlinear expression)
7180    using candidate CAND.  */
7181 
7182 static void
7183 rewrite_use_nonlinear_expr (struct ivopts_data *data,
7184 			    struct iv_use *use, struct iv_cand *cand)
7185 {
7186   gassign *ass;
7187   gimple_stmt_iterator bsi;
7188   tree comp, type = get_use_type (use), tgt;
7189 
7190   /* An important special case -- if we are asked to express value of
7191      the original iv by itself, just exit; there is no need to
7192      introduce a new computation (that might also need casting the
7193      variable to unsigned and back).  */
7194   if (cand->pos == IP_ORIGINAL
7195       && cand->incremented_at == use->stmt)
7196     {
7197       tree op = NULL_TREE;
7198       enum tree_code stmt_code;
7199 
7200       gcc_assert (is_gimple_assign (use->stmt));
7201       gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
7202 
7203       /* Check whether we may leave the computation unchanged.
7204 	 This is the case only if it does not rely on other
7205 	 computations in the loop -- otherwise, the computation
7206 	 we rely upon may be removed in remove_unused_ivs,
7207 	 thus leading to ICE.  */
7208       stmt_code = gimple_assign_rhs_code (use->stmt);
7209       if (stmt_code == PLUS_EXPR
7210 	  || stmt_code == MINUS_EXPR
7211 	  || stmt_code == POINTER_PLUS_EXPR)
7212 	{
7213 	  if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
7214 	    op = gimple_assign_rhs2 (use->stmt);
7215 	  else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
7216 	    op = gimple_assign_rhs1 (use->stmt);
7217 	}
7218 
7219       if (op != NULL_TREE)
7220 	{
7221 	  if (expr_invariant_in_loop_p (data->current_loop, op))
7222 	    return;
7223 	  if (TREE_CODE (op) == SSA_NAME)
7224 	    {
7225 	      struct iv *iv = get_iv (data, op);
7226 	      if (iv != NULL && integer_zerop (iv->step))
7227 		return;
7228 	    }
7229 	}
7230     }
7231 
7232   switch (gimple_code (use->stmt))
7233     {
7234     case GIMPLE_PHI:
7235       tgt = PHI_RESULT (use->stmt);
7236 
7237       /* If we should keep the biv, do not replace it.  */
7238       if (name_info (data, tgt)->preserve_biv)
7239 	return;
7240 
7241       bsi = gsi_after_labels (gimple_bb (use->stmt));
7242       break;
7243 
7244     case GIMPLE_ASSIGN:
7245       tgt = gimple_assign_lhs (use->stmt);
7246       bsi = gsi_for_stmt (use->stmt);
7247       break;
7248 
7249     default:
7250       gcc_unreachable ();
7251     }
7252 
7253   aff_tree aff_inv, aff_var;
7254   if (!get_computation_aff_1 (data->current_loop, use->stmt,
7255 			      use, cand, &aff_inv, &aff_var))
7256     gcc_unreachable ();
7257 
7258   unshare_aff_combination (&aff_inv);
7259   unshare_aff_combination (&aff_var);
7260   /* Prefer the CSE opportunity over the loop invariant by adding the offset
7261      last, so that iv_uses that differ only in their offsets can be CSEd
7262      (see the example after this function).  */
7262   poly_widest_int offset = aff_inv.offset;
7263   aff_inv.offset = 0;
7264 
7265   gimple_seq stmt_list = NULL, seq = NULL;
7266   tree comp_op1 = aff_combination_to_tree (&aff_inv);
7267   tree comp_op2 = aff_combination_to_tree (&aff_var);
7268   gcc_assert (comp_op1 && comp_op2);
7269 
7270   comp_op1 = force_gimple_operand (comp_op1, &seq, true, NULL);
7271   gimple_seq_add_seq (&stmt_list, seq);
7272   comp_op2 = force_gimple_operand (comp_op2, &seq, true, NULL);
7273   gimple_seq_add_seq (&stmt_list, seq);
7274 
7275   if (POINTER_TYPE_P (TREE_TYPE (comp_op2)))
7276     std::swap (comp_op1, comp_op2);
7277 
7278   if (POINTER_TYPE_P (TREE_TYPE (comp_op1)))
7279     {
7280       comp = fold_build_pointer_plus (comp_op1,
7281 				      fold_convert (sizetype, comp_op2));
7282       comp = fold_build_pointer_plus (comp,
7283 				      wide_int_to_tree (sizetype, offset));
7284     }
7285   else
7286     {
7287       comp = fold_build2 (PLUS_EXPR, TREE_TYPE (comp_op1), comp_op1,
7288 			  fold_convert (TREE_TYPE (comp_op1), comp_op2));
7289       comp = fold_build2 (PLUS_EXPR, TREE_TYPE (comp_op1), comp,
7290 			  wide_int_to_tree (TREE_TYPE (comp_op1), offset));
7291     }
7292 
7293   comp = fold_convert (type, comp);
7294   if (!valid_gimple_rhs_p (comp)
7295       || (gimple_code (use->stmt) != GIMPLE_PHI
7296 	  /* We can't allow re-allocating the stmt as it might be pointed
7297 	     to still.  */
7298 	  && (get_gimple_rhs_num_ops (TREE_CODE (comp))
7299 	      >= gimple_num_ops (gsi_stmt (bsi)))))
7300     {
7301       comp = force_gimple_operand (comp, &seq, true, NULL);
7302       gimple_seq_add_seq (&stmt_list, seq);
7303       if (POINTER_TYPE_P (TREE_TYPE (tgt)))
7304 	{
7305 	  duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
7306 	  /* As this isn't a plain copy we have to reset alignment
7307 	     information.  */
7308 	  if (SSA_NAME_PTR_INFO (comp))
7309 	    mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
7310 	}
7311     }
7312 
7313   gsi_insert_seq_before (&bsi, stmt_list, GSI_SAME_STMT);
7314   if (gimple_code (use->stmt) == GIMPLE_PHI)
7315     {
7316       ass = gimple_build_assign (tgt, comp);
7317       gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
7318 
7319       bsi = gsi_for_stmt (use->stmt);
7320       remove_phi_node (&bsi, false);
7321     }
7322   else
7323     {
7324       gimple_assign_set_rhs_from_tree (&bsi, comp);
7325       use->stmt = gsi_stmt (bsi);
7326     }
7327 }
7328 
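/* Example of the offset-last ordering above (hypothetical GIMPLE, for
   illustration): two uses sharing base B and the same iv but with offsets
   4 and 8 become

     t_1 = B + iv_5;       common subexpression, CSEable
     use1 = t_1 + 4;
     use2 = t_1 + 8;

   whereas folding the offsets into the invariant part first would yield
   two distinct computations (B + 4) + iv_5 and (B + 8) + iv_5.  */
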
7329 /* Performs a peephole optimization to reorder the iv update statement with
7330    a mem ref to enable instruction combining in later phases. The mem ref uses
7331    the iv value before the update, so the reordering transformation requires
7332    adjustment of the offset. CAND is the selected IV_CAND.
7333 
7334    Example:
7335 
7336    t = MEM_REF (base, iv1, 8, 16);  // base, index, stride, offset
7337    iv2 = iv1 + 1;
7338 
7339    if (t < val)      (1)
7340      goto L;
7341    goto Head;
7342 
7343 
7344    Directly propagating t over to (1) would introduce an overlapping live
7345    range and thus increase register pressure.  This peephole transforms it into:
7346 
7347 
7348    iv2 = iv1 + 1;
7349    t = MEM_REF (base, iv2, 8, 8);
7350    if (t < val)
7351      goto L;
7352    goto Head;
7353 */
7354 
7355 static void
7356 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
7357 {
7358   tree var_after;
7359   gimple *iv_update, *stmt;
7360   basic_block bb;
7361   gimple_stmt_iterator gsi, gsi_iv;
7362 
7363   if (cand->pos != IP_NORMAL)
7364     return;
7365 
7366   var_after = cand->var_after;
7367   iv_update = SSA_NAME_DEF_STMT (var_after);
7368 
7369   bb = gimple_bb (iv_update);
7370   gsi = gsi_last_nondebug_bb (bb);
7371   stmt = gsi_stmt (gsi);
7372 
7373   /* Only handle conditional statements for now.  */
7374   if (gimple_code (stmt) != GIMPLE_COND)
7375     return;
7376 
7377   gsi_prev_nondebug (&gsi);
7378   stmt = gsi_stmt (gsi);
7379   if (stmt != iv_update)
7380     return;
7381 
7382   gsi_prev_nondebug (&gsi);
7383   if (gsi_end_p (gsi))
7384     return;
7385 
7386   stmt = gsi_stmt (gsi);
7387   if (gimple_code (stmt) != GIMPLE_ASSIGN)
7388     return;
7389 
7390   if (stmt != use->stmt)
7391     return;
7392 
7393   if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
7394     return;
7395 
7396   if (dump_file && (dump_flags & TDF_DETAILS))
7397     {
7398       fprintf (dump_file, "Reordering \n");
7399       print_gimple_stmt (dump_file, iv_update, 0);
7400       print_gimple_stmt (dump_file, use->stmt, 0);
7401       fprintf (dump_file, "\n");
7402     }
7403 
7404   gsi = gsi_for_stmt (use->stmt);
7405   gsi_iv = gsi_for_stmt (iv_update);
7406   gsi_move_before (&gsi_iv, &gsi);
7407 
7408   cand->pos = IP_BEFORE_USE;
7409   cand->incremented_at = use->stmt;
7410 }
7411 
7412 /* Return the alias pointer type that should be used for a MEM_REF
7413    associated with USE, which has type USE_PTR_ADDRESS.  */
7414 
7415 static tree
7416 get_alias_ptr_type_for_ptr_address (iv_use *use)
7417 {
7418   gcall *call = as_a <gcall *> (use->stmt);
7419   switch (gimple_call_internal_fn (call))
7420     {
7421     case IFN_MASK_LOAD:
7422     case IFN_MASK_STORE:
7423     case IFN_MASK_LOAD_LANES:
7424     case IFN_MASK_STORE_LANES:
7425       /* The second argument contains the correct alias type.  */
7426       gcc_assert (use->op_p == gimple_call_arg_ptr (call, 0));
7427       return TREE_TYPE (gimple_call_arg (call, 1));
7428 
7429     default:
7430       gcc_unreachable ();
7431     }
7432 }
7433 
7434 
7435 /* Rewrites USE (address that is an iv) using candidate CAND.  */
7436 
7437 static void
7438 rewrite_use_address (struct ivopts_data *data,
7439 		     struct iv_use *use, struct iv_cand *cand)
7440 {
7441   aff_tree aff;
7442   bool ok;
7443 
7444   adjust_iv_update_pos (cand, use);
7445   ok = get_computation_aff (data->current_loop, use->stmt, use, cand, &aff);
7446   gcc_assert (ok);
7447   unshare_aff_combination (&aff);
7448 
7449   /* To avoid undefined overflow problems, all IV candidates use unsigned
7450      integer types.  The drawback is that this makes it impossible for
7451      create_mem_ref to distinguish an IV that is based on a memory object
7452      from one that represents simply an offset.
7453 
7454      To work around this problem, we pass a hint to create_mem_ref that
7455      indicates which variable (if any) in aff is an IV based on a memory
7456      object.  Note that we only consider the candidate.  If this is not
7457      based on an object, the base of the reference is in some subexpression
7458      of the use -- but these will use pointer types, so they are recognized
7459      by the create_mem_ref heuristics anyway.  */
7460   tree iv = var_at_stmt (data->current_loop, cand, use->stmt);
7461   tree base_hint = (cand->iv->base_object) ? iv : NULL_TREE;
7462   gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7463   tree type = use->mem_type;
7464   tree alias_ptr_type;
7465   if (use->type == USE_PTR_ADDRESS)
7466     alias_ptr_type = get_alias_ptr_type_for_ptr_address (use);
7467   else
7468     {
7469       gcc_assert (type == TREE_TYPE (*use->op_p));
7470       unsigned int align = get_object_alignment (*use->op_p);
7471       if (align != TYPE_ALIGN (type))
7472 	type = build_aligned_type (type, align);
7473       alias_ptr_type = reference_alias_ptr_type (*use->op_p);
7474     }
7475   tree ref = create_mem_ref (&bsi, type, &aff, alias_ptr_type,
7476 			     iv, base_hint, data->speed);
7477 
7478   if (use->type == USE_PTR_ADDRESS)
7479     {
7480       ref = fold_build1 (ADDR_EXPR, build_pointer_type (use->mem_type), ref);
7481       ref = fold_convert (get_use_type (use), ref);
7482       ref = force_gimple_operand_gsi (&bsi, ref, true, NULL_TREE,
7483 				      true, GSI_SAME_STMT);
7484     }
7485   else
7486     copy_ref_info (ref, *use->op_p);
7487 
7488   *use->op_p = ref;
7489 }
7490 
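/* For illustration (made-up SSA names): for a use *p_3 where the selected
   candidate is based on the memory object underlying p, the BASE_HINT
   above lets create_mem_ref keep the object in the base of the reference,
   e.g. something like

     MEM[base: ivtmp.7_3, offset: 0]

   rather than reconstructing pointer + offset from an unsigned IV whose
   pointer origin is no longer visible in its type.  */
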
7491 /* Rewrites USE (the condition such that one of the arguments is an iv) using
7492    candidate CAND.  */
7493 
7494 static void
7495 rewrite_use_compare (struct ivopts_data *data,
7496 		     struct iv_use *use, struct iv_cand *cand)
7497 {
7498   tree comp, op, bound;
7499   gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7500   enum tree_code compare;
7501   struct iv_group *group = data->vgroups[use->group_id];
7502   class cost_pair *cp = get_group_iv_cost (data, group, cand);
7503 
7504   bound = cp->value;
7505   if (bound)
7506     {
7507       tree var = var_at_stmt (data->current_loop, cand, use->stmt);
7508       tree var_type = TREE_TYPE (var);
7509       gimple_seq stmts;
7510 
7511       if (dump_file && (dump_flags & TDF_DETAILS))
7512 	{
7513 	  fprintf (dump_file, "Replacing exit test: ");
7514 	  print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
7515 	}
7516       compare = cp->comp;
7517       bound = unshare_expr (fold_convert (var_type, bound));
7518       op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
7519       if (stmts)
7520 	gsi_insert_seq_on_edge_immediate (
7521 		loop_preheader_edge (data->current_loop),
7522 		stmts);
7523 
7524       gcond *cond_stmt = as_a <gcond *> (use->stmt);
7525       gimple_cond_set_lhs (cond_stmt, var);
7526       gimple_cond_set_code (cond_stmt, compare);
7527       gimple_cond_set_rhs (cond_stmt, op);
7528       return;
7529     }
7530 
7531   /* The induction variable elimination failed; just express the original
7532      giv.  */
7533   comp = get_computation_at (data->current_loop, use->stmt, use, cand);
7534   gcc_assert (comp != NULL_TREE);
7535   gcc_assert (use->op_p != NULL);
7536   *use->op_p = force_gimple_operand_gsi (&bsi, comp, true,
7537 					 SSA_NAME_VAR (*use->op_p),
7538 					 true, GSI_SAME_STMT);
7539 }
7540 
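/* Example of a successful elimination above (hypothetical GIMPLE, made-up
   SSA names): an exit test

     if (i_4 < n_2)

   is rewritten against the selected candidate as

     if (ivtmp_9 != bound_7)

   where bound_7 (CP->value) is computed once in the loop preheader.  When
   no bound could be computed, the fallback path keeps the comparison and
   merely re-expresses its iv operand.  */
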
7541 /* Rewrite the groups using the selected induction variables.  */
7542 
7543 static void
7544 rewrite_groups (struct ivopts_data *data)
7545 {
7546   unsigned i, j;
7547 
7548   for (i = 0; i < data->vgroups.length (); i++)
7549     {
7550       struct iv_group *group = data->vgroups[i];
7551       struct iv_cand *cand = group->selected;
7552 
7553       gcc_assert (cand);
7554 
7555       if (group->type == USE_NONLINEAR_EXPR)
7556 	{
7557 	  for (j = 0; j < group->vuses.length (); j++)
7558 	    {
7559 	      rewrite_use_nonlinear_expr (data, group->vuses[j], cand);
7560 	      update_stmt (group->vuses[j]->stmt);
7561 	    }
7562 	}
7563       else if (address_p (group->type))
7564 	{
7565 	  for (j = 0; j < group->vuses.length (); j++)
7566 	    {
7567 	      rewrite_use_address (data, group->vuses[j], cand);
7568 	      update_stmt (group->vuses[j]->stmt);
7569 	    }
7570 	}
7571       else
7572 	{
7573 	  gcc_assert (group->type == USE_COMPARE);
7574 
7575 	  for (j = 0; j < group->vuses.length (); j++)
7576 	    {
7577 	      rewrite_use_compare (data, group->vuses[j], cand);
7578 	      update_stmt (group->vuses[j]->stmt);
7579 	    }
7580 	}
7581     }
7582 }
7583 
7584 /* Removes the ivs that are not used after rewriting.  */
7585 
7586 static void
7587 remove_unused_ivs (struct ivopts_data *data, bitmap toremove)
7588 {
7589   unsigned j;
7590   bitmap_iterator bi;
7591 
7592   /* Figure out an order in which to release SSA DEFs so that we don't
7593      release something that we'd have to propagate into a debug stmt
7594      afterwards.  */
7595   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
7596     {
7597       struct version_info *info;
7598 
7599       info = ver_info (data, j);
7600       if (info->iv
7601 	  && !integer_zerop (info->iv->step)
7602 	  && !info->inv_id
7603 	  && !info->iv->nonlin_use
7604 	  && !info->preserve_biv)
7605 	{
7606 	  bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
7607 
7608 	  tree def = info->iv->ssa_name;
7609 
7610 	  if (MAY_HAVE_DEBUG_BIND_STMTS && SSA_NAME_DEF_STMT (def))
7611 	    {
7612 	      imm_use_iterator imm_iter;
7613 	      use_operand_p use_p;
7614 	      gimple *stmt;
7615 	      int count = 0;
7616 
7617 	      FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7618 		{
7619 		  if (!gimple_debug_bind_p (stmt))
7620 		    continue;
7621 
7622 		  /* We just want to determine whether to do nothing
7623 		     (count == 0), to substitute the computed
7624 		     expression into a single use of the SSA DEF by
7625 		     itself (count == 1), or to use a debug temp
7626 		     because the SSA DEF is used multiple times or as
7627 		     part of a larger expression (count > 1). */
7628 		  count++;
7629 		  if (gimple_debug_bind_get_value (stmt) != def)
7630 		    count++;
7631 
7632 		  if (count > 1)
7633 		    BREAK_FROM_IMM_USE_STMT (imm_iter);
7634 		}
7635 
7636 	      if (!count)
7637 		continue;
7638 
7639 	      struct iv_use dummy_use;
7640 	      struct iv_cand *best_cand = NULL, *cand;
7641 	      unsigned i, best_pref = 0, cand_pref;
7642 	      tree comp = NULL_TREE;
7643 
7644 	      memset (&dummy_use, 0, sizeof (dummy_use));
7645 	      dummy_use.iv = info->iv;
7646 	      for (i = 0; i < data->vgroups.length () && i < 64; i++)
7647 		{
7648 		  cand = data->vgroups[i]->selected;
7649 		  if (cand == best_cand)
7650 		    continue;
7651 		  cand_pref = operand_equal_p (cand->iv->step,
7652 					       info->iv->step, 0)
7653 		    ? 4 : 0;
7654 		  cand_pref
7655 		    += TYPE_MODE (TREE_TYPE (cand->iv->base))
7656 		    == TYPE_MODE (TREE_TYPE (info->iv->base))
7657 		    ? 2 : 0;
7658 		  cand_pref
7659 		    += TREE_CODE (cand->iv->base) == INTEGER_CST
7660 		    ? 1 : 0;
7661 		  if (best_cand == NULL || best_pref < cand_pref)
7662 		    {
7663 		      tree this_comp
7664 			= get_debug_computation_at (data->current_loop,
7665 						    SSA_NAME_DEF_STMT (def),
7666 						    &dummy_use, cand);
7667 		      if (this_comp)
7668 			{
7669 			  best_cand = cand;
7670 			  best_pref = cand_pref;
7671 			  comp = this_comp;
7672 			}
7673 		    }
7674 		}
7675 
7676 	      if (!best_cand)
7677 		continue;
7678 
7679 	      comp = unshare_expr (comp);
7680 	      if (count > 1)
7681 		{
7682 		  tree vexpr = make_node (DEBUG_EXPR_DECL);
7683 		  DECL_ARTIFICIAL (vexpr) = 1;
7684 		  TREE_TYPE (vexpr) = TREE_TYPE (comp);
7685 		  if (SSA_NAME_VAR (def))
7686 		    SET_DECL_MODE (vexpr, DECL_MODE (SSA_NAME_VAR (def)));
7687 		  else
7688 		    SET_DECL_MODE (vexpr, TYPE_MODE (TREE_TYPE (vexpr)));
7689 		  gdebug *def_temp
7690 		    = gimple_build_debug_bind (vexpr, comp, NULL);
7691 		  gimple_stmt_iterator gsi;
7692 
7693 		  if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
7694 		    gsi = gsi_after_labels (gimple_bb
7695 					    (SSA_NAME_DEF_STMT (def)));
7696 		  else
7697 		    gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
7698 
7699 		  gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
7700 		  comp = vexpr;
7701 		}
7702 
7703 	      FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7704 		{
7705 		  if (!gimple_debug_bind_p (stmt))
7706 		    continue;
7707 
7708 		  FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
7709 		    SET_USE (use_p, comp);
7710 
7711 		  update_stmt (stmt);
7712 		}
7713 	    }
7714 	}
7715     }
7716 }
7717 
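/* To make the COUNT cases above concrete (hypothetical debug stmts):

     # DEBUG x => i_5        a single use of the DEF by itself: count == 1,
                             so COMP is substituted directly
     # DEBUG y => i_5 + 4    the bound value is not the DEF itself, so this
                             counts twice; a debug temp bound to COMP is
                             introduced and the uses are redirected to it.  */
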
7718 /* Frees memory occupied by class tree_niter_desc in *VALUE. Callback
7719    for hash_map::traverse.  */
7720 
7721 bool
7722 free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
7723 {
7724   free (value);
7725   return true;
7726 }
7727 
7728 /* Frees data allocated by the optimization of a single loop.  */
7729 
7730 static void
7731 free_loop_data (struct ivopts_data *data)
7732 {
7733   unsigned i, j;
7734   bitmap_iterator bi;
7735   tree obj;
7736 
7737   if (data->niters)
7738     {
7739       data->niters->traverse<void *, free_tree_niter_desc> (NULL);
7740       delete data->niters;
7741       data->niters = NULL;
7742     }
7743 
7744   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
7745     {
7746       struct version_info *info;
7747 
7748       info = ver_info (data, i);
7749       info->iv = NULL;
7750       info->has_nonlin_use = false;
7751       info->preserve_biv = false;
7752       info->inv_id = 0;
7753     }
7754   bitmap_clear (data->relevant);
7755   bitmap_clear (data->important_candidates);
7756 
7757   for (i = 0; i < data->vgroups.length (); i++)
7758     {
7759       struct iv_group *group = data->vgroups[i];
7760 
7761       for (j = 0; j < group->vuses.length (); j++)
7762 	free (group->vuses[j]);
7763       group->vuses.release ();
7764 
7765       BITMAP_FREE (group->related_cands);
7766       for (j = 0; j < group->n_map_members; j++)
7767 	{
7768 	  if (group->cost_map[j].inv_vars)
7769 	    BITMAP_FREE (group->cost_map[j].inv_vars);
7770 	  if (group->cost_map[j].inv_exprs)
7771 	    BITMAP_FREE (group->cost_map[j].inv_exprs);
7772 	}
7773 
7774       free (group->cost_map);
7775       free (group);
7776     }
7777   data->vgroups.truncate (0);
7778 
7779   for (i = 0; i < data->vcands.length (); i++)
7780     {
7781       struct iv_cand *cand = data->vcands[i];
7782 
7783       if (cand->inv_vars)
7784 	BITMAP_FREE (cand->inv_vars);
7785       if (cand->inv_exprs)
7786 	BITMAP_FREE (cand->inv_exprs);
7787       free (cand);
7788     }
7789   data->vcands.truncate (0);
7790 
7791   if (data->version_info_size < num_ssa_names)
7792     {
7793       data->version_info_size = 2 * num_ssa_names;
7794       free (data->version_info);
7795       data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
7796     }
7797 
7798   data->max_inv_var_id = 0;
7799   data->max_inv_expr_id = 0;
7800 
7801   FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
7802     SET_DECL_RTL (obj, NULL_RTX);
7803 
7804   decl_rtl_to_reset.truncate (0);
7805 
7806   data->inv_expr_tab->empty ();
7807 
7808   data->iv_common_cand_tab->empty ();
7809   data->iv_common_cands.truncate (0);
7810 }
7811 
7812 /* Finalizes data structures used by the iv optimization pass.  */
7814 
7815 static void
7816 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
7817 {
7818   free_loop_data (data);
7819   free (data->version_info);
7820   BITMAP_FREE (data->relevant);
7821   BITMAP_FREE (data->important_candidates);
7822 
7823   decl_rtl_to_reset.release ();
7824   data->vgroups.release ();
7825   data->vcands.release ();
7826   delete data->inv_expr_tab;
7827   data->inv_expr_tab = NULL;
7828   free_affine_expand_cache (&data->name_expansion_cache);
7829   if (data->base_object_map)
7830     delete data->base_object_map;
7831   delete data->iv_common_cand_tab;
7832   data->iv_common_cand_tab = NULL;
7833   data->iv_common_cands.release ();
7834   obstack_free (&data->iv_obstack, NULL);
7835 }
7836 
7837 /* Returns true if the loop body BODY includes any function calls.  */
7838 
7839 static bool
7840 loop_body_includes_call (basic_block *body, unsigned num_nodes)
7841 {
7842   gimple_stmt_iterator gsi;
7843   unsigned i;
7844 
7845   for (i = 0; i < num_nodes; i++)
7846     for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
7847       {
7848 	gimple *stmt = gsi_stmt (gsi);
7849 	if (is_gimple_call (stmt)
7850 	    && !gimple_call_internal_p (stmt)
7851 	    && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
7852 	  return true;
7853       }
7854   return false;
7855 }
7856 
7857 /* Determine cost scaling factor for basic blocks in loop.  */
7858 #define COST_SCALING_FACTOR_BOUND (20)
7859 
7860 static void
7861 determine_scaling_factor (struct ivopts_data *data, basic_block *body)
7862 {
7863   int lfreq = data->current_loop->header->count.to_frequency (cfun);
7864   if (!data->speed || lfreq <= 0)
7865     return;
7866 
7867   int max_freq = lfreq;
7868   for (unsigned i = 0; i < data->current_loop->num_nodes; i++)
7869     {
7870       body[i]->aux = (void *)(intptr_t) 1;
7871       if (max_freq < body[i]->count.to_frequency (cfun))
7872 	max_freq = body[i]->count.to_frequency (cfun);
7873     }
7874   if (max_freq > lfreq)
7875     {
7876       int divisor, factor;
7877       /* Check if scaling factor itself needs to be scaled by the bound.  This
7878 	 is to avoid overflow when scaling cost according to profile info.  */
7879       if (max_freq / lfreq > COST_SCALING_FACTOR_BOUND)
7880 	{
7881 	  divisor = max_freq;
7882 	  factor = COST_SCALING_FACTOR_BOUND;
7883 	}
7884       else
7885 	{
7886 	  divisor = lfreq;
7887 	  factor = 1;
7888 	}
7889       for (unsigned i = 0; i < data->current_loop->num_nodes; i++)
7890 	{
7891 	  int bfreq = body[i]->count.to_frequency (cfun);
7892 	  if (bfreq <= lfreq)
7893 	    continue;
7894 
7895 	  body[i]->aux = (void*)(intptr_t) (factor * bfreq / divisor);
7896 	}
7897     }
7898 }
7899 
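/* Worked example for the scaling above (made-up frequencies): with header
   frequency LFREQ = 1000 and hottest block MAX_FREQ = 30000, the ratio 30
   exceeds COST_SCALING_FACTOR_BOUND, so DIVISOR = 30000 and FACTOR = 20;
   a block with frequency 15000 then gets scale 20 * 15000 / 30000 = 10
   instead of the unbounded 15000 / 1000 = 15, keeping later cost
   multiplications within range.  Blocks no hotter than the header keep
   the default scale of 1.  */
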
7900 /* Find the doloop comparison use and set its doloop_p flag if found.  */
7901 
7902 static bool
7903 find_doloop_use (struct ivopts_data *data)
7904 {
7905   struct loop *loop = data->current_loop;
7906 
7907   for (unsigned i = 0; i < data->vgroups.length (); i++)
7908     {
7909       struct iv_group *group = data->vgroups[i];
7910       if (group->type == USE_COMPARE)
7911 	{
7912 	  gcc_assert (group->vuses.length () == 1);
7913 	  struct iv_use *use = group->vuses[0];
7914 	  gimple *stmt = use->stmt;
7915 	  if (gimple_code (stmt) == GIMPLE_COND)
7916 	    {
7917 	      basic_block bb = gimple_bb (stmt);
7918 	      edge true_edge, false_edge;
7919 	      extract_true_false_edges_from_block (bb, &true_edge, &false_edge);
7920 	      /* This comparison controls the loop latch.  For now, require
7921 		 that the latch be empty.  */
7922 	      if ((loop->latch == true_edge->dest
7923 		   || loop->latch == false_edge->dest)
7924 		  && empty_block_p (loop->latch))
7925 		{
7926 		  group->doloop_p = true;
7927 		  if (dump_file && (dump_flags & TDF_DETAILS))
7928 		    {
7929 		      fprintf (dump_file, "Doloop cmp iv use: ");
7930 		      print_gimple_stmt (dump_file, stmt, 0, TDF_DETAILS);
7931 		    }
7932 		  return true;
7933 		}
7934 	    }
7935 	}
7936     }
7937 
7938   return false;
7939 }
7940 
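/* Shape of the comparison the function above looks for (hypothetical
   GIMPLE): a COND whose taken edge leads to an empty latch, e.g.

     <bb 4>:
       ...
       if (ivtmp_1 != 0)
         goto <bb 5>;          empty latch, back to the header
       else
         goto <bb 6>;          loop exit

   Only such a group is marked doloop_p.  */
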
7941 /* For targets that support doloop, predict whether the later RTL doloop
7942    transformation will be performed on this loop; if so, detect the doloop
7943    use and set the flag doloop_use_p.  */
7944 
7945 void
7946 analyze_and_mark_doloop_use (struct ivopts_data *data)
7947 {
7948   data->doloop_use_p = false;
7949 
7950   if (!flag_branch_on_count_reg)
7951     return;
7952 
7953   if (!generic_predict_doloop_p (data))
7954     return;
7955 
7956   if (find_doloop_use (data))
7957     {
7958       data->doloop_use_p = true;
7959       if (dump_file && (dump_flags & TDF_DETAILS))
7960 	{
7961 	  struct loop *loop = data->current_loop;
7962 	  fprintf (dump_file,
7963 		   "Predict loop %d can perform"
7964 		   " doloop optimization later.\n",
7965 		   loop->num);
7966 	  flow_loop_dump (loop, dump_file, NULL, 1);
7967 	}
7968     }
7969 }
7970 
7971 /* Optimizes the LOOP.  Returns true if anything changed.  */
7972 
7973 static bool
7974 tree_ssa_iv_optimize_loop (struct ivopts_data *data, class loop *loop,
7975 			   bitmap toremove)
7976 {
7977   bool changed = false;
7978   class iv_ca *iv_ca;
7979   edge exit = single_dom_exit (loop);
7980   basic_block *body;
7981 
7982   gcc_assert (!data->niters);
7983   data->current_loop = loop;
7984   data->loop_loc = find_loop_location (loop).get_location_t ();
7985   data->speed = optimize_loop_for_speed_p (loop);
7986 
7987   if (dump_file && (dump_flags & TDF_DETAILS))
7988     {
7989       fprintf (dump_file, "Processing loop %d", loop->num);
7990       if (data->loop_loc != UNKNOWN_LOCATION)
7991 	fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7992 		 LOCATION_LINE (data->loop_loc));
7993       fprintf (dump_file, "\n");
7994 
7995       if (exit)
7996 	{
7997 	  fprintf (dump_file, "  single exit %d -> %d, exit condition ",
7998 		   exit->src->index, exit->dest->index);
7999 	  print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
8000 	  fprintf (dump_file, "\n");
8001 	}
8002 
8003       fprintf (dump_file, "\n");
8004     }
8005 
8006   body = get_loop_body (loop);
8007   data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
8008   renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
8009 
8010   data->loop_single_exit_p
8011     = exit != NULL && loop_only_exit_p (loop, body, exit);
8012 
8013   /* For each ssa name determines whether it behaves as an induction variable
8014      in some loop.  */
8015   if (!find_induction_variables (data))
8016     goto finish;
8017 
8018   /* Finds interesting uses (item 1).  */
8019   find_interesting_uses (data);
8020   if (data->vgroups.length () > MAX_CONSIDERED_GROUPS)
8021     goto finish;
8022 
8023   /* Determine cost scaling factor for basic blocks in loop.  */
8024   determine_scaling_factor (data, body);
8025 
8026   /* Analyze doloop possibility and mark the doloop use if predicted.  */
8027   analyze_and_mark_doloop_use (data);
8028 
8029   /* Finds candidates for the induction variables (item 2).  */
8030   find_iv_candidates (data);
8031 
8032   /* Calculates the costs (item 3, part 1).  */
8033   determine_iv_costs (data);
8034   determine_group_iv_costs (data);
8035   determine_set_costs (data);
8036 
8037   /* Find the optimal set of induction variables (item 3, part 2).  */
8038   iv_ca = find_optimal_iv_set (data);
8039   /* Cleanup basic block aux field.  */
8040   for (unsigned i = 0; i < data->current_loop->num_nodes; i++)
8041     body[i]->aux = NULL;
8042   if (!iv_ca)
8043     goto finish;
8044   changed = true;
8045 
8046   /* Create the new induction variables (item 4, part 1).  */
8047   create_new_ivs (data, iv_ca);
8048   iv_ca_free (&iv_ca);
8049 
8050   /* Rewrite the uses (item 4, part 2).  */
8051   rewrite_groups (data);
8052 
8053   /* Remove the ivs that are unused after rewriting.  */
8054   remove_unused_ivs (data, toremove);
8055 
8056 finish:
8057   free (body);
8058   free_loop_data (data);
8059 
8060   return changed;
8061 }
8062 
8063 /* Main entry point.  Optimizes induction variables in loops.  */
8064 
8065 void
8066 tree_ssa_iv_optimize (void)
8067 {
8068   class loop *loop;
8069   struct ivopts_data data;
8070   auto_bitmap toremove;
8071 
8072   tree_ssa_iv_optimize_init (&data);
8073 
8074   /* Optimize the loops starting with the innermost ones.  */
8075   FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
8076     {
8077       if (!dbg_cnt (ivopts_loop))
8078 	continue;
8079 
8080       if (dump_file && (dump_flags & TDF_DETAILS))
8081 	flow_loop_dump (loop, dump_file, NULL, 1);
8082 
8083       tree_ssa_iv_optimize_loop (&data, loop, toremove);
8084     }
8085 
8086   /* Remove eliminated IV defs.  */
8087   release_defs_bitset (toremove);
8088 
8089   /* We have changed the structure of induction variables; it might happen
8090      that definitions in the scev database refer to some of them that were
8091      eliminated.  */
8092   scev_reset_htab ();
8093   /* Likewise niter and control-IV information.  */
8094   free_numbers_of_iterations_estimates (cfun);
8095 
8096   tree_ssa_iv_optimize_finalize (&data);
8097 }
8098 
8099 #include "gt-tree-ssa-loop-ivopts.h"
8100