xref: /netbsd-src/external/gpl3/gcc.old/dist/gcc/tree-ssa-loop-ivopts.c (revision 53b02e147d4ed531c0d2a5ca9b3e8026ba3e99b5)
1 /* Induction variable optimizations.
2    Copyright (C) 2003-2019 Free Software Foundation, Inc.
3 
4 This file is part of GCC.
5 
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
10 
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3.  If not see
18 <http://www.gnu.org/licenses/>.  */
19 
20 /* This pass tries to find the optimal set of induction variables for the loop.
21    It optimizes just the basic linear induction variables (although adding
22    support for other types should not be too hard).  It includes the
23    optimizations commonly known as strength reduction, induction variable
24    coalescing and induction variable elimination.  It does it in the
25    following steps:
26 
27    1) The interesting uses of induction variables are found.  This includes
28 
29       -- uses of induction variables in non-linear expressions
30       -- addresses of arrays
31       -- comparisons of induction variables
32 
33       Note the interesting uses are categorized and handled in groups.
34       Generally, address type uses are grouped together if their iv bases
35       differ only in a constant offset.
36 
37    2) Candidates for the induction variables are found.  This includes
38 
39       -- old induction variables
40       -- the variables defined by expressions derived from the "interesting
41 	 groups/uses" above
42 
43    3) The optimal (with respect to a cost function) set of variables is chosen.  The
44       cost function assigns a cost to sets of induction variables and consists
45       of three parts:
46 
47       -- The group/use costs.  Each of the interesting groups/uses chooses
48 	 the best induction variable in the set and adds its cost to the sum.
49 	 The cost reflects the time spent on modifying the induction variable's
50 	 value to be usable for the given purpose (adding base and offset for
51 	 arrays, etc.).
52       -- The variable costs.  Each of the variables has a cost assigned that
53 	 reflects the costs associated with incrementing the value of the
54 	 variable.  The original variables are somewhat preferred.
55       -- The set cost.  Depending on the size of the set, extra cost may be
56 	 added to reflect register pressure.
57 
58       All the costs are defined in a machine-specific way, using the target
59       hooks and machine descriptions to determine them.
60 
61    4) The trees are transformed to use the new variables, and the dead code
62       is removed.
63 
64    All of this is done loop by loop.  Doing it globally is theoretically
65    possible; it might give better performance and it might enable us
66    to decide costs more precisely, but getting all the interactions right
67    would be complicated.  */
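
/* An illustrative example (an assumption for illustration, not taken from
   the original comments): for a loop like

     for (i = 0; i < n; i++)
       sum += a[i];

   the pass records an address use for "a[i]" and a compare use for "i < n",
   and may select a single pointer candidate initialized to "&a[0]" and
   stepped by "sizeof (*a)", so that the address is not recomputed as
   "a + i * sizeof (*a)" in every iteration, if the cost model finds this
   cheaper on the target.  */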
68 
69 #include "config.h"
70 #include "system.h"
71 #include "coretypes.h"
72 #include "backend.h"
73 #include "rtl.h"
74 #include "tree.h"
75 #include "gimple.h"
76 #include "cfghooks.h"
77 #include "tree-pass.h"
78 #include "memmodel.h"
79 #include "tm_p.h"
80 #include "ssa.h"
81 #include "expmed.h"
82 #include "insn-config.h"
83 #include "emit-rtl.h"
84 #include "recog.h"
85 #include "cgraph.h"
86 #include "gimple-pretty-print.h"
87 #include "alias.h"
88 #include "fold-const.h"
89 #include "stor-layout.h"
90 #include "tree-eh.h"
91 #include "gimplify.h"
92 #include "gimple-iterator.h"
93 #include "gimplify-me.h"
94 #include "tree-cfg.h"
95 #include "tree-ssa-loop-ivopts.h"
96 #include "tree-ssa-loop-manip.h"
97 #include "tree-ssa-loop-niter.h"
98 #include "tree-ssa-loop.h"
99 #include "explow.h"
100 #include "expr.h"
101 #include "tree-dfa.h"
102 #include "tree-ssa.h"
103 #include "cfgloop.h"
104 #include "tree-scalar-evolution.h"
105 #include "params.h"
106 #include "tree-affine.h"
107 #include "tree-ssa-propagate.h"
108 #include "tree-ssa-address.h"
109 #include "builtins.h"
110 #include "tree-vectorizer.h"
111 
112 /* FIXME: Expressions are expanded to RTL in this pass to determine the
113    cost of different addressing modes.  This should be moved to a TBD
114    interface between the GIMPLE and RTL worlds.  */
115 
116 /* The infinite cost.  */
117 #define INFTY 10000000
118 
119 /* Returns the expected number of loop iterations for LOOP.
120    The average trip count is computed from profile data if it
121    exists. */
122 
123 static inline HOST_WIDE_INT
124 avg_loop_niter (struct loop *loop)
125 {
126   HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
127   if (niter == -1)
128     {
129       niter = likely_max_stmt_executions_int (loop);
130 
131       if (niter == -1 || niter > PARAM_VALUE (PARAM_AVG_LOOP_NITER))
132 	return PARAM_VALUE (PARAM_AVG_LOOP_NITER);
133     }
134 
135   return niter;
136 }
137 
138 struct iv_use;
139 
140 /* Representation of the induction variable.  */
141 struct iv
142 {
143   tree base;		/* Initial value of the iv.  */
144   tree base_object;	/* A memory object to which the induction variable points.  */
145   tree step;		/* Step of the iv (constant only).  */
146   tree ssa_name;	/* The ssa name with the value.  */
147   struct iv_use *nonlin_use;	/* The nonlinear use recorded for this iv, if any.  */
148   bool biv_p;		/* Is it a biv?  */
149   bool no_overflow;	/* True if the iv doesn't overflow.  */
150   bool have_address_use;/* For biv, indicate if it's used in any address
151 			   type use.  */
152 };
153 
154 /* Per-ssa version information (induction variable descriptions, etc.).  */
155 struct version_info
156 {
157   tree name;		/* The ssa name.  */
158   struct iv *iv;	/* Induction variable description.  */
159   bool has_nonlin_use;	/* For a loop-level invariant, whether it is used in
160 			   an expression that is not an induction variable.  */
161   bool preserve_biv;	/* For the original biv, whether to preserve it.  */
162   unsigned inv_id;	/* Id of an invariant.  */
163 };
164 
165 /* Types of uses.  */
166 enum use_type
167 {
168   USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
169   USE_REF_ADDRESS,	/* Use is an address for an explicit memory
170 			   reference.  */
171   USE_PTR_ADDRESS,	/* Use is a pointer argument to a function in
172 			   cases where the expansion of the function
173 			   will turn the argument into a normal address.  */
174   USE_COMPARE		/* Use is a compare.  */
175 };
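
/* Illustrative (hypothetical) examples of the use types above: a use of an
   iv "i" in "sum += i * i" is a USE_NONLINEAR_EXPR use, the address computed
   for "a[i]" is a USE_REF_ADDRESS use, the pointer argument of an internal
   call that is later expanded into a memory access (e.g. a masked
   load/store) is a USE_PTR_ADDRESS use, and "i < n" in an exit test is a
   USE_COMPARE use.  */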
176 
177 /* Cost of a computation.  */
178 struct comp_cost
179 {
180   comp_cost (): cost (0), complexity (0), scratch (0)
181   {}
182 
183   comp_cost (int cost, unsigned complexity, int scratch = 0)
184     : cost (cost), complexity (complexity), scratch (scratch)
185   {}
186 
187   /* Returns true if COST is infinite.  */
188   bool infinite_cost_p ();
189 
190   /* Adds costs COST1 and COST2.  */
191   friend comp_cost operator+ (comp_cost cost1, comp_cost cost2);
192 
193   /* Adds COST to the comp_cost.  */
194   comp_cost operator+= (comp_cost cost);
195 
196   /* Adds constant C to this comp_cost.  */
197   comp_cost operator+= (HOST_WIDE_INT c);
198 
199   /* Subtracts constant C from this comp_cost.  */
200   comp_cost operator-= (HOST_WIDE_INT c);
201 
202   /* Divide the comp_cost by constant C.  */
203   comp_cost operator/= (HOST_WIDE_INT c);
204 
205   /* Multiply the comp_cost by constant C.  */
206   comp_cost operator*= (HOST_WIDE_INT c);
207 
208   /* Subtracts cost COST2 from COST1.  */
209   friend comp_cost operator- (comp_cost cost1, comp_cost cost2);
210 
211   /* Subtracts COST from this comp_cost.  */
212   comp_cost operator-= (comp_cost cost);
213 
214   /* Returns true if COST1 is smaller than COST2.  */
215   friend bool operator< (comp_cost cost1, comp_cost cost2);
216 
217   /* Returns true if COST1 and COST2 are equal.  */
218   friend bool operator== (comp_cost cost1, comp_cost cost2);
219 
220   /* Returns true if COST1 is smaller than or equal to COST2.  */
221   friend bool operator<= (comp_cost cost1, comp_cost cost2);
222 
223   int cost;		/* The runtime cost.  */
224   unsigned complexity;  /* The estimate of the complexity of the code for
225 			   the computation (in no concrete units --
226 			   complexity field should be larger for more
227 			   complex expressions and addressing modes).  */
228   int scratch;		/* Scratch used during cost computation.  */
229 };
230 
231 static const comp_cost no_cost;
232 static const comp_cost infinite_cost (INFTY, INFTY, INFTY);
233 
234 bool
235 comp_cost::infinite_cost_p ()
236 {
237   return cost == INFTY;
238 }
239 
240 comp_cost
241 operator+ (comp_cost cost1, comp_cost cost2)
242 {
243   if (cost1.infinite_cost_p () || cost2.infinite_cost_p ())
244     return infinite_cost;
245 
246   cost1.cost += cost2.cost;
247   cost1.complexity += cost2.complexity;
248 
249   return cost1;
250 }
251 
252 comp_cost
253 operator- (comp_cost cost1, comp_cost cost2)
254 {
255   if (cost1.infinite_cost_p ())
256     return infinite_cost;
257 
258   gcc_assert (!cost2.infinite_cost_p ());
259 
260   cost1.cost -= cost2.cost;
261   cost1.complexity -= cost2.complexity;
262 
263   return cost1;
264 }
265 
266 comp_cost
267 comp_cost::operator+= (comp_cost cost)
268 {
269   *this = *this + cost;
270   return *this;
271 }
272 
273 comp_cost
274 comp_cost::operator+= (HOST_WIDE_INT c)
275 {
276   if (infinite_cost_p ())
277     return *this;
278 
279   this->cost += c;
280 
281   return *this;
282 }
283 
284 comp_cost
285 comp_cost::operator-= (HOST_WIDE_INT c)
286 {
287   if (infinite_cost_p ())
288     return *this;
289 
290   this->cost -= c;
291 
292   return *this;
293 }
294 
295 comp_cost
296 comp_cost::operator/= (HOST_WIDE_INT c)
297 {
298   if (infinite_cost_p ())
299     return *this;
300 
301   this->cost /= c;
302 
303   return *this;
304 }
305 
306 comp_cost
307 comp_cost::operator*= (HOST_WIDE_INT c)
308 {
309   if (infinite_cost_p ())
310     return *this;
311 
312   this->cost *= c;
313 
314   return *this;
315 }
316 
317 comp_cost
318 comp_cost::operator-= (comp_cost cost)
319 {
320   *this = *this - cost;
321   return *this;
322 }
323 
324 bool
325 operator< (comp_cost cost1, comp_cost cost2)
326 {
327   if (cost1.cost == cost2.cost)
328     return cost1.complexity < cost2.complexity;
329 
330   return cost1.cost < cost2.cost;
331 }
332 
333 bool
334 operator== (comp_cost cost1, comp_cost cost2)
335 {
336   return cost1.cost == cost2.cost
337     && cost1.complexity == cost2.complexity;
338 }
339 
340 bool
341 operator<= (comp_cost cost1, comp_cost cost2)
342 {
343   return cost1 < cost2 || cost1 == cost2;
344 }
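
/* For illustration: with the operators above, cost {4, 1} compares as
   smaller than {4, 2} (equal runtime cost, lower complexity), {4, 2} as
   smaller than {5, 0} (the runtime cost dominates), and adding
   infinite_cost to any cost yields infinite_cost.  */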
345 
346 struct iv_inv_expr_ent;
347 
348 /* The candidate - cost pair.  */
349 struct cost_pair
350 {
351   struct iv_cand *cand;	/* The candidate.  */
352   comp_cost cost;	/* The cost.  */
353   enum tree_code comp;	/* For iv elimination, the comparison.  */
354   bitmap inv_vars;	/* The list of invariant ssa_vars that have to be
355 			   preserved when representing iv_use with iv_cand.  */
356   bitmap inv_exprs;	/* The list of newly created invariant expressions
357 			   when representing iv_use with iv_cand.  */
358   tree value;		/* For final value elimination, the expression for
359 			   the final value of the iv.  For iv elimination,
360 			   the new bound to compare with.  */
361 };
362 
363 /* Use.  */
364 struct iv_use
365 {
366   unsigned id;		/* The id of the use.  */
367   unsigned group_id;	/* The group id the use belongs to.  */
368   enum use_type type;	/* Type of the use.  */
369   tree mem_type;	/* The memory type to use when testing whether an
370 			   address is legitimate, and what the address's
371 			   cost is.  */
372   struct iv *iv;	/* The induction variable it is based on.  */
373   gimple *stmt;		/* Statement in that it occurs.  */
374   tree *op_p;		/* The place where it occurs.  */
375 
376   tree addr_base;	/* Base address with const offset stripped.  */
377   poly_uint64_pod addr_offset;
378 			/* Const offset stripped from base address.  */
379 };
380 
381 /* Group of uses.  */
382 struct iv_group
383 {
384   /* The id of the group.  */
385   unsigned id;
386   /* Uses of the group are of the same type.  */
387   enum use_type type;
388   /* The set of "related" IV candidates, plus the important ones.  */
389   bitmap related_cands;
390   /* Number of IV candidates in the cost_map.  */
391   unsigned n_map_members;
392   /* The costs w.r.t. the iv candidates.  */
393   struct cost_pair *cost_map;
394   /* The selected candidate for the group.  */
395   struct iv_cand *selected;
396   /* Uses in the group.  */
397   vec<struct iv_use *> vuses;
398 };
399 
400 /* The position where the iv is computed.  */
401 enum iv_position
402 {
403   IP_NORMAL,		/* At the end, just before the exit condition.  */
404   IP_END,		/* At the end of the latch block.  */
405   IP_BEFORE_USE,	/* Immediately before a specific use.  */
406   IP_AFTER_USE,		/* Immediately after a specific use.  */
407   IP_ORIGINAL		/* The original biv.  */
408 };
409 
410 /* The induction variable candidate.  */
411 struct iv_cand
412 {
413   unsigned id;		/* The number of the candidate.  */
414   bool important;	/* Whether this is an "important" candidate, i.e. such
415 			   that it should be considered by all uses.  */
416   ENUM_BITFIELD(iv_position) pos : 8;	/* Where it is computed.  */
417   gimple *incremented_at;/* For original biv, the statement where it is
418 			   incremented.  */
419   tree var_before;	/* The variable used for it before increment.  */
420   tree var_after;	/* The variable used for it after increment.  */
421   struct iv *iv;	/* The value of the candidate.  NULL for
422 			   "pseudocandidate" used to indicate the possibility
423 			   to replace the final value of an iv by direct
424 			   computation of the value.  */
425   unsigned cost;	/* Cost of the candidate.  */
426   unsigned cost_step;	/* Cost of the candidate's increment operation.  */
427   struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
428 			      where it is incremented.  */
429   bitmap inv_vars;	/* The list of invariant ssa_vars used in step of the
430 			   iv_cand.  */
431   bitmap inv_exprs;	/* If step is more complicated than a single ssa_var,
432 			   handle it as a new invariant expression which will
433 			   be hoisted out of the loop.  */
434   struct iv *orig_iv;	/* The original iv if this cand is added from biv with
435 			   smaller type.  */
436 };
437 
438 /* Hashtable entry for common candidate derived from iv uses.  */
439 struct iv_common_cand
440 {
441   tree base;
442   tree step;
443   /* IV uses from which this common candidate is derived.  */
444   auto_vec<struct iv_use *> uses;
445   hashval_t hash;
446 };
447 
448 /* Hashtable helpers.  */
449 
450 struct iv_common_cand_hasher : delete_ptr_hash <iv_common_cand>
451 {
452   static inline hashval_t hash (const iv_common_cand *);
453   static inline bool equal (const iv_common_cand *, const iv_common_cand *);
454 };
455 
456 /* Hash function for possible common candidates.  */
457 
458 inline hashval_t
459 iv_common_cand_hasher::hash (const iv_common_cand *ccand)
460 {
461   return ccand->hash;
462 }
463 
464 /* Hash table equality function for common candidates.  */
465 
466 inline bool
467 iv_common_cand_hasher::equal (const iv_common_cand *ccand1,
468 			      const iv_common_cand *ccand2)
469 {
470   return (ccand1->hash == ccand2->hash
471 	  && operand_equal_p (ccand1->base, ccand2->base, 0)
472 	  && operand_equal_p (ccand1->step, ccand2->step, 0)
473 	  && (TYPE_PRECISION (TREE_TYPE (ccand1->base))
474 	      == TYPE_PRECISION (TREE_TYPE (ccand2->base))));
475 }
476 
477 /* Loop invariant expression hashtable entry.  */
478 
479 struct iv_inv_expr_ent
480 {
481   /* Tree expression of the entry.  */
482   tree expr;
483   /* Unique identifier.  */
484   int id;
485   /* Hash value.  */
486   hashval_t hash;
487 };
488 
489 /* Sort iv_inv_expr_ent pair A and B by id field.  */
490 
491 static int
492 sort_iv_inv_expr_ent (const void *a, const void *b)
493 {
494   const iv_inv_expr_ent * const *e1 = (const iv_inv_expr_ent * const *) (a);
495   const iv_inv_expr_ent * const *e2 = (const iv_inv_expr_ent * const *) (b);
496 
497   unsigned id1 = (*e1)->id;
498   unsigned id2 = (*e2)->id;
499 
500   if (id1 < id2)
501     return -1;
502   else if (id1 > id2)
503     return 1;
504   else
505     return 0;
506 }
507 
508 /* Hashtable helpers.  */
509 
510 struct iv_inv_expr_hasher : free_ptr_hash <iv_inv_expr_ent>
511 {
512   static inline hashval_t hash (const iv_inv_expr_ent *);
513   static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
514 };
515 
516 /* Return true if uses of type TYPE represent some form of address.  */
517 
518 inline bool
519 address_p (use_type type)
520 {
521   return type == USE_REF_ADDRESS || type == USE_PTR_ADDRESS;
522 }
523 
524 /* Hash function for loop invariant expressions.  */
525 
526 inline hashval_t
527 iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
528 {
529   return expr->hash;
530 }
531 
532 /* Hash table equality function for expressions.  */
533 
534 inline bool
535 iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
536 			   const iv_inv_expr_ent *expr2)
537 {
538   return expr1->hash == expr2->hash
539 	 && operand_equal_p (expr1->expr, expr2->expr, 0);
540 }
541 
542 struct ivopts_data
543 {
544   /* The currently optimized loop.  */
545   struct loop *current_loop;
546   location_t loop_loc;
547 
548   /* Numbers of iterations for all exits of the current loop.  */
549   hash_map<edge, tree_niter_desc *> *niters;
550 
551   /* Number of registers used in it.  */
552   unsigned regs_used;
553 
554   /* The size of version_info array allocated.  */
555   unsigned version_info_size;
556 
557   /* The array of information for the ssa names.  */
558   struct version_info *version_info;
559 
560   /* The hashtable of loop invariant expressions created
561      by ivopt.  */
562   hash_table<iv_inv_expr_hasher> *inv_expr_tab;
563 
564   /* The bitmap of indices in version_info whose value was changed.  */
565   bitmap relevant;
566 
567   /* The uses of induction variables.  */
568   vec<iv_group *> vgroups;
569 
570   /* The candidates.  */
571   vec<iv_cand *> vcands;
572 
573   /* A bitmap of important candidates.  */
574   bitmap important_candidates;
575 
576   /* Cache used by tree_to_aff_combination_expand.  */
577   hash_map<tree, name_expansion *> *name_expansion_cache;
578 
579   /* The hashtable of common candidates derived from iv uses.  */
580   hash_table<iv_common_cand_hasher> *iv_common_cand_tab;
581 
582   /* The common candidates.  */
583   vec<iv_common_cand *> iv_common_cands;
584 
585   /* Hash map recording base object information of tree exp.  */
586   hash_map<tree, tree> *base_object_map;
587 
588   /* The maximum invariant variable id.  */
589   unsigned max_inv_var_id;
590 
591   /* The maximum invariant expression id.  */
592   unsigned max_inv_expr_id;
593 
594   /* Number of no_overflow BIVs which are not used in memory address.  */
595   unsigned bivs_not_used_in_addr;
596 
597   /* Obstack for iv structure.  */
598   struct obstack iv_obstack;
599 
600   /* Whether to consider just related and important candidates when replacing a
601      use.  */
602   bool consider_all_candidates;
603 
604   /* Are we optimizing for speed?  */
605   bool speed;
606 
607   /* Whether the loop body includes any function calls.  */
608   bool body_includes_call;
609 
610   /* Whether the loop body can only be exited via single exit.  */
611   bool loop_single_exit_p;
612 };
613 
614 /* An assignment of iv candidates to uses.  */
615 
616 struct iv_ca
617 {
618   /* The number of uses covered by the assignment.  */
619   unsigned upto;
620 
621   /* Number of uses that cannot be expressed by the candidates in the set.  */
622   unsigned bad_groups;
623 
624   /* Candidate assigned to a use, together with the related costs.  */
625   struct cost_pair **cand_for_group;
626 
627   /* Number of times each candidate is used.  */
628   unsigned *n_cand_uses;
629 
630   /* The candidates used.  */
631   bitmap cands;
632 
633   /* The number of candidates in the set.  */
634   unsigned n_cands;
635 
636   /* The number of invariants needed, including both invariant variables and
637      invariant expressions.  */
638   unsigned n_invs;
639 
640   /* Total cost of expressing uses.  */
641   comp_cost cand_use_cost;
642 
643   /* Total cost of candidates.  */
644   unsigned cand_cost;
645 
646   /* Number of times each invariant variable is used.  */
647   unsigned *n_inv_var_uses;
648 
649   /* Number of times each invariant expression is used.  */
650   unsigned *n_inv_expr_uses;
651 
652   /* Total cost of the assignment.  */
653   comp_cost cost;
654 };
655 
656 /* Difference of two iv candidate assignments.  */
657 
658 struct iv_ca_delta
659 {
660   /* Changed group.  */
661   struct iv_group *group;
662 
663   /* An old assignment (for rollback purposes).  */
664   struct cost_pair *old_cp;
665 
666   /* A new assignment.  */
667   struct cost_pair *new_cp;
668 
669   /* Next change in the list.  */
670   struct iv_ca_delta *next;
671 };
672 
673 /* Bound on the number of candidates below which all candidates are considered.  */
674 
675 #define CONSIDER_ALL_CANDIDATES_BOUND \
676   ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
677 
678 /* If there are more iv occurrences, we just give up (it is quite unlikely that
679    optimizing such a loop would help, and it would take ages).  */
680 
681 #define MAX_CONSIDERED_GROUPS \
682   ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
683 
684 /* If there are at most this number of ivs in the set, always try removing
685    unnecessary ivs from the set.  */
686 
687 #define ALWAYS_PRUNE_CAND_SET_BOUND \
688   ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
689 
690 /* The list of trees for which the decl_rtl field must be reset is stored
691    here.  */
692 
693 static vec<tree> decl_rtl_to_reset;
694 
695 static comp_cost force_expr_to_var_cost (tree, bool);
696 
697 /* The single loop exit if it dominates the latch, NULL otherwise.  */
698 
699 edge
700 single_dom_exit (struct loop *loop)
701 {
702   edge exit = single_exit (loop);
703 
704   if (!exit)
705     return NULL;
706 
707   if (!just_once_each_iteration_p (loop, exit->src))
708     return NULL;
709 
710   return exit;
711 }
712 
713 /* Dumps information about the induction variable IV to FILE.  Don't dump
714    variable's name if DUMP_NAME is FALSE.  The information is dumped with
715    preceding spaces indicated by INDENT_LEVEL.  */
716 
717 void
718 dump_iv (FILE *file, struct iv *iv, bool dump_name, unsigned indent_level)
719 {
720   const char *p;
721   const char spaces[9] = {' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '\0'};
722 
723   if (indent_level > 4)
724     indent_level = 4;
725   p = spaces + 8 - (indent_level << 1);
726 
727   fprintf (file, "%sIV struct:\n", p);
728   if (iv->ssa_name && dump_name)
729     {
730       fprintf (file, "%s  SSA_NAME:\t", p);
731       print_generic_expr (file, iv->ssa_name, TDF_SLIM);
732       fprintf (file, "\n");
733     }
734 
735   fprintf (file, "%s  Type:\t", p);
736   print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
737   fprintf (file, "\n");
738 
739   fprintf (file, "%s  Base:\t", p);
740   print_generic_expr (file, iv->base, TDF_SLIM);
741   fprintf (file, "\n");
742 
743   fprintf (file, "%s  Step:\t", p);
744   print_generic_expr (file, iv->step, TDF_SLIM);
745   fprintf (file, "\n");
746 
747   if (iv->base_object)
748     {
749       fprintf (file, "%s  Object:\t", p);
750       print_generic_expr (file, iv->base_object, TDF_SLIM);
751       fprintf (file, "\n");
752     }
753 
754   fprintf (file, "%s  Biv:\t%c\n", p, iv->biv_p ? 'Y' : 'N');
755 
756   fprintf (file, "%s  Overflowness wrto loop niter:\t%s\n",
757 	   p, iv->no_overflow ? "No-overflow" : "Overflow");
758 }
759 
760 /* Dumps information about the USE to FILE.  */
761 
762 void
763 dump_use (FILE *file, struct iv_use *use)
764 {
765   fprintf (file, "  Use %d.%d:\n", use->group_id, use->id);
766   fprintf (file, "    At stmt:\t");
767   print_gimple_stmt (file, use->stmt, 0);
768   fprintf (file, "    At pos:\t");
769   if (use->op_p)
770     print_generic_expr (file, *use->op_p, TDF_SLIM);
771   fprintf (file, "\n");
772   dump_iv (file, use->iv, false, 2);
773 }
774 
775 /* Dumps information about the uses to FILE.  */
776 
777 void
778 dump_groups (FILE *file, struct ivopts_data *data)
779 {
780   unsigned i, j;
781   struct iv_group *group;
782 
783   for (i = 0; i < data->vgroups.length (); i++)
784     {
785       group = data->vgroups[i];
786       fprintf (file, "Group %d:\n", group->id);
787       if (group->type == USE_NONLINEAR_EXPR)
788 	fprintf (file, "  Type:\tGENERIC\n");
789       else if (group->type == USE_REF_ADDRESS)
790 	fprintf (file, "  Type:\tREFERENCE ADDRESS\n");
791       else if (group->type == USE_PTR_ADDRESS)
792 	fprintf (file, "  Type:\tPOINTER ARGUMENT ADDRESS\n");
793       else
794 	{
795 	  gcc_assert (group->type == USE_COMPARE);
796 	  fprintf (file, "  Type:\tCOMPARE\n");
797 	}
798       for (j = 0; j < group->vuses.length (); j++)
799 	dump_use (file, group->vuses[j]);
800     }
801 }
802 
803 /* Dumps information about induction variable candidate CAND to FILE.  */
804 
805 void
806 dump_cand (FILE *file, struct iv_cand *cand)
807 {
808   struct iv *iv = cand->iv;
809 
810   fprintf (file, "Candidate %d:\n", cand->id);
811   if (cand->inv_vars)
812     {
813       fprintf (file, "  Depend on inv.vars: ");
814       dump_bitmap (file, cand->inv_vars);
815     }
816   if (cand->inv_exprs)
817     {
818       fprintf (file, "  Depend on inv.exprs: ");
819       dump_bitmap (file, cand->inv_exprs);
820     }
821 
822   if (cand->var_before)
823     {
824       fprintf (file, "  Var before: ");
825       print_generic_expr (file, cand->var_before, TDF_SLIM);
826       fprintf (file, "\n");
827     }
828   if (cand->var_after)
829     {
830       fprintf (file, "  Var after: ");
831       print_generic_expr (file, cand->var_after, TDF_SLIM);
832       fprintf (file, "\n");
833     }
834 
835   switch (cand->pos)
836     {
837     case IP_NORMAL:
838       fprintf (file, "  Incr POS: before exit test\n");
839       break;
840 
841     case IP_BEFORE_USE:
842       fprintf (file, "  Incr POS: before use %d\n", cand->ainc_use->id);
843       break;
844 
845     case IP_AFTER_USE:
846       fprintf (file, "  Incr POS: after use %d\n", cand->ainc_use->id);
847       break;
848 
849     case IP_END:
850       fprintf (file, "  Incr POS: at end\n");
851       break;
852 
853     case IP_ORIGINAL:
854       fprintf (file, "  Incr POS: orig biv\n");
855       break;
856     }
857 
858   dump_iv (file, iv, false, 1);
859 }
860 
861 /* Returns the info for ssa version VER.  */
862 
863 static inline struct version_info *
864 ver_info (struct ivopts_data *data, unsigned ver)
865 {
866   return data->version_info + ver;
867 }
868 
869 /* Returns the info for ssa name NAME.  */
870 
871 static inline struct version_info *
872 name_info (struct ivopts_data *data, tree name)
873 {
874   return ver_info (data, SSA_NAME_VERSION (name));
875 }
876 
877 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
878    emitted in LOOP.  */
879 
880 static bool
881 stmt_after_ip_normal_pos (struct loop *loop, gimple *stmt)
882 {
883   basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
884 
885   gcc_assert (bb);
886 
887   if (sbb == loop->latch)
888     return true;
889 
890   if (sbb != bb)
891     return false;
892 
893   return stmt == last_stmt (bb);
894 }
895 
896 /* Returns true if STMT is after the place where the original induction
897    variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
898    if the positions are identical.  */
899 
900 static bool
901 stmt_after_inc_pos (struct iv_cand *cand, gimple *stmt, bool true_if_equal)
902 {
903   basic_block cand_bb = gimple_bb (cand->incremented_at);
904   basic_block stmt_bb = gimple_bb (stmt);
905 
906   if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
907     return false;
908 
909   if (stmt_bb != cand_bb)
910     return true;
911 
912   if (true_if_equal
913       && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
914     return true;
915   return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
916 }
917 
918 /* Returns true if STMT is after the place where the induction variable
919    CAND is incremented in LOOP.  */
920 
921 static bool
922 stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple *stmt)
923 {
924   switch (cand->pos)
925     {
926     case IP_END:
927       return false;
928 
929     case IP_NORMAL:
930       return stmt_after_ip_normal_pos (loop, stmt);
931 
932     case IP_ORIGINAL:
933     case IP_AFTER_USE:
934       return stmt_after_inc_pos (cand, stmt, false);
935 
936     case IP_BEFORE_USE:
937       return stmt_after_inc_pos (cand, stmt, true);
938 
939     default:
940       gcc_unreachable ();
941     }
942 }
943 
944 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node.  */
945 
946 static bool
947 abnormal_ssa_name_p (tree exp)
948 {
949   if (!exp)
950     return false;
951 
952   if (TREE_CODE (exp) != SSA_NAME)
953     return false;
954 
955   return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
956 }
957 
958 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
959    abnormal phi node.  Callback for for_each_index.  */
960 
961 static bool
962 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
963 				  void *data ATTRIBUTE_UNUSED)
964 {
965   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
966     {
967       if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
968 	return false;
969       if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
970 	return false;
971     }
972 
973   return !abnormal_ssa_name_p (*index);
974 }
975 
976 /* Returns true if EXPR contains a ssa name that occurs in an
977    abnormal phi node.  */
978 
979 bool
980 contains_abnormal_ssa_name_p (tree expr)
981 {
982   enum tree_code code;
983   enum tree_code_class codeclass;
984 
985   if (!expr)
986     return false;
987 
988   code = TREE_CODE (expr);
989   codeclass = TREE_CODE_CLASS (code);
990 
991   if (code == CALL_EXPR)
992     {
993       tree arg;
994       call_expr_arg_iterator iter;
995       FOR_EACH_CALL_EXPR_ARG (arg, iter, expr)
996 	if (contains_abnormal_ssa_name_p (arg))
997 	  return true;
998       return false;
999     }
1000 
1001   if (code == SSA_NAME)
1002     return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
1003 
1004   if (code == INTEGER_CST
1005       || is_gimple_min_invariant (expr))
1006     return false;
1007 
1008   if (code == ADDR_EXPR)
1009     return !for_each_index (&TREE_OPERAND (expr, 0),
1010 			    idx_contains_abnormal_ssa_name_p,
1011 			    NULL);
1012 
1013   if (code == COND_EXPR)
1014     return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
1015       || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
1016       || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));
1017 
1018   switch (codeclass)
1019     {
1020     case tcc_binary:
1021     case tcc_comparison:
1022       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
1023 	return true;
1024 
1025       /* Fallthru.  */
1026     case tcc_unary:
1027       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
1028 	return true;
1029 
1030       break;
1031 
1032     default:
1033       gcc_unreachable ();
1034     }
1035 
1036   return false;
1037 }
1038 
1039 /*  Returns the structure describing number of iterations determined from
1040     EXIT of DATA->current_loop, or NULL if something goes wrong.  */
1041 
1042 static struct tree_niter_desc *
1043 niter_for_exit (struct ivopts_data *data, edge exit)
1044 {
1045   struct tree_niter_desc *desc;
1046   tree_niter_desc **slot;
1047 
1048   if (!data->niters)
1049     {
1050       data->niters = new hash_map<edge, tree_niter_desc *>;
1051       slot = NULL;
1052     }
1053   else
1054     slot = data->niters->get (exit);
1055 
1056   if (!slot)
1057     {
1058       /* Try to determine number of iterations.  We cannot safely work with ssa
1059 	 names that appear in phi nodes on abnormal edges, so that we do not
1060 	 create overlapping life ranges for them (PR 27283).  */
1061       desc = XNEW (struct tree_niter_desc);
1062       if (!number_of_iterations_exit (data->current_loop,
1063 				      exit, desc, true)
1064      	  || contains_abnormal_ssa_name_p (desc->niter))
1065 	{
1066 	  XDELETE (desc);
1067 	  desc = NULL;
1068 	}
1069       data->niters->put (exit, desc);
1070     }
1071   else
1072     desc = *slot;
1073 
1074   return desc;
1075 }
1076 
1077 /* Returns the structure describing number of iterations determined from
1078    single dominating exit of DATA->current_loop, or NULL if something
1079    goes wrong.  */
1080 
1081 static struct tree_niter_desc *
1082 niter_for_single_dom_exit (struct ivopts_data *data)
1083 {
1084   edge exit = single_dom_exit (data->current_loop);
1085 
1086   if (!exit)
1087     return NULL;
1088 
1089   return niter_for_exit (data, exit);
1090 }
1091 
1092 /* Initializes data structures used by the iv optimization pass, stored
1093    in DATA.  */
1094 
1095 static void
1096 tree_ssa_iv_optimize_init (struct ivopts_data *data)
1097 {
1098   data->version_info_size = 2 * num_ssa_names;
1099   data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
1100   data->relevant = BITMAP_ALLOC (NULL);
1101   data->important_candidates = BITMAP_ALLOC (NULL);
1102   data->max_inv_var_id = 0;
1103   data->max_inv_expr_id = 0;
1104   data->niters = NULL;
1105   data->vgroups.create (20);
1106   data->vcands.create (20);
1107   data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
1108   data->name_expansion_cache = NULL;
1109   data->base_object_map = NULL;
1110   data->iv_common_cand_tab = new hash_table<iv_common_cand_hasher> (10);
1111   data->iv_common_cands.create (20);
1112   decl_rtl_to_reset.create (20);
1113   gcc_obstack_init (&data->iv_obstack);
1114 }
1115 
1116 /* walk_tree callback for determine_base_object.  */
1117 
1118 static tree
1119 determine_base_object_1 (tree *tp, int *walk_subtrees, void *wdata)
1120 {
1121   tree_code code = TREE_CODE (*tp);
1122   tree obj = NULL_TREE;
1123   if (code == ADDR_EXPR)
1124     {
1125       tree base = get_base_address (TREE_OPERAND (*tp, 0));
1126       if (!base)
1127 	obj = *tp;
1128       else if (TREE_CODE (base) != MEM_REF)
1129 	obj = fold_convert (ptr_type_node, build_fold_addr_expr (base));
1130     }
1131   else if (code == SSA_NAME && POINTER_TYPE_P (TREE_TYPE (*tp)))
1132 	obj = fold_convert (ptr_type_node, *tp);
1133 
1134   if (!obj)
1135     {
1136       if (!EXPR_P (*tp))
1137 	*walk_subtrees = 0;
1138 
1139       return NULL_TREE;
1140     }
1141   /* Record special node for multiple base objects and stop.  */
1142   if (*static_cast<tree *> (wdata))
1143     {
1144       *static_cast<tree *> (wdata) = integer_zero_node;
1145       return integer_zero_node;
1146     }
1147   /* Record the base object and continue looking.  */
1148   *static_cast<tree *> (wdata) = obj;
1149   return NULL_TREE;
1150 }
1151 
1152 /* Returns a memory object to which EXPR points, with caching.  Return NULL if
1153    we are able to determine that it does not point to any such object; return
1154    integer_zero_node specially if EXPR contains multiple base objects.  */
1155 
1156 static tree
1157 determine_base_object (struct ivopts_data *data, tree expr)
1158 {
1159   tree *slot, obj = NULL_TREE;
1160   if (data->base_object_map)
1161     {
1162       if ((slot = data->base_object_map->get(expr)) != NULL)
1163 	return *slot;
1164     }
1165   else
1166     data->base_object_map = new hash_map<tree, tree>;
1167 
1168   (void) walk_tree_without_duplicates (&expr, determine_base_object_1, &obj);
1169   data->base_object_map->put (expr, obj);
1170   return obj;
1171 }
1172 
1173 /* Return true if an address expression with a non-DECL_P operand appears
1174    in EXPR.  */
1175 
1176 static bool
1177 contain_complex_addr_expr (tree expr)
1178 {
1179   bool res = false;
1180 
1181   STRIP_NOPS (expr);
1182   switch (TREE_CODE (expr))
1183     {
1184     case POINTER_PLUS_EXPR:
1185     case PLUS_EXPR:
1186     case MINUS_EXPR:
1187       res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
1188       res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
1189       break;
1190 
1191     case ADDR_EXPR:
1192       return (!DECL_P (TREE_OPERAND (expr, 0)));
1193 
1194     default:
1195       return false;
1196     }
1197 
1198   return res;
1199 }
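
/* For example (illustrative): "&x" where "x" is a declaration is not a
   complex address expression, while "&a[i]" is, because the operand of the
   ADDR_EXPR is an ARRAY_REF rather than a DECL.  */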
1200 
1201 /* Allocates an induction variable with given initial value BASE and step STEP
1202    for loop LOOP.  NO_OVERFLOW implies the iv doesn't overflow.  */
1203 
1204 static struct iv *
1205 alloc_iv (struct ivopts_data *data, tree base, tree step,
1206 	  bool no_overflow = false)
1207 {
1208   tree expr = base;
1209   struct iv *iv = (struct iv*) obstack_alloc (&data->iv_obstack,
1210 					      sizeof (struct iv));
1211   gcc_assert (step != NULL_TREE);
1212 
1213   /* Lower address expressions in the base, except ones with a DECL_P operand.
1214      By doing this:
1215        1) More accurate cost can be computed for address expressions;
1216        2) Duplicate candidates won't be created for bases in different
1217 	  forms, like &a[0] and &a.  */
1218   STRIP_NOPS (expr);
1219   if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
1220       || contain_complex_addr_expr (expr))
1221     {
1222       aff_tree comb;
1223       tree_to_aff_combination (expr, TREE_TYPE (expr), &comb);
1224       base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
1225     }
1226 
1227   iv->base = base;
1228   iv->base_object = determine_base_object (data, base);
1229   iv->step = step;
1230   iv->biv_p = false;
1231   iv->nonlin_use = NULL;
1232   iv->ssa_name = NULL_TREE;
1233   if (!no_overflow
1234        && !iv_can_overflow_p (data->current_loop, TREE_TYPE (base),
1235 			      base, step))
1236     no_overflow = true;
1237   iv->no_overflow = no_overflow;
1238   iv->have_address_use = false;
1239 
1240   return iv;
1241 }
1242 
1243 /* Sets STEP and BASE for induction variable IV.  NO_OVERFLOW implies the IV
1244    doesn't overflow.  */
1245 
1246 static void
1247 set_iv (struct ivopts_data *data, tree iv, tree base, tree step,
1248 	bool no_overflow)
1249 {
1250   struct version_info *info = name_info (data, iv);
1251 
1252   gcc_assert (!info->iv);
1253 
1254   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
1255   info->iv = alloc_iv (data, base, step, no_overflow);
1256   info->iv->ssa_name = iv;
1257 }
1258 
1259 /* Finds induction variable declaration for VAR.  */
1260 
1261 static struct iv *
1262 get_iv (struct ivopts_data *data, tree var)
1263 {
1264   basic_block bb;
1265   tree type = TREE_TYPE (var);
1266 
1267   if (!POINTER_TYPE_P (type)
1268       && !INTEGRAL_TYPE_P (type))
1269     return NULL;
1270 
1271   if (!name_info (data, var)->iv)
1272     {
1273       bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1274 
1275       if (!bb
1276 	  || !flow_bb_inside_loop_p (data->current_loop, bb))
1277 	set_iv (data, var, var, build_int_cst (type, 0), true);
1278     }
1279 
1280   return name_info (data, var)->iv;
1281 }
1282 
1283 /* Return the first non-invariant ssa var found in EXPR.  */
1284 
1285 static tree
1286 extract_single_var_from_expr (tree expr)
1287 {
1288   int i, n;
1289   tree tmp;
1290   enum tree_code code;
1291 
1292   if (!expr || is_gimple_min_invariant (expr))
1293     return NULL;
1294 
1295   code = TREE_CODE (expr);
1296   if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1297     {
1298       n = TREE_OPERAND_LENGTH (expr);
1299       for (i = 0; i < n; i++)
1300 	{
1301 	  tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));
1302 
1303 	  if (tmp)
1304 	    return tmp;
1305 	}
1306     }
1307   return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
1308 }
1309 
1310 /* Finds basic ivs.  */
1311 
1312 static bool
1313 find_bivs (struct ivopts_data *data)
1314 {
1315   gphi *phi;
1316   affine_iv iv;
1317   tree step, type, base, stop;
1318   bool found = false;
1319   struct loop *loop = data->current_loop;
1320   gphi_iterator psi;
1321 
1322   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1323     {
1324       phi = psi.phi ();
1325 
1326       if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
1327 	continue;
1328 
1329       if (virtual_operand_p (PHI_RESULT (phi)))
1330 	continue;
1331 
1332       if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true))
1333 	continue;
1334 
1335       if (integer_zerop (iv.step))
1336 	continue;
1337 
1338       step = iv.step;
1339       base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1340       /* Stop expanding the iv base at the first ssa var referred to by the
1341 	 iv step.  Ideally we would stop at any ssa var, but since that is
1342 	 expensive and such cases rarely happen, we only do it for the first one.
1343 
1344 	 See PR64705 for the rationale.  */
1345       stop = extract_single_var_from_expr (step);
1346       base = expand_simple_operations (base, stop);
1347       if (contains_abnormal_ssa_name_p (base)
1348 	  || contains_abnormal_ssa_name_p (step))
1349 	continue;
1350 
1351       type = TREE_TYPE (PHI_RESULT (phi));
1352       base = fold_convert (type, base);
1353       if (step)
1354 	{
1355 	  if (POINTER_TYPE_P (type))
1356 	    step = convert_to_ptrofftype (step);
1357 	  else
1358 	    step = fold_convert (type, step);
1359 	}
1360 
1361       set_iv (data, PHI_RESULT (phi), base, step, iv.no_overflow);
1362       found = true;
1363     }
1364 
1365   return found;
1366 }
1367 
1368 /* Marks basic ivs.  */
1369 
1370 static void
1371 mark_bivs (struct ivopts_data *data)
1372 {
1373   gphi *phi;
1374   gimple *def;
1375   tree var;
1376   struct iv *iv, *incr_iv;
1377   struct loop *loop = data->current_loop;
1378   basic_block incr_bb;
1379   gphi_iterator psi;
1380 
1381   data->bivs_not_used_in_addr = 0;
1382   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1383     {
1384       phi = psi.phi ();
1385 
1386       iv = get_iv (data, PHI_RESULT (phi));
1387       if (!iv)
1388 	continue;
1389 
1390       var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1391       def = SSA_NAME_DEF_STMT (var);
1392       /* Don't mark iv peeled from other one as biv.  */
1393       if (def
1394 	  && gimple_code (def) == GIMPLE_PHI
1395 	  && gimple_bb (def) == loop->header)
1396 	continue;
1397 
1398       incr_iv = get_iv (data, var);
1399       if (!incr_iv)
1400 	continue;
1401 
1402       /* If the increment is in the subloop, ignore it.  */
1403       incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1404       if (incr_bb->loop_father != data->current_loop
1405 	  || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1406 	continue;
1407 
1408       iv->biv_p = true;
1409       incr_iv->biv_p = true;
1410       if (iv->no_overflow)
1411 	data->bivs_not_used_in_addr++;
1412       if (incr_iv->no_overflow)
1413 	data->bivs_not_used_in_addr++;
1414     }
1415 }
1416 
1417 /* Checks whether STMT defines a linear induction variable and stores its
1418    parameters to IV.  */
1419 
1420 static bool
1421 find_givs_in_stmt_scev (struct ivopts_data *data, gimple *stmt, affine_iv *iv)
1422 {
1423   tree lhs, stop;
1424   struct loop *loop = data->current_loop;
1425 
1426   iv->base = NULL_TREE;
1427   iv->step = NULL_TREE;
1428 
1429   if (gimple_code (stmt) != GIMPLE_ASSIGN)
1430     return false;
1431 
1432   lhs = gimple_assign_lhs (stmt);
1433   if (TREE_CODE (lhs) != SSA_NAME)
1434     return false;
1435 
1436   if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1437     return false;
1438 
1439   /* Stop expanding the iv base at the first ssa var referred to by the iv
1440      step.  Ideally we would stop at any ssa var, but since that is expensive
1441      and such cases rarely happen, we only do it for the first one.
1442 
1443      See PR64705 for the rationale.  */
1444   stop = extract_single_var_from_expr (iv->step);
1445   iv->base = expand_simple_operations (iv->base, stop);
1446   if (contains_abnormal_ssa_name_p (iv->base)
1447       || contains_abnormal_ssa_name_p (iv->step))
1448     return false;
1449 
1450   /* If STMT could throw, then do not consider STMT as defining a GIV.
1451      While this will suppress optimizations, we cannot safely delete this
1452      GIV and associated statements, even if it appears it is not used.  */
1453   if (stmt_could_throw_p (cfun, stmt))
1454     return false;
1455 
1456   return true;
1457 }
1458 
1459 /* Finds general ivs in statement STMT.  */
1460 
1461 static void
1462 find_givs_in_stmt (struct ivopts_data *data, gimple *stmt)
1463 {
1464   affine_iv iv;
1465 
1466   if (!find_givs_in_stmt_scev (data, stmt, &iv))
1467     return;
1468 
1469   set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step, iv.no_overflow);
1470 }
1471 
1472 /* Finds general ivs in basic block BB.  */
1473 
1474 static void
1475 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1476 {
1477   gimple_stmt_iterator bsi;
1478 
1479   for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1480     find_givs_in_stmt (data, gsi_stmt (bsi));
1481 }
1482 
1483 /* Finds general ivs.  */
1484 
1485 static void
1486 find_givs (struct ivopts_data *data)
1487 {
1488   struct loop *loop = data->current_loop;
1489   basic_block *body = get_loop_body_in_dom_order (loop);
1490   unsigned i;
1491 
1492   for (i = 0; i < loop->num_nodes; i++)
1493     find_givs_in_bb (data, body[i]);
1494   free (body);
1495 }
1496 
1497 /* For each ssa name defined in LOOP determines whether it is an induction
1498    variable and if so, its initial value and step.  */
1499 
1500 static bool
1501 find_induction_variables (struct ivopts_data *data)
1502 {
1503   unsigned i;
1504   bitmap_iterator bi;
1505 
1506   if (!find_bivs (data))
1507     return false;
1508 
1509   find_givs (data);
1510   mark_bivs (data);
1511 
1512   if (dump_file && (dump_flags & TDF_DETAILS))
1513     {
1514       struct tree_niter_desc *niter = niter_for_single_dom_exit (data);
1515 
1516       if (niter)
1517 	{
1518 	  fprintf (dump_file, "  number of iterations ");
1519 	  print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1520 	  if (!integer_zerop (niter->may_be_zero))
1521 	    {
1522 	      fprintf (dump_file, "; zero if ");
1523 	      print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1524 	    }
1525 	  fprintf (dump_file, "\n");
1526 	};
1527 
1528       fprintf (dump_file, "\n<Induction Vars>:\n");
1529       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1530 	{
1531 	  struct version_info *info = ver_info (data, i);
1532 	  if (info->iv && info->iv->step && !integer_zerop (info->iv->step))
1533 	    dump_iv (dump_file, ver_info (data, i)->iv, true, 0);
1534 	}
1535     }
1536 
1537   return true;
1538 }
1539 
1540 /* Records a use of TYPE at *USE_P in STMT whose value is IV in GROUP.
1541    For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
1542    is the const offset stripped from IV base and MEM_TYPE is the type
1543    of the memory being addressed.  For uses of other types, ADDR_BASE
1544    and ADDR_OFFSET are zero by default and MEM_TYPE is NULL_TREE.  */
1545 
1546 static struct iv_use *
1547 record_use (struct iv_group *group, tree *use_p, struct iv *iv,
1548 	    gimple *stmt, enum use_type type, tree mem_type,
1549 	    tree addr_base, poly_uint64 addr_offset)
1550 {
1551   struct iv_use *use = XCNEW (struct iv_use);
1552 
1553   use->id = group->vuses.length ();
1554   use->group_id = group->id;
1555   use->type = type;
1556   use->mem_type = mem_type;
1557   use->iv = iv;
1558   use->stmt = stmt;
1559   use->op_p = use_p;
1560   use->addr_base = addr_base;
1561   use->addr_offset = addr_offset;
1562 
1563   group->vuses.safe_push (use);
1564   return use;
1565 }
1566 
1567 /* Checks whether OP is a loop-level invariant and if so, records it.
1568    NONLINEAR_USE is true if the invariant is used in a way we do not
1569    handle specially.  */
1570 
1571 static void
1572 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1573 {
1574   basic_block bb;
1575   struct version_info *info;
1576 
1577   if (TREE_CODE (op) != SSA_NAME
1578       || virtual_operand_p (op))
1579     return;
1580 
1581   bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1582   if (bb
1583       && flow_bb_inside_loop_p (data->current_loop, bb))
1584     return;
1585 
1586   info = name_info (data, op);
1587   info->name = op;
1588   info->has_nonlin_use |= nonlinear_use;
1589   if (!info->inv_id)
1590     info->inv_id = ++data->max_inv_var_id;
1591   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1592 }
1593 
1594 /* Record a group of TYPE.  */
1595 
1596 static struct iv_group *
1597 record_group (struct ivopts_data *data, enum use_type type)
1598 {
1599   struct iv_group *group = XCNEW (struct iv_group);
1600 
1601   group->id = data->vgroups.length ();
1602   group->type = type;
1603   group->related_cands = BITMAP_ALLOC (NULL);
1604   group->vuses.create (1);
1605 
1606   data->vgroups.safe_push (group);
1607   return group;
1608 }
1609 
1610 /* Record a use of TYPE at *USE_P in STMT whose value is IV in a group.
1611    New group will be created if there is no existing group for the use.
1612    A new group will be created if there is no existing group for the use.
1613    isn't an address reference.  */
1614 
1615 static struct iv_use *
1616 record_group_use (struct ivopts_data *data, tree *use_p,
1617 		  struct iv *iv, gimple *stmt, enum use_type type,
1618 		  tree mem_type)
1619 {
1620   tree addr_base = NULL;
1621   struct iv_group *group = NULL;
1622   poly_uint64 addr_offset = 0;
1623 
1624   /* Address type uses may share an existing group; non address type uses are always recorded in a new group.  */
1625   if (address_p (type))
1626     {
1627       unsigned int i;
1628 
1629       addr_base = strip_offset (iv->base, &addr_offset);
1630       for (i = 0; i < data->vgroups.length (); i++)
1631 	{
1632 	  struct iv_use *use;
1633 
1634 	  group = data->vgroups[i];
1635 	  use = group->vuses[0];
1636 	  if (!address_p (use->type))
1637 	    continue;
1638 
1639 	  /* Check if it has the same stripped base and step.  */
1640 	  if (operand_equal_p (iv->base_object, use->iv->base_object, 0)
1641 	      && operand_equal_p (iv->step, use->iv->step, 0)
1642 	      && operand_equal_p (addr_base, use->addr_base, 0))
1643 	    break;
1644 	}
1645       if (i == data->vgroups.length ())
1646 	group = NULL;
1647     }
1648 
1649   if (!group)
1650     group = record_group (data, type);
1651 
1652   return record_use (group, use_p, iv, stmt, type, mem_type,
1653 		     addr_base, addr_offset);
1654 }
1655 
1656 /* Checks whether the use OP is interesting and if so, records it.  */
1657 
1658 static struct iv_use *
1659 find_interesting_uses_op (struct ivopts_data *data, tree op)
1660 {
1661   struct iv *iv;
1662   gimple *stmt;
1663   struct iv_use *use;
1664 
1665   if (TREE_CODE (op) != SSA_NAME)
1666     return NULL;
1667 
1668   iv = get_iv (data, op);
1669   if (!iv)
1670     return NULL;
1671 
1672   if (iv->nonlin_use)
1673     {
1674       gcc_assert (iv->nonlin_use->type == USE_NONLINEAR_EXPR);
1675       return iv->nonlin_use;
1676     }
1677 
1678   if (integer_zerop (iv->step))
1679     {
1680       record_invariant (data, op, true);
1681       return NULL;
1682     }
1683 
1684   stmt = SSA_NAME_DEF_STMT (op);
1685   gcc_assert (gimple_code (stmt) == GIMPLE_PHI || is_gimple_assign (stmt));
1686 
1687   use = record_group_use (data, NULL, iv, stmt, USE_NONLINEAR_EXPR, NULL_TREE);
1688   iv->nonlin_use = use;
1689   return use;
1690 }
1691 
1692 /* Indicate how compare type iv_use can be handled.  */
1693 enum comp_iv_rewrite
1694 {
1695   COMP_IV_NA,
1696   /* We may rewrite compare type iv_use by expressing value of the iv_use.  */
1697   COMP_IV_EXPR,
1698   /* We may rewrite compare type iv_uses on both sides of comparison by
1699      expressing value of each iv_use.  */
1700   COMP_IV_EXPR_2,
1701   /* We may rewrite compare type iv_use by expressing value of the iv_use
1702      or by eliminating it with other iv_cand.  */
1703   COMP_IV_ELIM
1704 };
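
/* Illustrative classification (an assumption about typical cases): for an
   exit test "i < n" where "i" is an iv and "n" is loop invariant, the use of
   "i" can be rewritten or eliminated (COMP_IV_ELIM); for "i < j" where both
   sides are ivs, both may be rewritten (COMP_IV_EXPR_2); for "i < x" where
   "x" is defined in the loop but is not an iv, only the iv side may be
   rewritten (COMP_IV_EXPR).  */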
1705 
1706 /* Given a condition in statement STMT, checks whether it is a compare
1707    of an induction variable and an invariant.  If this is the case,
1708    CONTROL_VAR is set to the location of the iv, BOUND to the location of
1709    the invariant, IV_VAR and IV_BOUND are set to the corresponding
1710    induction variable descriptions, and the kind of rewrite possible is
1711    returned (see enum comp_iv_rewrite).  Otherwise, CONTROL_VAR and BOUND
1712    are set to the arguments of the condition and COMP_IV_NA is returned.  */
1713 
1714 static enum comp_iv_rewrite
1715 extract_cond_operands (struct ivopts_data *data, gimple *stmt,
1716 		       tree **control_var, tree **bound,
1717 		       struct iv **iv_var, struct iv **iv_bound)
1718 {
1719   /* The objects returned when COND has constant operands.  */
1720   static struct iv const_iv;
1721   static tree zero;
1722   tree *op0 = &zero, *op1 = &zero;
1723   struct iv *iv0 = &const_iv, *iv1 = &const_iv;
1724   enum comp_iv_rewrite rewrite_type = COMP_IV_NA;
1725 
1726   if (gimple_code (stmt) == GIMPLE_COND)
1727     {
1728       gcond *cond_stmt = as_a <gcond *> (stmt);
1729       op0 = gimple_cond_lhs_ptr (cond_stmt);
1730       op1 = gimple_cond_rhs_ptr (cond_stmt);
1731     }
1732   else
1733     {
1734       op0 = gimple_assign_rhs1_ptr (stmt);
1735       op1 = gimple_assign_rhs2_ptr (stmt);
1736     }
1737 
1738   zero = integer_zero_node;
1739   const_iv.step = integer_zero_node;
1740 
1741   if (TREE_CODE (*op0) == SSA_NAME)
1742     iv0 = get_iv (data, *op0);
1743   if (TREE_CODE (*op1) == SSA_NAME)
1744     iv1 = get_iv (data, *op1);
1745 
1746   /* If both sides of the comparison are IVs, we can express the ivs on both ends.  */
1747   if (iv0 && iv1 && !integer_zerop (iv0->step) && !integer_zerop (iv1->step))
1748     {
1749       rewrite_type = COMP_IV_EXPR_2;
1750       goto end;
1751     }
1752 
1753   /* If neither side of the comparison is an IV.  */
1754   if ((!iv0 || integer_zerop (iv0->step))
1755       && (!iv1 || integer_zerop (iv1->step)))
1756     goto end;
1757 
1758   /* Control variable may be on the other side.  */
1759   if (!iv0 || integer_zerop (iv0->step))
1760     {
1761       std::swap (op0, op1);
1762       std::swap (iv0, iv1);
1763     }
1764   /* If one side is an IV and the other side isn't loop invariant.  */
1765   if (!iv1)
1766     rewrite_type = COMP_IV_EXPR;
1767   /* If one side is an IV and the other side is loop invariant.  */
1768   else if (!integer_zerop (iv0->step) && integer_zerop (iv1->step))
1769     rewrite_type = COMP_IV_ELIM;
1770 
1771 end:
1772   if (control_var)
1773     *control_var = op0;
1774   if (iv_var)
1775     *iv_var = iv0;
1776   if (bound)
1777     *bound = op1;
1778   if (iv_bound)
1779     *iv_bound = iv1;
1780 
1781   return rewrite_type;
1782 }
1783 
1784 /* Checks whether the condition in STMT is interesting and if so,
1785    records it.  */
1786 
1787 static void
1788 find_interesting_uses_cond (struct ivopts_data *data, gimple *stmt)
1789 {
1790   tree *var_p, *bound_p;
1791   struct iv *var_iv, *bound_iv;
1792   enum comp_iv_rewrite ret;
1793 
1794   ret = extract_cond_operands (data, stmt,
1795 			       &var_p, &bound_p, &var_iv, &bound_iv);
1796   if (ret == COMP_IV_NA)
1797     {
1798       find_interesting_uses_op (data, *var_p);
1799       find_interesting_uses_op (data, *bound_p);
1800       return;
1801     }
1802 
1803   record_group_use (data, var_p, var_iv, stmt, USE_COMPARE, NULL_TREE);
1804   /* Record compare type iv_use for iv on the other side of comparison.  */
1805   if (ret == COMP_IV_EXPR_2)
1806     record_group_use (data, bound_p, bound_iv, stmt, USE_COMPARE, NULL_TREE);
1807 }
1808 
1809 /* Returns the outermost loop in which EXPR is obviously invariant,
1810    relative to the loop LOOP, i.e. such that all of EXPR's operands are
1811    defined outside of the returned loop.  Returns NULL if EXPR is not
1812    even obviously invariant in LOOP.  */
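/* For instance (an illustrative sketch), given the nest

     for (i = ...)		<- outer loop
       {
	 m_1 = i_2 * 4;
	 for (j = ...)		<- inner loop == LOOP
	   ... use of m_1 ...
       }

   m_1 is defined inside the outer loop, so for EXPR == m_1 the function
   returns the inner loop: EXPR is invariant there, but not in the outer
   loop.  */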
1813 
1814 struct loop *
1815 outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
1816 {
1817   basic_block def_bb;
1818   unsigned i, len;
1819 
1820   if (is_gimple_min_invariant (expr))
1821     return current_loops->tree_root;
1822 
1823   if (TREE_CODE (expr) == SSA_NAME)
1824     {
1825       def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1826       if (def_bb)
1827 	{
1828 	  if (flow_bb_inside_loop_p (loop, def_bb))
1829 	    return NULL;
1830 	  return superloop_at_depth (loop,
1831 				     loop_depth (def_bb->loop_father) + 1);
1832 	}
1833 
1834       return current_loops->tree_root;
1835     }
1836 
1837   if (!EXPR_P (expr))
1838     return NULL;
1839 
1840   unsigned maxdepth = 0;
1841   len = TREE_OPERAND_LENGTH (expr);
1842   for (i = 0; i < len; i++)
1843     {
1844       struct loop *ivloop;
1845       if (!TREE_OPERAND (expr, i))
1846 	continue;
1847 
1848       ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
1849       if (!ivloop)
1850 	return NULL;
1851       maxdepth = MAX (maxdepth, loop_depth (ivloop));
1852     }
1853 
1854   return superloop_at_depth (loop, maxdepth);
1855 }
1856 
1857 /* Returns true if expression EXPR is obviously invariant in LOOP,
1858    i.e. if all its operands are defined outside of the LOOP.  LOOP
1859    should not be the function body.  */
1860 
1861 bool
1862 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1863 {
1864   basic_block def_bb;
1865   unsigned i, len;
1866 
1867   gcc_assert (loop_depth (loop) > 0);
1868 
1869   if (is_gimple_min_invariant (expr))
1870     return true;
1871 
1872   if (TREE_CODE (expr) == SSA_NAME)
1873     {
1874       def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1875       if (def_bb
1876 	  && flow_bb_inside_loop_p (loop, def_bb))
1877 	return false;
1878 
1879       return true;
1880     }
1881 
1882   if (!EXPR_P (expr))
1883     return false;
1884 
1885   len = TREE_OPERAND_LENGTH (expr);
1886   for (i = 0; i < len; i++)
1887     if (TREE_OPERAND (expr, i)
1888 	&& !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1889       return false;
1890 
1891   return true;
1892 }
1893 
1894 /* Given expression EXPR which computes inductive values with respect
1895    to the loop recorded in DATA, this function returns the biv from which
1896    EXPR is derived, by tracing definition chains of ssa variables in EXPR.  */
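/* For example (a sketch), if i_1 is a biv and the loop body contains

     i_3 = i_1 + 1;
     _4 = (sizetype) i_3;
     _5 = _4 * 4;
     p_6 = p_0 + _5;

   then for EXPR == p_6 the definition chain is traced through the pointer
   addition, the multiplication and the increment back to i_1, whose iv
   descriptor is returned.  */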
1897 
1898 static struct iv*
1899 find_deriving_biv_for_expr (struct ivopts_data *data, tree expr)
1900 {
1901   struct iv *iv;
1902   unsigned i, n;
1903   tree e2, e1;
1904   enum tree_code code;
1905   gimple *stmt;
1906 
1907   if (expr == NULL_TREE)
1908     return NULL;
1909 
1910   if (is_gimple_min_invariant (expr))
1911     return NULL;
1912 
1913   code = TREE_CODE (expr);
1914   if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1915     {
1916       n = TREE_OPERAND_LENGTH (expr);
1917       for (i = 0; i < n; i++)
1918 	{
1919 	  iv = find_deriving_biv_for_expr (data, TREE_OPERAND (expr, i));
1920 	  if (iv)
1921 	    return iv;
1922 	}
1923     }
1924 
1925   /* Stop if it's not an SSA name.  */
1926   if (code != SSA_NAME)
1927     return NULL;
1928 
1929   iv = get_iv (data, expr);
1930   if (!iv || integer_zerop (iv->step))
1931     return NULL;
1932   else if (iv->biv_p)
1933     return iv;
1934 
1935   stmt = SSA_NAME_DEF_STMT (expr);
1936   if (gphi *phi = dyn_cast <gphi *> (stmt))
1937     {
1938       ssa_op_iter iter;
1939       use_operand_p use_p;
1940       basic_block phi_bb = gimple_bb (phi);
1941 
1942       /* Skip loop header PHI that doesn't define biv.  */
1943       if (phi_bb->loop_father == data->current_loop)
1944 	return NULL;
1945 
1946       if (virtual_operand_p (gimple_phi_result (phi)))
1947 	return NULL;
1948 
1949       FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
1950 	{
1951 	  tree use = USE_FROM_PTR (use_p);
1952 	  iv = find_deriving_biv_for_expr (data, use);
1953 	  if (iv)
1954 	    return iv;
1955 	}
1956       return NULL;
1957     }
1958   if (gimple_code (stmt) != GIMPLE_ASSIGN)
1959     return NULL;
1960 
1961   e1 = gimple_assign_rhs1 (stmt);
1962   code = gimple_assign_rhs_code (stmt);
1963   if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
1964     return find_deriving_biv_for_expr (data, e1);
1965 
1966   switch (code)
1967     {
1968     case MULT_EXPR:
1969     case PLUS_EXPR:
1970     case MINUS_EXPR:
1971     case POINTER_PLUS_EXPR:
1972       /* Increments, decrements and multiplications by a constant
1973 	 are simple.  */
1974       e2 = gimple_assign_rhs2 (stmt);
1975       iv = find_deriving_biv_for_expr (data, e2);
1976       if (iv)
1977 	return iv;
1978       gcc_fallthrough ();
1979 
1980     CASE_CONVERT:
1981       /* Casts are simple.  */
1982       return find_deriving_biv_for_expr (data, e1);
1983 
1984     default:
1985       break;
1986     }
1987 
1988   return NULL;
1989 }
1990 
1991 /* Record that BIV, together with any biv that is its predecessor or
1992    successor (differs from it by one step), is used in address type uses.  */
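/* E.g. (sketch): if BIV is {0, +, 1} and another relevant biv is
   {1, +, 1}, the latter is BIV's successor (its base equals BIV's base
   plus one step), so both get have_address_use set and both are removed
   from the count of bivs not used in addresses.  */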
1993 
1994 static void
1995 record_biv_for_address_use (struct ivopts_data *data, struct iv *biv)
1996 {
1997   unsigned i;
1998   tree type, base_1, base_2;
1999   bitmap_iterator bi;
2000 
2001   if (!biv || !biv->biv_p || integer_zerop (biv->step)
2002       || biv->have_address_use || !biv->no_overflow)
2003     return;
2004 
2005   type = TREE_TYPE (biv->base);
2006   if (!INTEGRAL_TYPE_P (type))
2007     return;
2008 
2009   biv->have_address_use = true;
2010   data->bivs_not_used_in_addr--;
2011   base_1 = fold_build2 (PLUS_EXPR, type, biv->base, biv->step);
2012   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2013     {
2014       struct iv *iv = ver_info (data, i)->iv;
2015 
2016       if (!iv || !iv->biv_p || integer_zerop (iv->step)
2017 	  || iv->have_address_use || !iv->no_overflow)
2018 	continue;
2019 
2020       if (type != TREE_TYPE (iv->base)
2021 	  || !INTEGRAL_TYPE_P (TREE_TYPE (iv->base)))
2022 	continue;
2023 
2024       if (!operand_equal_p (biv->step, iv->step, 0))
2025 	continue;
2026 
2027       base_2 = fold_build2 (PLUS_EXPR, type, iv->base, iv->step);
2028       if (operand_equal_p (base_1, iv->base, 0)
2029 	  || operand_equal_p (base_2, biv->base, 0))
2030 	{
2031 	  iv->have_address_use = true;
2032 	  data->bivs_not_used_in_addr--;
2033 	}
2034     }
2035 }
2036 
2037 /* Accumulates the steps of indices into DATA and replaces their values with
2038    the initial ones.  Returns false when the value of the index cannot be determined.
2039    Callback for for_each_index.  */
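/* E.g. (illustrative only): for a reference a[i_1] where the elements of a
   are 4 bytes wide and i_1 is an iv with step 1, the index is replaced by
   i_1's initial value and 4 * 1 is accumulated into the recorded step; for
   plain pointer arithmetic the element size is taken to be one byte.  */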
2040 
2041 struct ifs_ivopts_data
2042 {
2043   struct ivopts_data *ivopts_data;
2044   gimple *stmt;
2045   tree step;
2046 };
2047 
2048 static bool
2049 idx_find_step (tree base, tree *idx, void *data)
2050 {
2051   struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
2052   struct iv *iv;
2053   bool use_overflow_semantics = false;
2054   tree step, iv_base, iv_step, lbound, off;
2055   struct loop *loop = dta->ivopts_data->current_loop;
2056 
2057   /* If base is a component ref, require that the offset of the reference
2058      be invariant.  */
2059   if (TREE_CODE (base) == COMPONENT_REF)
2060     {
2061       off = component_ref_field_offset (base);
2062       return expr_invariant_in_loop_p (loop, off);
2063     }
2064 
2065   /* If base is array, first check whether we will be able to move the
2066      reference out of the loop (in order to take its address in strength
2067      reduction).  In order for this to work we need both lower bound
2068      and step to be loop invariants.  */
2069   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2070     {
2071       /* Moreover, for a range, the size needs to be invariant as well.  */
2072       if (TREE_CODE (base) == ARRAY_RANGE_REF
2073 	  && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
2074 	return false;
2075 
2076       step = array_ref_element_size (base);
2077       lbound = array_ref_low_bound (base);
2078 
2079       if (!expr_invariant_in_loop_p (loop, step)
2080 	  || !expr_invariant_in_loop_p (loop, lbound))
2081 	return false;
2082     }
2083 
2084   if (TREE_CODE (*idx) != SSA_NAME)
2085     return true;
2086 
2087   iv = get_iv (dta->ivopts_data, *idx);
2088   if (!iv)
2089     return false;
2090 
2091   /* XXX  For a base of *D42 with iv->base being &x[0] we produce
2092 	  *&x[0], which is not folded and does not trigger the
2093 	  ARRAY_REF path below.  */
2094   *idx = iv->base;
2095 
2096   if (integer_zerop (iv->step))
2097     return true;
2098 
2099   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2100     {
2101       step = array_ref_element_size (base);
2102 
2103       /* We only handle addresses whose step is an integer constant.  */
2104       if (TREE_CODE (step) != INTEGER_CST)
2105 	return false;
2106     }
2107   else
2108     /* The step for pointer arithmetic is already 1 byte.  */
2109     step = size_one_node;
2110 
2111   iv_base = iv->base;
2112   iv_step = iv->step;
2113   if (iv->no_overflow && nowrap_type_p (TREE_TYPE (iv_step)))
2114     use_overflow_semantics = true;
2115 
2116   if (!convert_affine_scev (dta->ivopts_data->current_loop,
2117 			    sizetype, &iv_base, &iv_step, dta->stmt,
2118 			    use_overflow_semantics))
2119     {
2120       /* The index might wrap.  */
2121       return false;
2122     }
2123 
2124   step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
2125   dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
2126 
2127   if (dta->ivopts_data->bivs_not_used_in_addr)
2128     {
2129       if (!iv->biv_p)
2130 	iv = find_deriving_biv_for_expr (dta->ivopts_data, iv->ssa_name);
2131 
2132       record_biv_for_address_use (dta->ivopts_data, iv);
2133     }
2134   return true;
2135 }
2136 
2137 /* Records use in index IDX.  Callback for for_each_index.  Ivopts data
2138    object is passed to it in DATA.  */
2139 
2140 static bool
2141 idx_record_use (tree base, tree *idx,
2142 		void *vdata)
2143 {
2144   struct ivopts_data *data = (struct ivopts_data *) vdata;
2145   find_interesting_uses_op (data, *idx);
2146   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2147     {
2148       find_interesting_uses_op (data, array_ref_element_size (base));
2149       find_interesting_uses_op (data, array_ref_low_bound (base));
2150     }
2151   return true;
2152 }
2153 
2154 /* If we can prove that TOP = cst * BOT for some constant cst,
2155    store cst to MUL and return true.  Otherwise return false.
2156    The returned value is always sign-extended, regardless of the
2157    signedness of TOP and BOT.  */
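/* For instance, for TOP = n_1 * 4 + n_1 * 8 and BOT = n_1 this stores 12
   to *MUL and returns true; for TOP = 24 and BOT = 8 it stores 3; for
   TOP = n_1 + 1 and BOT = n_1 it returns false (illustrative values).  */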
2158 
2159 static bool
2160 constant_multiple_of (tree top, tree bot, widest_int *mul)
2161 {
2162   tree mby;
2163   enum tree_code code;
2164   unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
2165   widest_int res, p0, p1;
2166 
2167   STRIP_NOPS (top);
2168   STRIP_NOPS (bot);
2169 
2170   if (operand_equal_p (top, bot, 0))
2171     {
2172       *mul = 1;
2173       return true;
2174     }
2175 
2176   code = TREE_CODE (top);
2177   switch (code)
2178     {
2179     case MULT_EXPR:
2180       mby = TREE_OPERAND (top, 1);
2181       if (TREE_CODE (mby) != INTEGER_CST)
2182 	return false;
2183 
2184       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
2185 	return false;
2186 
2187       *mul = wi::sext (res * wi::to_widest (mby), precision);
2188       return true;
2189 
2190     case PLUS_EXPR:
2191     case MINUS_EXPR:
2192       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
2193 	  || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
2194 	return false;
2195 
2196       if (code == MINUS_EXPR)
2197 	p1 = -p1;
2198       *mul = wi::sext (p0 + p1, precision);
2199       return true;
2200 
2201     case INTEGER_CST:
2202       if (TREE_CODE (bot) != INTEGER_CST)
2203 	return false;
2204 
2205       p0 = widest_int::from (wi::to_wide (top), SIGNED);
2206       p1 = widest_int::from (wi::to_wide (bot), SIGNED);
2207       if (p1 == 0)
2208 	return false;
2209       *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
2210       return res == 0;
2211 
2212     default:
2213       if (POLY_INT_CST_P (top)
2214 	  && POLY_INT_CST_P (bot)
2215 	  && constant_multiple_p (wi::to_poly_widest (top),
2216 				  wi::to_poly_widest (bot), mul))
2217 	return true;
2218 
2219       return false;
2220     }
2221 }
2222 
2223 /* Return true if memory reference REF with step STEP may be unaligned.  */
2224 
2225 static bool
2226 may_be_unaligned_p (tree ref, tree step)
2227 {
2228   /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
2229      thus they are not misaligned.  */
2230   if (TREE_CODE (ref) == TARGET_MEM_REF)
2231     return false;
2232 
2233   unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
2234   if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
2235     align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));
2236 
2237   unsigned HOST_WIDE_INT bitpos;
2238   unsigned int ref_align;
2239   get_object_alignment_1 (ref, &ref_align, &bitpos);
2240   if (ref_align < align
2241       || (bitpos % align) != 0
2242       || (bitpos % BITS_PER_UNIT) != 0)
2243     return true;
2244 
2245   unsigned int trailing_zeros = tree_ctz (step);
2246   if (trailing_zeros < HOST_BITS_PER_INT
2247       && (1U << trailing_zeros) * BITS_PER_UNIT < align)
2248     return true;
2249 
2250   return false;
2251 }
2252 
2253 /* Return true if EXPR may be non-addressable.   */
2254 
2255 bool
2256 may_be_nonaddressable_p (tree expr)
2257 {
2258   switch (TREE_CODE (expr))
2259     {
2260     case VAR_DECL:
2261       /* Check if it's a register variable.  */
2262       return DECL_HARD_REGISTER (expr);
2263 
2264     case TARGET_MEM_REF:
2265       /* TARGET_MEM_REFs are translated directly to valid MEMs on the
2266 	 target, thus they are always addressable.  */
2267       return false;
2268 
2269     case MEM_REF:
2270       /* Likewise for MEM_REFs, modulo the storage order.  */
2271       return REF_REVERSE_STORAGE_ORDER (expr);
2272 
2273     case BIT_FIELD_REF:
2274       if (REF_REVERSE_STORAGE_ORDER (expr))
2275 	return true;
2276       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2277 
2278     case COMPONENT_REF:
2279       if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2280 	return true;
2281       return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
2282 	     || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2283 
2284     case ARRAY_REF:
2285     case ARRAY_RANGE_REF:
2286       if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2287 	return true;
2288       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2289 
2290     case VIEW_CONVERT_EXPR:
2291       /* This kind of view-conversion may wrap non-addressable objects
2292 	 and make them look addressable.  After some processing the
2293 	 non-addressability may be uncovered again, causing ADDR_EXPRs
2294 	 of inappropriate objects to be built.  */
2295       if (is_gimple_reg (TREE_OPERAND (expr, 0))
2296 	  || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
2297 	return true;
2298       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2299 
2300     CASE_CONVERT:
2301       return true;
2302 
2303     default:
2304       break;
2305     }
2306 
2307   return false;
2308 }
2309 
2310 /* Finds addresses in *OP_P inside STMT.  */
2311 
2312 static void
2313 find_interesting_uses_address (struct ivopts_data *data, gimple *stmt,
2314 			       tree *op_p)
2315 {
2316   tree base = *op_p, step = size_zero_node;
2317   struct iv *civ;
2318   struct ifs_ivopts_data ifs_ivopts_data;
2319 
2320   /* Do not play with volatile memory references.  A bit too conservative,
2321      perhaps, but safe.  */
2322   if (gimple_has_volatile_ops (stmt))
2323     goto fail;
2324 
2325   /* Ignore bitfields for now.  Not really something terribly complicated
2326      to handle.  TODO.  */
2327   if (TREE_CODE (base) == BIT_FIELD_REF)
2328     goto fail;
2329 
2330   base = unshare_expr (base);
2331 
2332   if (TREE_CODE (base) == TARGET_MEM_REF)
2333     {
2334       tree type = build_pointer_type (TREE_TYPE (base));
2335       tree astep;
2336 
2337       if (TMR_BASE (base)
2338 	  && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
2339 	{
2340 	  civ = get_iv (data, TMR_BASE (base));
2341 	  if (!civ)
2342 	    goto fail;
2343 
2344 	  TMR_BASE (base) = civ->base;
2345 	  step = civ->step;
2346 	}
2347       if (TMR_INDEX2 (base)
2348 	  && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
2349 	{
2350 	  civ = get_iv (data, TMR_INDEX2 (base));
2351 	  if (!civ)
2352 	    goto fail;
2353 
2354 	  TMR_INDEX2 (base) = civ->base;
2355 	  step = civ->step;
2356 	}
2357       if (TMR_INDEX (base)
2358 	  && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
2359 	{
2360 	  civ = get_iv (data, TMR_INDEX (base));
2361 	  if (!civ)
2362 	    goto fail;
2363 
2364 	  TMR_INDEX (base) = civ->base;
2365 	  astep = civ->step;
2366 
2367 	  if (astep)
2368 	    {
2369 	      if (TMR_STEP (base))
2370 		astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
2371 
2372 	      step = fold_build2 (PLUS_EXPR, type, step, astep);
2373 	    }
2374 	}
2375 
2376       if (integer_zerop (step))
2377 	goto fail;
2378       base = tree_mem_ref_addr (type, base);
2379     }
2380   else
2381     {
2382       ifs_ivopts_data.ivopts_data = data;
2383       ifs_ivopts_data.stmt = stmt;
2384       ifs_ivopts_data.step = size_zero_node;
2385       if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
2386 	  || integer_zerop (ifs_ivopts_data.step))
2387 	goto fail;
2388       step = ifs_ivopts_data.step;
2389 
2390       /* Check that the base expression is addressable.  This needs
2391 	 to be done after substituting bases of IVs into it.  */
2392       if (may_be_nonaddressable_p (base))
2393 	goto fail;
2394 
2395       /* Moreover, on strict alignment platforms, check that it is
2396 	 sufficiently aligned.  */
2397       if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
2398 	goto fail;
2399 
2400       base = build_fold_addr_expr (base);
2401 
2402       /* Substituting bases of IVs into the base expression might
2403 	 have caused folding opportunities.  */
2404       if (TREE_CODE (base) == ADDR_EXPR)
2405 	{
2406 	  tree *ref = &TREE_OPERAND (base, 0);
2407 	  while (handled_component_p (*ref))
2408 	    ref = &TREE_OPERAND (*ref, 0);
2409 	  if (TREE_CODE (*ref) == MEM_REF)
2410 	    {
2411 	      tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
2412 				      TREE_OPERAND (*ref, 0),
2413 				      TREE_OPERAND (*ref, 1));
2414 	      if (tem)
2415 		*ref = tem;
2416 	    }
2417 	}
2418     }
2419 
2420   civ = alloc_iv (data, base, step);
2421   /* Fail if base object of this memory reference is unknown.  */
2422   if (civ->base_object == NULL_TREE)
2423     goto fail;
2424 
2425   record_group_use (data, op_p, civ, stmt, USE_REF_ADDRESS, TREE_TYPE (*op_p));
2426   return;
2427 
2428 fail:
2429   for_each_index (op_p, idx_record_use, data);
2430 }
2431 
2432 /* Finds and records invariants used in STMT.  */
2433 
2434 static void
2435 find_invariants_stmt (struct ivopts_data *data, gimple *stmt)
2436 {
2437   ssa_op_iter iter;
2438   use_operand_p use_p;
2439   tree op;
2440 
2441   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2442     {
2443       op = USE_FROM_PTR (use_p);
2444       record_invariant (data, op, false);
2445     }
2446 }
2447 
2448 /* CALL calls an internal function.  If operand *OP_P will become an
2449    address when the call is expanded, return the type of the memory
2450    being addressed, otherwise return null.  */
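/* For instance (sketch, made-up operand names): for a call
   lhs_1 = .MASK_LOAD (ptr_2, align_3, mask_4) and OP_P pointing to ptr_2,
   the type of lhs_1 is returned, since ptr_2 becomes the address of the
   load once the call is expanded.  */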
2451 
2452 static tree
2453 get_mem_type_for_internal_fn (gcall *call, tree *op_p)
2454 {
2455   switch (gimple_call_internal_fn (call))
2456     {
2457     case IFN_MASK_LOAD:
2458       if (op_p == gimple_call_arg_ptr (call, 0))
2459 	return TREE_TYPE (gimple_call_lhs (call));
2460       return NULL_TREE;
2461 
2462     case IFN_MASK_STORE:
2463       if (op_p == gimple_call_arg_ptr (call, 0))
2464 	return TREE_TYPE (gimple_call_arg (call, 3));
2465       return NULL_TREE;
2466 
2467     default:
2468       return NULL_TREE;
2469     }
2470 }
2471 
2472 /* IV is a (non-address) iv that describes operand *OP_P of STMT.
2473    Return true if the operand will become an address when STMT
2474    is expanded and record the associated address use if so.  */
2475 
2476 static bool
2477 find_address_like_use (struct ivopts_data *data, gimple *stmt, tree *op_p,
2478 		       struct iv *iv)
2479 {
2480   /* Fail if base object of this memory reference is unknown.  */
2481   if (iv->base_object == NULL_TREE)
2482     return false;
2483 
2484   tree mem_type = NULL_TREE;
2485   if (gcall *call = dyn_cast <gcall *> (stmt))
2486     if (gimple_call_internal_p (call))
2487       mem_type = get_mem_type_for_internal_fn (call, op_p);
2488   if (mem_type)
2489     {
2490       iv = alloc_iv (data, iv->base, iv->step);
2491       record_group_use (data, op_p, iv, stmt, USE_PTR_ADDRESS, mem_type);
2492       return true;
2493     }
2494   return false;
2495 }
2496 
2497 /* Finds interesting uses of induction variables in the statement STMT.  */
2498 
2499 static void
2500 find_interesting_uses_stmt (struct ivopts_data *data, gimple *stmt)
2501 {
2502   struct iv *iv;
2503   tree op, *lhs, *rhs;
2504   ssa_op_iter iter;
2505   use_operand_p use_p;
2506   enum tree_code code;
2507 
2508   find_invariants_stmt (data, stmt);
2509 
2510   if (gimple_code (stmt) == GIMPLE_COND)
2511     {
2512       find_interesting_uses_cond (data, stmt);
2513       return;
2514     }
2515 
2516   if (is_gimple_assign (stmt))
2517     {
2518       lhs = gimple_assign_lhs_ptr (stmt);
2519       rhs = gimple_assign_rhs1_ptr (stmt);
2520 
2521       if (TREE_CODE (*lhs) == SSA_NAME)
2522 	{
2523 	  /* If the statement defines an induction variable, the uses are not
2524 	     interesting by themselves.  */
2525 
2526 	  iv = get_iv (data, *lhs);
2527 
2528 	  if (iv && !integer_zerop (iv->step))
2529 	    return;
2530 	}
2531 
2532       code = gimple_assign_rhs_code (stmt);
2533       if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
2534 	  && (REFERENCE_CLASS_P (*rhs)
2535 	      || is_gimple_val (*rhs)))
2536 	{
2537 	  if (REFERENCE_CLASS_P (*rhs))
2538 	    find_interesting_uses_address (data, stmt, rhs);
2539 	  else
2540 	    find_interesting_uses_op (data, *rhs);
2541 
2542 	  if (REFERENCE_CLASS_P (*lhs))
2543 	    find_interesting_uses_address (data, stmt, lhs);
2544 	  return;
2545 	}
2546       else if (TREE_CODE_CLASS (code) == tcc_comparison)
2547 	{
2548 	  find_interesting_uses_cond (data, stmt);
2549 	  return;
2550 	}
2551 
2552       /* TODO -- we should also handle address uses of type
2553 
2554 	 memory = call (whatever);
2555 
2556 	 and
2557 
2558 	 call (memory).  */
2559     }
2560 
2561   if (gimple_code (stmt) == GIMPLE_PHI
2562       && gimple_bb (stmt) == data->current_loop->header)
2563     {
2564       iv = get_iv (data, PHI_RESULT (stmt));
2565 
2566       if (iv && !integer_zerop (iv->step))
2567 	return;
2568     }
2569 
2570   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2571     {
2572       op = USE_FROM_PTR (use_p);
2573 
2574       if (TREE_CODE (op) != SSA_NAME)
2575 	continue;
2576 
2577       iv = get_iv (data, op);
2578       if (!iv)
2579 	continue;
2580 
2581       if (!find_address_like_use (data, stmt, use_p->use, iv))
2582 	find_interesting_uses_op (data, op);
2583     }
2584 }
2585 
2586 /* Finds interesting uses of induction variables outside of loops
2587    on loop exit edge EXIT.  */
2588 
2589 static void
2590 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
2591 {
2592   gphi *phi;
2593   gphi_iterator psi;
2594   tree def;
2595 
2596   for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
2597     {
2598       phi = psi.phi ();
2599       def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2600       if (!virtual_operand_p (def))
2601 	find_interesting_uses_op (data, def);
2602     }
2603 }
2604 
2605 /* Return TRUE if OFFSET is within the range of [base + offset] addressing
2606    mode for memory reference represented by USE.  */
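/* For example (hypothetical target): if the [base + offset] form accepts
   offsets in [-4096, 4095], this returns true for OFFSET == 8 and false
   for OFFSET == 8192.  The probe reuses one cached PLUS rtx per (address
   space, memory mode) pair stored in addr_list.  */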
2607 
2608 static GTY (()) vec<rtx, va_gc> *addr_list;
2609 
2610 static bool
2611 addr_offset_valid_p (struct iv_use *use, poly_int64 offset)
2612 {
2613   rtx reg, addr;
2614   unsigned list_index;
2615   addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
2616   machine_mode addr_mode, mem_mode = TYPE_MODE (use->mem_type);
2617 
2618   list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
2619   if (list_index >= vec_safe_length (addr_list))
2620     vec_safe_grow_cleared (addr_list, list_index + MAX_MACHINE_MODE);
2621 
2622   addr = (*addr_list)[list_index];
2623   if (!addr)
2624     {
2625       addr_mode = targetm.addr_space.address_mode (as);
2626       reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
2627       addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
2628       (*addr_list)[list_index] = addr;
2629     }
2630   else
2631     addr_mode = GET_MODE (addr);
2632 
2633   XEXP (addr, 1) = gen_int_mode (offset, addr_mode);
2634   return (memory_address_addr_space_p (mem_mode, addr, as));
2635 }
2636 
2637 /* Comparison function to sort group in ascending order of addr_offset.  */
2638 
2639 static int
2640 group_compare_offset (const void *a, const void *b)
2641 {
2642   const struct iv_use *const *u1 = (const struct iv_use *const *) a;
2643   const struct iv_use *const *u2 = (const struct iv_use *const *) b;
2644 
2645   return compare_sizes_for_sort ((*u1)->addr_offset, (*u2)->addr_offset);
2646 }
2647 
2648 /* Check if small groups should be split.  Return true if no group
2649    contains more than two uses with distinct addr_offsets.  Return
2650    false otherwise.  We want to split such groups because:
2651 
2652      1) Small groups don't have much benefit and may interfere with
2653 	general candidate selection.
2654      2) The problem size with only small groups is usually small, and the
2655 	general algorithm can handle it well.
2656 
2657    TODO -- The above claim may not hold when we want to merge memory
2658    accesses with consecutive addresses.  */
2659 
2660 static bool
2661 split_small_address_groups_p (struct ivopts_data *data)
2662 {
2663   unsigned int i, j, distinct = 1;
2664   struct iv_use *pre;
2665   struct iv_group *group;
2666 
2667   for (i = 0; i < data->vgroups.length (); i++)
2668     {
2669       group = data->vgroups[i];
2670       if (group->vuses.length () == 1)
2671 	continue;
2672 
2673       gcc_assert (address_p (group->type));
2674       if (group->vuses.length () == 2)
2675 	{
2676 	  if (compare_sizes_for_sort (group->vuses[0]->addr_offset,
2677 				      group->vuses[1]->addr_offset) > 0)
2678 	    std::swap (group->vuses[0], group->vuses[1]);
2679 	}
2680       else
2681 	group->vuses.qsort (group_compare_offset);
2682 
2683       if (distinct > 2)
2684 	continue;
2685 
2686       distinct = 1;
2687       for (pre = group->vuses[0], j = 1; j < group->vuses.length (); j++)
2688 	{
2689 	  if (maybe_ne (group->vuses[j]->addr_offset, pre->addr_offset))
2690 	    {
2691 	      pre = group->vuses[j];
2692 	      distinct++;
2693 	    }
2694 
2695 	  if (distinct > 2)
2696 	    break;
2697 	}
2698     }
2699 
2700   return (distinct <= 2);
2701 }
2702 
2703 /* For each group of address type uses, this function further groups
2704    these uses according to the maximum offset supported by target's
2705    [base + offset] addressing mode.  */
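/* As a sketch (the offsets are hypothetical): a group with uses at offsets
   {0, 8, 131072} from the same base may be split into {0, 8} and {131072}
   when the large offset does not fit the addressing mode, so that each
   resulting group can share a single [base + offset] form.  */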
2706 
2707 static void
2708 split_address_groups (struct ivopts_data *data)
2709 {
2710   unsigned int i, j;
2711   /* Whether to split every group, regardless of the offset range.  */
2712   bool split_p = split_small_address_groups_p (data);
2713 
2714   for (i = 0; i < data->vgroups.length (); i++)
2715     {
2716       struct iv_group *new_group = NULL;
2717       struct iv_group *group = data->vgroups[i];
2718       struct iv_use *use = group->vuses[0];
2719 
2720       use->id = 0;
2721       use->group_id = group->id;
2722       if (group->vuses.length () == 1)
2723 	continue;
2724 
2725       gcc_assert (address_p (use->type));
2726 
2727       for (j = 1; j < group->vuses.length ();)
2728 	{
2729 	  struct iv_use *next = group->vuses[j];
2730 	  poly_int64 offset = next->addr_offset - use->addr_offset;
2731 
2732 	  /* Split the group if asked to, or if the offset against the first
2733 	     use can't fit in the offset part of the addressing mode.  IV uses
2734 	     having the same offset are still kept in one group.  */
2735 	  if (maybe_ne (offset, 0)
2736 	      && (split_p || !addr_offset_valid_p (use, offset)))
2737 	    {
2738 	      if (!new_group)
2739 		new_group = record_group (data, group->type);
2740 	      group->vuses.ordered_remove (j);
2741 	      new_group->vuses.safe_push (next);
2742 	      continue;
2743 	    }
2744 
2745 	  next->id = j;
2746 	  next->group_id = group->id;
2747 	  j++;
2748 	}
2749     }
2750 }
2751 
2752 /* Finds uses of the induction variables that are interesting.  */
2753 
2754 static void
2755 find_interesting_uses (struct ivopts_data *data)
2756 {
2757   basic_block bb;
2758   gimple_stmt_iterator bsi;
2759   basic_block *body = get_loop_body (data->current_loop);
2760   unsigned i;
2761   edge e;
2762 
2763   for (i = 0; i < data->current_loop->num_nodes; i++)
2764     {
2765       edge_iterator ei;
2766       bb = body[i];
2767 
2768       FOR_EACH_EDGE (e, ei, bb->succs)
2769 	if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2770 	    && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2771 	  find_interesting_uses_outside (data, e);
2772 
2773       for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2774 	find_interesting_uses_stmt (data, gsi_stmt (bsi));
2775       for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2776 	if (!is_gimple_debug (gsi_stmt (bsi)))
2777 	  find_interesting_uses_stmt (data, gsi_stmt (bsi));
2778     }
2779   free (body);
2780 
2781   split_address_groups (data);
2782 
2783   if (dump_file && (dump_flags & TDF_DETAILS))
2784     {
2785       fprintf (dump_file, "\n<IV Groups>:\n");
2786       dump_groups (dump_file, data);
2787       fprintf (dump_file, "\n");
2788     }
2789 }
2790 
2791 /* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
2792    is true, assume we are inside an address.  If TOP_COMPREF is true, assume
2793    we are at the top-level of the processed address.  */
2794 
2795 static tree
2796 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2797 		poly_int64 *offset)
2798 {
2799   tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2800   enum tree_code code;
2801   tree type, orig_type = TREE_TYPE (expr);
2802   poly_int64 off0, off1;
2803   HOST_WIDE_INT st;
2804   tree orig_expr = expr;
2805 
2806   STRIP_NOPS (expr);
2807 
2808   type = TREE_TYPE (expr);
2809   code = TREE_CODE (expr);
2810   *offset = 0;
2811 
2812   switch (code)
2813     {
2814     case POINTER_PLUS_EXPR:
2815     case PLUS_EXPR:
2816     case MINUS_EXPR:
2817       op0 = TREE_OPERAND (expr, 0);
2818       op1 = TREE_OPERAND (expr, 1);
2819 
2820       op0 = strip_offset_1 (op0, false, false, &off0);
2821       op1 = strip_offset_1 (op1, false, false, &off1);
2822 
2823       *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2824       if (op0 == TREE_OPERAND (expr, 0)
2825 	  && op1 == TREE_OPERAND (expr, 1))
2826 	return orig_expr;
2827 
2828       if (integer_zerop (op1))
2829 	expr = op0;
2830       else if (integer_zerop (op0))
2831 	{
2832 	  if (code == MINUS_EXPR)
2833 	    expr = fold_build1 (NEGATE_EXPR, type, op1);
2834 	  else
2835 	    expr = op1;
2836 	}
2837       else
2838 	expr = fold_build2 (code, type, op0, op1);
2839 
2840       return fold_convert (orig_type, expr);
2841 
2842     case MULT_EXPR:
2843       op1 = TREE_OPERAND (expr, 1);
2844       if (!cst_and_fits_in_hwi (op1))
2845 	return orig_expr;
2846 
2847       op0 = TREE_OPERAND (expr, 0);
2848       op0 = strip_offset_1 (op0, false, false, &off0);
2849       if (op0 == TREE_OPERAND (expr, 0))
2850 	return orig_expr;
2851 
2852       *offset = off0 * int_cst_value (op1);
2853       if (integer_zerop (op0))
2854 	expr = op0;
2855       else
2856 	expr = fold_build2 (MULT_EXPR, type, op0, op1);
2857 
2858       return fold_convert (orig_type, expr);
2859 
2860     case ARRAY_REF:
2861     case ARRAY_RANGE_REF:
2862       if (!inside_addr)
2863 	return orig_expr;
2864 
2865       step = array_ref_element_size (expr);
2866       if (!cst_and_fits_in_hwi (step))
2867 	break;
2868 
2869       st = int_cst_value (step);
2870       op1 = TREE_OPERAND (expr, 1);
2871       op1 = strip_offset_1 (op1, false, false, &off1);
2872       *offset = off1 * st;
2873 
2874       if (top_compref
2875 	  && integer_zerop (op1))
2876 	{
2877 	  /* Strip the component reference completely.  */
2878 	  op0 = TREE_OPERAND (expr, 0);
2879 	  op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2880 	  *offset += off0;
2881 	  return op0;
2882 	}
2883       break;
2884 
2885     case COMPONENT_REF:
2886       {
2887 	tree field;
2888 
2889 	if (!inside_addr)
2890 	  return orig_expr;
2891 
2892 	tmp = component_ref_field_offset (expr);
2893 	field = TREE_OPERAND (expr, 1);
2894 	if (top_compref
2895 	    && cst_and_fits_in_hwi (tmp)
2896 	    && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2897 	  {
2898 	    HOST_WIDE_INT boffset, abs_off;
2899 
2900 	    /* Strip the component reference completely.  */
2901 	    op0 = TREE_OPERAND (expr, 0);
2902 	    op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2903 	    boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2904 	    abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2905 	    if (boffset < 0)
2906 	      abs_off = -abs_off;
2907 
2908 	    *offset = off0 + int_cst_value (tmp) + abs_off;
2909 	    return op0;
2910 	  }
2911       }
2912       break;
2913 
2914     case ADDR_EXPR:
2915       op0 = TREE_OPERAND (expr, 0);
2916       op0 = strip_offset_1 (op0, true, true, &off0);
2917       *offset += off0;
2918 
2919       if (op0 == TREE_OPERAND (expr, 0))
2920 	return orig_expr;
2921 
2922       expr = build_fold_addr_expr (op0);
2923       return fold_convert (orig_type, expr);
2924 
2925     case MEM_REF:
2926       /* ???  Offset operand?  */
2927       inside_addr = false;
2928       break;
2929 
2930     default:
2931       if (ptrdiff_tree_p (expr, offset) && maybe_ne (*offset, 0))
2932 	return build_int_cst (orig_type, 0);
2933       return orig_expr;
2934     }
2935 
2936   /* Default handling of expressions for that we want to recurse into
2937      the first operand.  */
2938   op0 = TREE_OPERAND (expr, 0);
2939   op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2940   *offset += off0;
2941 
2942   if (op0 == TREE_OPERAND (expr, 0)
2943       && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2944     return orig_expr;
2945 
2946   expr = copy_node (expr);
2947   TREE_OPERAND (expr, 0) = op0;
2948   if (op1)
2949     TREE_OPERAND (expr, 1) = op1;
2950 
2951   /* Inside address, we might strip the top level component references,
2952      thus changing type of the expression.  Handling of ADDR_EXPR
2953      will fix that.  */
2954   expr = fold_convert (orig_type, expr);
2955 
2956   return expr;
2957 }
2958 
2959 /* Strips constant offsets from EXPR and stores them to OFFSET.  */
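/* E.g. (illustrative, assuming 4-byte array elements): for EXPR == &a[4]
   this returns &a converted to the original pointer type and stores 16 to
   *OFFSET; for EXPR == p_1 + 8 it returns p_1 and stores 8.  */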
2960 
2961 tree
2962 strip_offset (tree expr, poly_uint64_pod *offset)
2963 {
2964   poly_int64 off;
2965   tree core = strip_offset_1 (expr, false, false, &off);
2966   *offset = off;
2967   return core;
2968 }
2969 
2970 /* Returns variant of TYPE that can be used as base for different uses.
2971    We return unsigned type with the same precision, which avoids problems
2972    with overflows.  */
2973 
2974 static tree
2975 generic_type_for (tree type)
2976 {
2977   if (POINTER_TYPE_P (type))
2978     return unsigned_type_for (type);
2979 
2980   if (TYPE_UNSIGNED (type))
2981     return type;
2982 
2983   return unsigned_type_for (type);
2984 }
2985 
2986 /* Private data for walk_tree.  */
2987 
2988 struct walk_tree_data
2989 {
2990   bitmap *inv_vars;
2991   struct ivopts_data *idata;
2992 };
2993 
2994 /* Callback function for walk_tree; it records invariants and symbol
2995    references in *EXPR_P.  DATA is the structure storing result info.  */
2996 
2997 static tree
2998 find_inv_vars_cb (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2999 {
3000   tree op = *expr_p;
3001   struct version_info *info;
3002   struct walk_tree_data *wdata = (struct walk_tree_data*) data;
3003 
3004   if (TREE_CODE (op) != SSA_NAME)
3005     return NULL_TREE;
3006 
3007   info = name_info (wdata->idata, op);
3008   /* Because we expand simple operations when finding IVs, a loop invariant
3009      variable that isn't referred to by the original loop could be used now.
3010      Record such invariant variables here.  */
3011   if (!info->iv)
3012     {
3013       struct ivopts_data *idata = wdata->idata;
3014       basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (op));
3015 
3016       if (!bb || !flow_bb_inside_loop_p (idata->current_loop, bb))
3017 	{
3018 	  set_iv (idata, op, op, build_int_cst (TREE_TYPE (op), 0), true);
3019 	  record_invariant (idata, op, false);
3020 	}
3021     }
3022   if (!info->inv_id || info->has_nonlin_use)
3023     return NULL_TREE;
3024 
3025   if (!*wdata->inv_vars)
3026     *wdata->inv_vars = BITMAP_ALLOC (NULL);
3027   bitmap_set_bit (*wdata->inv_vars, info->inv_id);
3028 
3029   return NULL_TREE;
3030 }
3031 
3032 /* Records invariants in *EXPR_P.  INV_VARS is the bitmap in which we
3033    should store them.  */
3034 
3035 static inline void
3036 find_inv_vars (struct ivopts_data *data, tree *expr_p, bitmap *inv_vars)
3037 {
3038   struct walk_tree_data wdata;
3039 
3040   if (!inv_vars)
3041     return;
3042 
3043   wdata.idata = data;
3044   wdata.inv_vars = inv_vars;
3045   walk_tree (expr_p, find_inv_vars_cb, &wdata, NULL);
3046 }
3047 
3048 /* Get entry from invariant expr hash table for INV_EXPR.  New entry
3049    will be recorded if it doesn't exist yet.  Given below two exprs:
3050      inv_expr + cst1, inv_expr + cst2
3051    It's hard to decide whether the constant part should be stripped or
3052    not.  We choose not to strip it, based on the following facts:
3053      1) We need to count the ADD cost for the constant part if it's
3054 	stripped, which isn't always trivial where this function is called.
3055      2) Stripping the constant away may conflict with the subsequent loop
3056 	invariant hoisting pass.
3057      3) Not stripping the constant results in more invariant exprs, which
3058 	usually leads to decisions preferring lower register pressure.  */
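/* Thus, as a sketch, inv_expr + 16 and inv_expr + 32 get two distinct
   entries (and ids) in the hash table rather than sharing one entry for
   inv_expr; SSA names and (poly) integer constants get no entry at all
   and NULL is returned for them.  */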
3059 
3060 static iv_inv_expr_ent *
3061 get_loop_invariant_expr (struct ivopts_data *data, tree inv_expr)
3062 {
3063   STRIP_NOPS (inv_expr);
3064 
3065   if (poly_int_tree_p (inv_expr)
3066       || TREE_CODE (inv_expr) == SSA_NAME)
3067     return NULL;
3068 
3069   /* Don't strip constant part away as we used to.  */
3070 
3071   /* Stores EXPR in DATA->inv_expr_tab, return pointer to iv_inv_expr_ent.  */
3072   struct iv_inv_expr_ent ent;
3073   ent.expr = inv_expr;
3074   ent.hash = iterative_hash_expr (inv_expr, 0);
3075   struct iv_inv_expr_ent **slot = data->inv_expr_tab->find_slot (&ent, INSERT);
3076 
3077   if (!*slot)
3078     {
3079       *slot = XNEW (struct iv_inv_expr_ent);
3080       (*slot)->expr = inv_expr;
3081       (*slot)->hash = ent.hash;
3082       (*slot)->id = ++data->max_inv_expr_id;
3083     }
3084 
3085   return *slot;
3086 }
3087 
3088 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
3089    position to POS.  If USE is not NULL, the candidate is set as related to
3090    it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
3091    replacement of the final value of the iv by a direct computation.  */
3092 
3093 static struct iv_cand *
3094 add_candidate_1 (struct ivopts_data *data,
3095 		 tree base, tree step, bool important, enum iv_position pos,
3096 		 struct iv_use *use, gimple *incremented_at,
3097 		 struct iv *orig_iv = NULL)
3098 {
3099   unsigned i;
3100   struct iv_cand *cand = NULL;
3101   tree type, orig_type;
3102 
3103   gcc_assert (base && step);
3104 
3105   /* -fkeep-gc-roots-live means that we have to keep a real pointer
3106      live, but the ivopts code may replace a real pointer with one
3107      pointing before or after the memory block that is then adjusted
3108      into the memory block during the loop.  FIXME: It would likely be
3109      better to actually force the pointer live and still use ivopts;
3110      for example, it would be enough to write the pointer into memory
3111      and keep it there until after the loop.  */
3112   if (flag_keep_gc_roots_live && POINTER_TYPE_P (TREE_TYPE (base)))
3113     return NULL;
3114 
3115   /* For non-original variables, make sure their values are computed in a type
3116      that does not invoke undefined behavior on overflows (since in general,
3117      we cannot prove that these induction variables are non-wrapping).  */
3118   if (pos != IP_ORIGINAL)
3119     {
3120       orig_type = TREE_TYPE (base);
3121       type = generic_type_for (orig_type);
3122       if (type != orig_type)
3123 	{
3124 	  base = fold_convert (type, base);
3125 	  step = fold_convert (type, step);
3126 	}
3127     }
3128 
3129   for (i = 0; i < data->vcands.length (); i++)
3130     {
3131       cand = data->vcands[i];
3132 
3133       if (cand->pos != pos)
3134 	continue;
3135 
3136       if (cand->incremented_at != incremented_at
3137 	  || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
3138 	      && cand->ainc_use != use))
3139 	continue;
3140 
3141       if (operand_equal_p (base, cand->iv->base, 0)
3142 	  && operand_equal_p (step, cand->iv->step, 0)
3143 	  && (TYPE_PRECISION (TREE_TYPE (base))
3144 	      == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
3145 	break;
3146     }
3147 
3148   if (i == data->vcands.length ())
3149     {
3150       cand = XCNEW (struct iv_cand);
3151       cand->id = i;
3152       cand->iv = alloc_iv (data, base, step);
3153       cand->pos = pos;
3154       if (pos != IP_ORIGINAL)
3155 	{
3156 	  cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
3157 	  cand->var_after = cand->var_before;
3158 	}
3159       cand->important = important;
3160       cand->incremented_at = incremented_at;
3161       data->vcands.safe_push (cand);
3162 
3163       if (!poly_int_tree_p (step))
3164 	{
3165 	  find_inv_vars (data, &step, &cand->inv_vars);
3166 
3167 	  iv_inv_expr_ent *inv_expr = get_loop_invariant_expr (data, step);
3168 	  /* Share bitmap between inv_vars and inv_exprs for cand.  */
3169 	  if (inv_expr != NULL)
3170 	    {
3171 	      cand->inv_exprs = cand->inv_vars;
3172 	      cand->inv_vars = NULL;
3173 	      if (cand->inv_exprs)
3174 		bitmap_clear (cand->inv_exprs);
3175 	      else
3176 		cand->inv_exprs = BITMAP_ALLOC (NULL);
3177 
3178 	      bitmap_set_bit (cand->inv_exprs, inv_expr->id);
3179 	    }
3180 	}
3181 
3182       if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
3183 	cand->ainc_use = use;
3184       else
3185 	cand->ainc_use = NULL;
3186 
3187       cand->orig_iv = orig_iv;
3188       if (dump_file && (dump_flags & TDF_DETAILS))
3189 	dump_cand (dump_file, cand);
3190     }
3191 
3192   cand->important |= important;
3193 
3194   /* Relate candidate to the group for which it is added.  */
3195   if (use)
3196     bitmap_set_bit (data->vgroups[use->group_id]->related_cands, i);
3197 
3198   return cand;
3199 }
3200 
3201 /* Returns true if incrementing the induction variable at the end of the LOOP
3202    is allowed.
3203 
3204    The purpose is to avoid splitting the latch edge with a biv increment,
3205    thus creating a jump, possibly confusing other optimization passes and
3206    leaving less freedom to the scheduler.  So we allow IP_END only if
3207    IP_NORMAL is not available (so we do not have a better alternative), or
3208    if the latch edge is already nonempty.  */
3209 
3210 static bool
3211 allow_ip_end_pos_p (struct loop *loop)
3212 {
3213   if (!ip_normal_pos (loop))
3214     return true;
3215 
3216   if (!empty_block_p (ip_end_pos (loop)))
3217     return true;
3218 
3219   return false;
3220 }
3221 
3222 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
3223    Important field is set to IMPORTANT.  */
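/* As an illustrative sketch: on a target with post-increment addressing,
   for a use of *p_1 whose access size equals the step (say both are 4
   bytes), an IP_AFTER_USE candidate with base BASE is added, so that the
   candidate's increment can be folded into the memory access, much like
   "*p++" at the source level.  */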
3224 
3225 static void
3226 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
3227 			bool important, struct iv_use *use)
3228 {
3229   basic_block use_bb = gimple_bb (use->stmt);
3230   machine_mode mem_mode;
3231   unsigned HOST_WIDE_INT cstepi;
3232 
3233   /* If we insert the increment in any position other than the standard
3234      ones, we must ensure that it is incremented once per iteration.
3235      It must not be in an inner nested loop, or one side of an if
3236      statement.  */
3237   if (use_bb->loop_father != data->current_loop
3238       || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
3239       || stmt_can_throw_internal (cfun, use->stmt)
3240       || !cst_and_fits_in_hwi (step))
3241     return;
3242 
3243   cstepi = int_cst_value (step);
3244 
3245   mem_mode = TYPE_MODE (use->mem_type);
3246   if (((USE_LOAD_PRE_INCREMENT (mem_mode)
3247 	|| USE_STORE_PRE_INCREMENT (mem_mode))
3248        && known_eq (GET_MODE_SIZE (mem_mode), cstepi))
3249       || ((USE_LOAD_PRE_DECREMENT (mem_mode)
3250 	   || USE_STORE_PRE_DECREMENT (mem_mode))
3251 	  && known_eq (GET_MODE_SIZE (mem_mode), -cstepi)))
3252     {
3253       enum tree_code code = MINUS_EXPR;
3254       tree new_base;
3255       tree new_step = step;
3256 
3257       if (POINTER_TYPE_P (TREE_TYPE (base)))
3258 	{
3259 	  new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
3260 	  code = POINTER_PLUS_EXPR;
3261 	}
3262       else
3263 	new_step = fold_convert (TREE_TYPE (base), new_step);
3264       new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
3265       add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
3266 		       use->stmt);
3267     }
3268   if (((USE_LOAD_POST_INCREMENT (mem_mode)
3269 	|| USE_STORE_POST_INCREMENT (mem_mode))
3270        && known_eq (GET_MODE_SIZE (mem_mode), cstepi))
3271       || ((USE_LOAD_POST_DECREMENT (mem_mode)
3272 	   || USE_STORE_POST_DECREMENT (mem_mode))
3273 	  && known_eq (GET_MODE_SIZE (mem_mode), -cstepi)))
3274     {
3275       add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
3276 		       use->stmt);
3277     }
3278 }
3279 
3280 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
3281    position to POS.  If USE is not NULL, the candidate is set as related to
3282    it.  The candidate computation is scheduled before exit condition and at
3283    the end of loop.  */
3284 
3285 static void
3286 add_candidate (struct ivopts_data *data,
3287 	       tree base, tree step, bool important, struct iv_use *use,
3288 	       struct iv *orig_iv = NULL)
3289 {
3290   if (ip_normal_pos (data->current_loop))
3291     add_candidate_1 (data, base, step, important,
3292 		     IP_NORMAL, use, NULL, orig_iv);
3293   if (ip_end_pos (data->current_loop)
3294       && allow_ip_end_pos_p (data->current_loop))
3295     add_candidate_1 (data, base, step, important, IP_END, use, NULL, orig_iv);
3296 }
3297 
3298 /* Adds standard iv candidates.  */
3299 
3300 static void
3301 add_standard_iv_candidates (struct ivopts_data *data)
3302 {
3303   add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
3304 
3305   /* The same for the long integer type if it is still fast enough.  */
3306   if (TYPE_PRECISION
3307 	(long_integer_type_node) > TYPE_PRECISION (integer_type_node)
3308       && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
3309     add_candidate (data, build_int_cst (long_integer_type_node, 0),
3310 		   build_int_cst (long_integer_type_node, 1), true, NULL);
3311 
3312   /* The same for the long long integer type if it is still fast enough.  */
3313   if (TYPE_PRECISION
3314 	(long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
3315       && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
3316     add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
3317 		   build_int_cst (long_long_integer_type_node, 1), true, NULL);
3318 }
3319 
3320 
3321 /* Adds candidates based on the old induction variable IV.  */
3322 
3323 static void
3324 add_iv_candidate_for_biv (struct ivopts_data *data, struct iv *iv)
3325 {
3326   gimple *phi;
3327   tree def;
3328   struct iv_cand *cand;
3329 
3330   /* Check if this biv is used in address type use.  */
3331   if (iv->no_overflow  && iv->have_address_use
3332       && INTEGRAL_TYPE_P (TREE_TYPE (iv->base))
3333       && TYPE_PRECISION (TREE_TYPE (iv->base)) < TYPE_PRECISION (sizetype))
3334     {
3335       tree base = fold_convert (sizetype, iv->base);
3336       tree step = fold_convert (sizetype, iv->step);
3337 
3338       /* Add iv cand of same precision as index part in TARGET_MEM_REF.  */
3339       add_candidate (data, base, step, true, NULL, iv);
3340       /* Add iv cand of the original type only if it has nonlinear use.  */
3341       if (iv->nonlin_use)
3342 	add_candidate (data, iv->base, iv->step, true, NULL);
3343     }
3344   else
3345     add_candidate (data, iv->base, iv->step, true, NULL);
3346 
3347   /* The same, but with initial value zero.  */
3348   if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
3349     add_candidate (data, size_int (0), iv->step, true, NULL);
3350   else
3351     add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
3352 		   iv->step, true, NULL);
3353 
3354   phi = SSA_NAME_DEF_STMT (iv->ssa_name);
3355   if (gimple_code (phi) == GIMPLE_PHI)
3356     {
3357       /* Additionally record the possibility of leaving the original iv
3358 	 untouched.  */
3359       def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
3360       /* Don't add candidate if it's from another PHI node because
3361 	 it's an affine iv appearing in the form of PEELED_CHREC.  */
3362       phi = SSA_NAME_DEF_STMT (def);
3363       if (gimple_code (phi) != GIMPLE_PHI)
3364 	{
3365 	  cand = add_candidate_1 (data,
3366 				  iv->base, iv->step, true, IP_ORIGINAL, NULL,
3367 				  SSA_NAME_DEF_STMT (def));
3368 	  if (cand)
3369 	    {
3370 	      cand->var_before = iv->ssa_name;
3371 	      cand->var_after = def;
3372 	    }
3373 	}
3374       else
3375 	gcc_assert (gimple_bb (phi) == data->current_loop->header);
3376     }
3377 }
3378 
3379 /* Adds candidates based on the old induction variables.  */
3380 
3381 static void
3382 add_iv_candidate_for_bivs (struct ivopts_data *data)
3383 {
3384   unsigned i;
3385   struct iv *iv;
3386   bitmap_iterator bi;
3387 
3388   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
3389     {
3390       iv = ver_info (data, i)->iv;
3391       if (iv && iv->biv_p && !integer_zerop (iv->step))
3392 	add_iv_candidate_for_biv (data, iv);
3393     }
3394 }
3395 
3396 /* Record common candidate {BASE, STEP} derived from USE in hashtable.  */
3397 
3398 static void
3399 record_common_cand (struct ivopts_data *data, tree base,
3400 		    tree step, struct iv_use *use)
3401 {
3402   struct iv_common_cand ent;
3403   struct iv_common_cand **slot;
3404 
3405   ent.base = base;
3406   ent.step = step;
3407   ent.hash = iterative_hash_expr (base, 0);
3408   ent.hash = iterative_hash_expr (step, ent.hash);
3409 
3410   slot = data->iv_common_cand_tab->find_slot (&ent, INSERT);
3411   if (*slot == NULL)
3412     {
3413       *slot = new iv_common_cand ();
3414       (*slot)->base = base;
3415       (*slot)->step = step;
3416       (*slot)->uses.create (8);
3417       (*slot)->hash = ent.hash;
3418       data->iv_common_cands.safe_push ((*slot));
3419     }
3420 
3421   gcc_assert (use != NULL);
3422   (*slot)->uses.safe_push (use);
3423   return;
3424 }
3425 
3426 /* Comparison function used to sort common candidates.  */
3427 
3428 static int
3429 common_cand_cmp (const void *p1, const void *p2)
3430 {
3431   unsigned n1, n2;
3432   const struct iv_common_cand *const *const ccand1
3433     = (const struct iv_common_cand *const *)p1;
3434   const struct iv_common_cand *const *const ccand2
3435     = (const struct iv_common_cand *const *)p2;
3436 
3437   n1 = (*ccand1)->uses.length ();
3438   n2 = (*ccand2)->uses.length ();
3439   return n2 - n1;
3440 }
3441 
3442 /* Adds IV candidates based on the common candidates recorded.  */
3443 
3444 static void
3445 add_iv_candidate_derived_from_uses (struct ivopts_data *data)
3446 {
3447   unsigned i, j;
3448   struct iv_cand *cand_1, *cand_2;
3449 
3450   data->iv_common_cands.qsort (common_cand_cmp);
3451   for (i = 0; i < data->iv_common_cands.length (); i++)
3452     {
3453       struct iv_common_cand *ptr = data->iv_common_cands[i];
3454 
3455       /* Only add IV candidate if it's derived from multiple uses.  */
3456       if (ptr->uses.length () <= 1)
3457 	break;
3458 
3459       cand_1 = NULL;
3460       cand_2 = NULL;
3461       if (ip_normal_pos (data->current_loop))
3462 	cand_1 = add_candidate_1 (data, ptr->base, ptr->step,
3463 				  false, IP_NORMAL, NULL, NULL);
3464 
3465       if (ip_end_pos (data->current_loop)
3466 	  && allow_ip_end_pos_p (data->current_loop))
3467 	cand_2 = add_candidate_1 (data, ptr->base, ptr->step,
3468 				  false, IP_END, NULL, NULL);
3469 
3470       /* Bind deriving uses and the new candidates.  */
3471       for (j = 0; j < ptr->uses.length (); j++)
3472 	{
3473 	  struct iv_group *group = data->vgroups[ptr->uses[j]->group_id];
3474 	  if (cand_1)
3475 	    bitmap_set_bit (group->related_cands, cand_1->id);
3476 	  if (cand_2)
3477 	    bitmap_set_bit (group->related_cands, cand_2->id);
3478 	}
3479     }
3480 
3481   /* Release data since it is useless from this point.  */
3482   data->iv_common_cand_tab->empty ();
3483   data->iv_common_cands.truncate (0);
3484 }
3485 
3486 /* Adds candidates based on the value of USE's iv.  */
3487 
3488 static void
3489 add_iv_candidate_for_use (struct ivopts_data *data, struct iv_use *use)
3490 {
3491   poly_uint64 offset;
3492   tree base;
3493   tree basetype;
3494   struct iv *iv = use->iv;
3495 
3496   add_candidate (data, iv->base, iv->step, false, use);
3497 
3498   /* Record common candidate for use in case it can be shared by others.  */
3499   record_common_cand (data, iv->base, iv->step, use);
3500 
3501   /* Record common candidate with initial value zero.  */
3502   basetype = TREE_TYPE (iv->base);
3503   if (POINTER_TYPE_P (basetype))
3504     basetype = sizetype;
3505   record_common_cand (data, build_int_cst (basetype, 0), iv->step, use);
3506 
3507   /* Record common candidate with constant offset stripped in base.
3508      Like the use itself, we also add candidate directly for it.  */
3509   base = strip_offset (iv->base, &offset);
3510   if (maybe_ne (offset, 0U) || base != iv->base)
3511     {
3512       record_common_cand (data, base, iv->step, use);
3513       add_candidate (data, base, iv->step, false, use);
3514     }
3515 
3516   /* Record common candidate with base_object removed in base.  */
3517   base = iv->base;
3518   STRIP_NOPS (base);
3519   if (iv->base_object != NULL && TREE_CODE (base) == POINTER_PLUS_EXPR)
3520     {
3521       tree step = iv->step;
3522 
3523       STRIP_NOPS (step);
3524       base = TREE_OPERAND (base, 1);
3525       step = fold_convert (sizetype, step);
3526       record_common_cand (data, base, step, use);
3527       /* Also record common candidate with offset stripped.  */
3528       base = strip_offset (base, &offset);
3529       if (maybe_ne (offset, 0U))
3530 	record_common_cand (data, base, step, use);
3531     }
3532 
3533   /* Finally, add auto-increment candidates.  Make such variables
3534      important since other iv uses with the same base object may be based
3535      on them.  */
3536   if (use != NULL && address_p (use->type))
3537     add_autoinc_candidates (data, iv->base, iv->step, true, use);
3538 }
3539 
3540 /* Adds candidates based on the uses.  */
3541 
3542 static void
3543 add_iv_candidate_for_groups (struct ivopts_data *data)
3544 {
3545   unsigned i;
3546 
3547   /* Only add candidate for the first use in group.  */
3548   for (i = 0; i < data->vgroups.length (); i++)
3549     {
3550       struct iv_group *group = data->vgroups[i];
3551 
3552       gcc_assert (group->vuses[0] != NULL);
3553       add_iv_candidate_for_use (data, group->vuses[0]);
3554     }
3555   add_iv_candidate_derived_from_uses (data);
3556 }
3557 
3558 /* Record important candidates and add them to related_cands bitmaps.  */
3559 
3560 static void
3561 record_important_candidates (struct ivopts_data *data)
3562 {
3563   unsigned i;
3564   struct iv_group *group;
3565 
3566   for (i = 0; i < data->vcands.length (); i++)
3567     {
3568       struct iv_cand *cand = data->vcands[i];
3569 
3570       if (cand->important)
3571 	bitmap_set_bit (data->important_candidates, i);
3572     }
3573 
3574   data->consider_all_candidates = (data->vcands.length ()
3575 				   <= CONSIDER_ALL_CANDIDATES_BOUND);
3576 
3577   /* Add important candidates to groups' related_cands bitmaps.  */
3578   for (i = 0; i < data->vgroups.length (); i++)
3579     {
3580       group = data->vgroups[i];
3581       bitmap_ior_into (group->related_cands, data->important_candidates);
3582     }
3583 }
3584 
3585 /* Allocates the data structure mapping the (group, candidate) pairs to costs.
3586    If consider_all_candidates is true, we use an array indexed by candidate id;
3587    otherwise we allocate a small hash-addressed array for every group.  */
3588 
3589 static void
3590 alloc_use_cost_map (struct ivopts_data *data)
3591 {
3592   unsigned i, size, s;
3593 
3594   for (i = 0; i < data->vgroups.length (); i++)
3595     {
3596       struct iv_group *group = data->vgroups[i];
3597 
3598       if (data->consider_all_candidates)
3599 	size = data->vcands.length ();
3600       else
3601 	{
3602 	  s = bitmap_count_bits (group->related_cands);
3603 
3604 	  /* Round up to the power of two, so that moduling by it is fast.  */
3605 	  size = s ? (1 << ceil_log2 (s)) : 1;
3606 	}
3607 
3608       group->n_map_members = size;
3609       group->cost_map = XCNEWVEC (struct cost_pair, size);
3610     }
3611 }
3612 
3613 /* Sets cost of (GROUP, CAND) pair to COST, records that it depends on
3614    invariants INV_VARS, that the value used in expressing it is VALUE,
3615    and, in case of iv elimination, that the comparison operator is COMP.  */
3616 
3617 static void
3618 set_group_iv_cost (struct ivopts_data *data,
3619 		   struct iv_group *group, struct iv_cand *cand,
3620 		   comp_cost cost, bitmap inv_vars, tree value,
3621 		   enum tree_code comp, bitmap inv_exprs)
3622 {
3623   unsigned i, s;
3624 
3625   if (cost.infinite_cost_p ())
3626     {
3627       BITMAP_FREE (inv_vars);
3628       BITMAP_FREE (inv_exprs);
3629       return;
3630     }
3631 
3632   if (data->consider_all_candidates)
3633     {
3634       group->cost_map[cand->id].cand = cand;
3635       group->cost_map[cand->id].cost = cost;
3636       group->cost_map[cand->id].inv_vars = inv_vars;
3637       group->cost_map[cand->id].inv_exprs = inv_exprs;
3638       group->cost_map[cand->id].value = value;
3639       group->cost_map[cand->id].comp = comp;
3640       return;
3641     }
3642 
3643   /* n_map_members is a power of two, so this computes modulo.  */
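  /* Illustrative example (not part of the original source): with
     n_map_members == 8, "cand->id & 7" equals "cand->id % 8", e.g.
     13 & 7 == 5 == 13 % 8, and the probe below scans slots 5..7 and
     then wraps around to 0..4 looking for a free entry.  */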
3644   s = cand->id & (group->n_map_members - 1);
3645   for (i = s; i < group->n_map_members; i++)
3646     if (!group->cost_map[i].cand)
3647       goto found;
3648   for (i = 0; i < s; i++)
3649     if (!group->cost_map[i].cand)
3650       goto found;
3651 
3652   gcc_unreachable ();
3653 
3654 found:
3655   group->cost_map[i].cand = cand;
3656   group->cost_map[i].cost = cost;
3657   group->cost_map[i].inv_vars = inv_vars;
3658   group->cost_map[i].inv_exprs = inv_exprs;
3659   group->cost_map[i].value = value;
3660   group->cost_map[i].comp = comp;
3661 }
3662 
3663 /* Gets cost of (GROUP, CAND) pair.  */
3664 
3665 static struct cost_pair *
3666 get_group_iv_cost (struct ivopts_data *data, struct iv_group *group,
3667 		   struct iv_cand *cand)
3668 {
3669   unsigned i, s;
3670   struct cost_pair *ret;
3671 
3672   if (!cand)
3673     return NULL;
3674 
3675   if (data->consider_all_candidates)
3676     {
3677       ret = group->cost_map + cand->id;
3678       if (!ret->cand)
3679 	return NULL;
3680 
3681       return ret;
3682     }
3683 
3684   /* n_map_members is a power of two, so this computes modulo.  */
3685   s = cand->id & (group->n_map_members - 1);
3686   for (i = s; i < group->n_map_members; i++)
3687     if (group->cost_map[i].cand == cand)
3688       return group->cost_map + i;
3689     else if (group->cost_map[i].cand == NULL)
3690       return NULL;
3691   for (i = 0; i < s; i++)
3692     if (group->cost_map[i].cand == cand)
3693       return group->cost_map + i;
3694     else if (group->cost_map[i].cand == NULL)
3695       return NULL;
3696 
3697   return NULL;
3698 }
3699 
3700 /* Produce DECL_RTL for object obj so it looks like it is stored in memory.  */
3701 static rtx
3702 produce_memory_decl_rtl (tree obj, int *regno)
3703 {
3704   addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
3705   machine_mode address_mode = targetm.addr_space.address_mode (as);
3706   rtx x;
3707 
3708   gcc_assert (obj);
3709   if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
3710     {
3711       const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
3712       x = gen_rtx_SYMBOL_REF (address_mode, name);
3713       SET_SYMBOL_REF_DECL (x, obj);
3714       x = gen_rtx_MEM (DECL_MODE (obj), x);
3715       set_mem_addr_space (x, as);
3716       targetm.encode_section_info (obj, x, true);
3717     }
3718   else
3719     {
3720       x = gen_raw_REG (address_mode, (*regno)++);
3721       x = gen_rtx_MEM (DECL_MODE (obj), x);
3722       set_mem_addr_space (x, as);
3723     }
3724 
3725   return x;
3726 }
3727 
3728 /* Prepares decl_rtl for variables referred to in *EXPR_P.  Callback for
3729    walk_tree.  DATA points to the fake register counter.  */
3730 
3731 static tree
3732 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
3733 {
3734   tree obj = NULL_TREE;
3735   rtx x = NULL_RTX;
3736   int *regno = (int *) data;
3737 
3738   switch (TREE_CODE (*expr_p))
3739     {
3740     case ADDR_EXPR:
3741       for (expr_p = &TREE_OPERAND (*expr_p, 0);
3742 	   handled_component_p (*expr_p);
3743 	   expr_p = &TREE_OPERAND (*expr_p, 0))
3744 	continue;
3745       obj = *expr_p;
3746       if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
3747 	x = produce_memory_decl_rtl (obj, regno);
3748       break;
3749 
3750     case SSA_NAME:
3751       *ws = 0;
3752       obj = SSA_NAME_VAR (*expr_p);
3753       /* Defer handling of anonymous SSA_NAMEs to the expander.  */
3754       if (!obj)
3755 	return NULL_TREE;
3756       if (!DECL_RTL_SET_P (obj))
3757 	x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3758       break;
3759 
3760     case VAR_DECL:
3761     case PARM_DECL:
3762     case RESULT_DECL:
3763       *ws = 0;
3764       obj = *expr_p;
3765 
3766       if (DECL_RTL_SET_P (obj))
3767 	break;
3768 
3769       if (DECL_MODE (obj) == BLKmode)
3770 	x = produce_memory_decl_rtl (obj, regno);
3771       else
3772 	x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3773 
3774       break;
3775 
3776     default:
3777       break;
3778     }
3779 
3780   if (x)
3781     {
3782       decl_rtl_to_reset.safe_push (obj);
3783       SET_DECL_RTL (obj, x);
3784     }
3785 
3786   return NULL_TREE;
3787 }
3788 
3789 /* Determines cost of the computation of EXPR.  */
3790 
3791 static unsigned
3792 computation_cost (tree expr, bool speed)
3793 {
3794   rtx_insn *seq;
3795   rtx rslt;
3796   tree type = TREE_TYPE (expr);
3797   unsigned cost;
3798   /* Avoid using hard regs in ways which may be unsupported.  */
3799   int regno = LAST_VIRTUAL_REGISTER + 1;
3800   struct cgraph_node *node = cgraph_node::get (current_function_decl);
3801   enum node_frequency real_frequency = node->frequency;
3802 
3803   node->frequency = NODE_FREQUENCY_NORMAL;
3804   crtl->maybe_hot_insn_p = speed;
3805   walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
3806   start_sequence ();
3807   rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
3808   seq = get_insns ();
3809   end_sequence ();
3810   default_rtl_profile ();
3811   node->frequency = real_frequency;
3812 
3813   cost = seq_cost (seq, speed);
3814   if (MEM_P (rslt))
3815     cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
3816 			  TYPE_ADDR_SPACE (type), speed);
3817   else if (!REG_P (rslt))
3818     cost += set_src_cost (rslt, TYPE_MODE (type), speed);
3819 
3820   return cost;
3821 }
3822 
3823 /* Returns the variable containing the value of candidate CAND at statement STMT.  */
3824 
3825 static tree
3826 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple *stmt)
3827 {
3828   if (stmt_after_increment (loop, cand, stmt))
3829     return cand->var_after;
3830   else
3831     return cand->var_before;
3832 }
3833 
3834 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3835    same precision that is at least as wide as the precision of TYPE, stores
3836    BA to A and BB to B, and returns the type of BA.  Otherwise, returns the
3837    type of A and B.  */
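/* For instance (illustrative only): if A is (unsigned int) x and B is
   (unsigned int) y, where x and y are of the same 64-bit type, both
   conversions are stripped and the 64-bit type is returned, so the caller
   can compute the difference in the wider type.  */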
3838 
3839 static tree
3840 determine_common_wider_type (tree *a, tree *b)
3841 {
3842   tree wider_type = NULL;
3843   tree suba, subb;
3844   tree atype = TREE_TYPE (*a);
3845 
3846   if (CONVERT_EXPR_P (*a))
3847     {
3848       suba = TREE_OPERAND (*a, 0);
3849       wider_type = TREE_TYPE (suba);
3850       if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3851 	return atype;
3852     }
3853   else
3854     return atype;
3855 
3856   if (CONVERT_EXPR_P (*b))
3857     {
3858       subb = TREE_OPERAND (*b, 0);
3859       if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3860 	return atype;
3861     }
3862   else
3863     return atype;
3864 
3865   *a = suba;
3866   *b = subb;
3867   return wider_type;
3868 }
3869 
3870 /* Determines the expression by which USE is expressed from induction variable
3871    CAND at statement AT in LOOP.  The expression is stored in two parts in a
3872    decomposed form: the invariant part is stored in AFF_INV and the variant
3873    part in AFF_VAR.  Store the ratio of USE.step over CAND.step in PRAT if it
3874    is non-null.  Returns false if USE cannot be expressed using CAND.  */
3875 
3876 static bool
3877 get_computation_aff_1 (struct loop *loop, gimple *at, struct iv_use *use,
3878 		       struct iv_cand *cand, struct aff_tree *aff_inv,
3879 		       struct aff_tree *aff_var, widest_int *prat = NULL)
3880 {
3881   tree ubase = use->iv->base, ustep = use->iv->step;
3882   tree cbase = cand->iv->base, cstep = cand->iv->step;
3883   tree common_type, uutype, var, cstep_common;
3884   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3885   aff_tree aff_cbase;
3886   widest_int rat;
3887 
3888   /* We must have a precision to express the values of use.  */
3889   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3890     return false;
3891 
3892   var = var_at_stmt (loop, cand, at);
3893   uutype = unsigned_type_for (utype);
3894 
3895   /* If the conversion is not a noop, perform it.  */
3896   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3897     {
3898       if (cand->orig_iv != NULL && CONVERT_EXPR_P (cbase)
3899 	  && (CONVERT_EXPR_P (cstep) || poly_int_tree_p (cstep)))
3900 	{
3901 	  tree inner_base, inner_step, inner_type;
3902 	  inner_base = TREE_OPERAND (cbase, 0);
3903 	  if (CONVERT_EXPR_P (cstep))
3904 	    inner_step = TREE_OPERAND (cstep, 0);
3905 	  else
3906 	    inner_step = cstep;
3907 
3908 	  inner_type = TREE_TYPE (inner_base);
3909 	  /* If the candidate is added from a biv whose type is smaller than
3910 	     ctype, we know neither the candidate nor the biv will overflow.
3911 	     In this case, it's safe to skip the conversion in the candidate.
3912 	     As an example, (unsigned short)((unsigned long)A) equals
3913 	     (unsigned short)A, if A has a type no larger than short.  */
3914 	  if (TYPE_PRECISION (inner_type) <= TYPE_PRECISION (uutype))
3915 	    {
3916 	      cbase = inner_base;
3917 	      cstep = inner_step;
3918 	    }
3919 	}
3920       cbase = fold_convert (uutype, cbase);
3921       cstep = fold_convert (uutype, cstep);
3922       var = fold_convert (uutype, var);
3923     }
3924 
3925   /* Ratio is 1 when computing the value of biv cand by itself.
3926      We can't rely on constant_multiple_of in this case because the
3927      use is created after the original biv is selected.  The call
3928      could fail because of inconsistent fold behavior.  See PR68021
3929      for more information.  */
3930   if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
3931     {
3932       gcc_assert (is_gimple_assign (use->stmt));
3933       gcc_assert (use->iv->ssa_name == cand->var_after);
3934       gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
3935       rat = 1;
3936     }
3937   else if (!constant_multiple_of (ustep, cstep, &rat))
3938     return false;
3939 
3940   if (prat)
3941     *prat = rat;
3942 
3943   /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3944      type, we achieve better folding by computing their difference in this
3945      wider type, and cast the result to UUTYPE.  We do not need to worry about
3946      overflows, as all the arithmetics will in the end be performed in UUTYPE
3947      anyway.  */
3948   common_type = determine_common_wider_type (&ubase, &cbase);
3949 
3950   /* use = ubase - ratio * cbase + ratio * var.  */
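  /* A worked example (illustrative only): if USE is {base 4, step 8} and
     CAND is {base 0, step 4}, then rat == 2 and at iteration i the use
     value 4 + 8 * i equals 4 - 2 * 0 + 2 * (0 + 4 * i), matching the
     decomposition computed below.  */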
3951   tree_to_aff_combination (ubase, common_type, aff_inv);
3952   tree_to_aff_combination (cbase, common_type, &aff_cbase);
3953   tree_to_aff_combination (var, uutype, aff_var);
3954 
3955   /* We need to shift the value if we are after the increment.  */
3956   if (stmt_after_increment (loop, cand, at))
3957     {
3958       aff_tree cstep_aff;
3959 
3960       if (common_type != uutype)
3961 	cstep_common = fold_convert (common_type, cstep);
3962       else
3963 	cstep_common = cstep;
3964 
3965       tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3966       aff_combination_add (&aff_cbase, &cstep_aff);
3967     }
3968 
3969   aff_combination_scale (&aff_cbase, -rat);
3970   aff_combination_add (aff_inv, &aff_cbase);
3971   if (common_type != uutype)
3972     aff_combination_convert (aff_inv, uutype);
3973 
3974   aff_combination_scale (aff_var, rat);
3975   return true;
3976 }
3977 
3978 /* Determines the expression by which USE is expressed from induction variable
3979    CAND at statement AT in LOOP.  The expression is stored in a decomposed
3980    form into AFF.  Returns false if USE cannot be expressed using CAND.  */
3981 
3982 static bool
3983 get_computation_aff (struct loop *loop, gimple *at, struct iv_use *use,
3984 		     struct iv_cand *cand, struct aff_tree *aff)
3985 {
3986   aff_tree aff_var;
3987 
3988   if (!get_computation_aff_1 (loop, at, use, cand, aff, &aff_var))
3989     return false;
3990 
3991   aff_combination_add (aff, &aff_var);
3992   return true;
3993 }
3994 
3995 /* Return the type of USE.  */
3996 
3997 static tree
3998 get_use_type (struct iv_use *use)
3999 {
4000   tree base_type = TREE_TYPE (use->iv->base);
4001   tree type;
4002 
4003   if (use->type == USE_REF_ADDRESS)
4004     {
4005       /* The base_type may be a void pointer.  Create a pointer type based on
4006 	 the mem_ref instead.  */
4007       type = build_pointer_type (TREE_TYPE (*use->op_p));
4008       gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
4009 		  == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
4010     }
4011   else
4012     type = base_type;
4013 
4014   return type;
4015 }
4016 
4017 /* Determines the expression by which USE is expressed from induction variable
4018    CAND at statement AT in LOOP.  The computation is unshared.  */
4019 
4020 static tree
4021 get_computation_at (struct loop *loop, gimple *at,
4022 		    struct iv_use *use, struct iv_cand *cand)
4023 {
4024   aff_tree aff;
4025   tree type = get_use_type (use);
4026 
4027   if (!get_computation_aff (loop, at, use, cand, &aff))
4028     return NULL_TREE;
4029   unshare_aff_combination (&aff);
4030   return fold_convert (type, aff_combination_to_tree (&aff));
4031 }
4032 
4033 /* Adjust the cost COST for being in loop setup rather than loop body.
4034    If we're optimizing for space, the loop setup overhead is constant;
4035    if we're optimizing for speed, amortize it over the average number of
4036    loop iterations.  If ROUND_UP_P is true, the result is rounded up
4037    rather than truncated towards zero when optimizing for speed.  */
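/* A hypothetical illustration of the division below: with an average of 10
   iterations, a setup cost of 4 becomes 4 / 10 == 0 when truncating, but
   (4 + 9) / 10 == 1 with ROUND_UP_P, so cheap setup code is not treated as
   entirely free.  */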
4038 static unsigned
4039 adjust_setup_cost (struct ivopts_data *data, unsigned cost,
4040 		   bool round_up_p = false)
4041 {
4042   if (cost == INFTY)
4043     return cost;
4044   else if (optimize_loop_for_speed_p (data->current_loop))
4045     {
4046       HOST_WIDE_INT niters = avg_loop_niter (data->current_loop);
4047       return ((HOST_WIDE_INT) cost + (round_up_p ? niters - 1 : 0)) / niters;
4048     }
4049   else
4050     return cost;
4051 }
4052 
4053 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE.  MULT is the
4054    EXPR operand holding the shift.  COST0 and COST1 are the costs for
4055    calculating the operands of EXPR.  Returns true if successful, and returns
4056    the cost in COST.  */
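/* For example (illustrative only), for EXPR "a + b * 8" the MULT operand is
   "b * 8" with m == 3, and the code compares the cost of a shift followed by
   an add against the target's shift-and-add (or shift-and-sub) pattern.  */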
4057 
4058 static bool
4059 get_shiftadd_cost (tree expr, scalar_int_mode mode, comp_cost cost0,
4060 		   comp_cost cost1, tree mult, bool speed, comp_cost *cost)
4061 {
4062   comp_cost res;
4063   tree op1 = TREE_OPERAND (expr, 1);
4064   tree cst = TREE_OPERAND (mult, 1);
4065   tree multop = TREE_OPERAND (mult, 0);
4066   int m = exact_log2 (int_cst_value (cst));
4067   int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
4068   int as_cost, sa_cost;
4069   bool mult_in_op1;
4070 
4071   if (!(m >= 0 && m < maxm))
4072     return false;
4073 
4074   STRIP_NOPS (op1);
4075   mult_in_op1 = operand_equal_p (op1, mult, 0);
4076 
4077   as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
4078 
4079   /* If the target has a cheap shift-and-add or shift-and-sub instruction,
4080      use that in preference to a shift insn followed by an add insn.  */
4081   sa_cost = (TREE_CODE (expr) != MINUS_EXPR
4082 	     ? shiftadd_cost (speed, mode, m)
4083 	     : (mult_in_op1
4084 		? shiftsub1_cost (speed, mode, m)
4085 		: shiftsub0_cost (speed, mode, m)));
4086 
4087   res = comp_cost (MIN (as_cost, sa_cost), 0);
4088   res += (mult_in_op1 ? cost0 : cost1);
4089 
4090   STRIP_NOPS (multop);
4091   if (!is_gimple_val (multop))
4092     res += force_expr_to_var_cost (multop, speed);
4093 
4094   *cost = res;
4095   return true;
4096 }
4097 
4098 /* Estimates cost of forcing expression EXPR into a variable.  */
4099 
4100 static comp_cost
4101 force_expr_to_var_cost (tree expr, bool speed)
4102 {
4103   static bool costs_initialized = false;
4104   static unsigned integer_cost [2];
4105   static unsigned symbol_cost [2];
4106   static unsigned address_cost [2];
4107   tree op0, op1;
4108   comp_cost cost0, cost1, cost;
4109   machine_mode mode;
4110   scalar_int_mode int_mode;
4111 
4112   if (!costs_initialized)
4113     {
4114       tree type = build_pointer_type (integer_type_node);
4115       tree var, addr;
4116       rtx x;
4117       int i;
4118 
4119       var = create_tmp_var_raw (integer_type_node, "test_var");
4120       TREE_STATIC (var) = 1;
4121       x = produce_memory_decl_rtl (var, NULL);
4122       SET_DECL_RTL (var, x);
4123 
4124       addr = build1 (ADDR_EXPR, type, var);
4125 
4127       for (i = 0; i < 2; i++)
4128 	{
4129 	  integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
4130 							     2000), i);
4131 
4132 	  symbol_cost[i] = computation_cost (addr, i) + 1;
4133 
4134 	  address_cost[i]
4135 	    = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
4136 	  if (dump_file && (dump_flags & TDF_DETAILS))
4137 	    {
4138 	      fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
4139 	      fprintf (dump_file, "  integer %d\n", (int) integer_cost[i]);
4140 	      fprintf (dump_file, "  symbol %d\n", (int) symbol_cost[i]);
4141 	      fprintf (dump_file, "  address %d\n", (int) address_cost[i]);
4142 	      fprintf (dump_file, "  other %d\n", (int) target_spill_cost[i]);
4143 	      fprintf (dump_file, "\n");
4144 	    }
4145 	}
4146 
4147       costs_initialized = true;
4148     }
4149 
4150   STRIP_NOPS (expr);
4151 
4152   if (SSA_VAR_P (expr))
4153     return no_cost;
4154 
4155   if (is_gimple_min_invariant (expr))
4156     {
4157       if (poly_int_tree_p (expr))
4158 	return comp_cost (integer_cost [speed], 0);
4159 
4160       if (TREE_CODE (expr) == ADDR_EXPR)
4161 	{
4162 	  tree obj = TREE_OPERAND (expr, 0);
4163 
4164 	  if (VAR_P (obj)
4165 	      || TREE_CODE (obj) == PARM_DECL
4166 	      || TREE_CODE (obj) == RESULT_DECL)
4167 	    return comp_cost (symbol_cost [speed], 0);
4168 	}
4169 
4170       return comp_cost (address_cost [speed], 0);
4171     }
4172 
4173   switch (TREE_CODE (expr))
4174     {
4175     case POINTER_PLUS_EXPR:
4176     case PLUS_EXPR:
4177     case MINUS_EXPR:
4178     case MULT_EXPR:
4179     case TRUNC_DIV_EXPR:
4180     case BIT_AND_EXPR:
4181     case BIT_IOR_EXPR:
4182     case LSHIFT_EXPR:
4183     case RSHIFT_EXPR:
4184       op0 = TREE_OPERAND (expr, 0);
4185       op1 = TREE_OPERAND (expr, 1);
4186       STRIP_NOPS (op0);
4187       STRIP_NOPS (op1);
4188       break;
4189 
4190     CASE_CONVERT:
4191     case NEGATE_EXPR:
4192     case BIT_NOT_EXPR:
4193       op0 = TREE_OPERAND (expr, 0);
4194       STRIP_NOPS (op0);
4195       op1 = NULL_TREE;
4196       break;
4197 
4198     default:
4199       /* Just an arbitrary value, FIXME.  */
4200       return comp_cost (target_spill_cost[speed], 0);
4201     }
4202 
4203   if (op0 == NULL_TREE
4204       || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
4205     cost0 = no_cost;
4206   else
4207     cost0 = force_expr_to_var_cost (op0, speed);
4208 
4209   if (op1 == NULL_TREE
4210       || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
4211     cost1 = no_cost;
4212   else
4213     cost1 = force_expr_to_var_cost (op1, speed);
4214 
4215   mode = TYPE_MODE (TREE_TYPE (expr));
4216   switch (TREE_CODE (expr))
4217     {
4218     case POINTER_PLUS_EXPR:
4219     case PLUS_EXPR:
4220     case MINUS_EXPR:
4221     case NEGATE_EXPR:
4222       cost = comp_cost (add_cost (speed, mode), 0);
4223       if (TREE_CODE (expr) != NEGATE_EXPR)
4224 	{
4225 	  tree mult = NULL_TREE;
4226 	  comp_cost sa_cost;
4227 	  if (TREE_CODE (op1) == MULT_EXPR)
4228 	    mult = op1;
4229 	  else if (TREE_CODE (op0) == MULT_EXPR)
4230 	    mult = op0;
4231 
4232 	  if (mult != NULL_TREE
4233 	      && is_a <scalar_int_mode> (mode, &int_mode)
4234 	      && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
4235 	      && get_shiftadd_cost (expr, int_mode, cost0, cost1, mult,
4236 				    speed, &sa_cost))
4237 	    return sa_cost;
4238 	}
4239       break;
4240 
4241     CASE_CONVERT:
4242       {
4243 	tree inner_mode, outer_mode;
4244 	outer_mode = TREE_TYPE (expr);
4245 	inner_mode = TREE_TYPE (op0);
4246 	cost = comp_cost (convert_cost (TYPE_MODE (outer_mode),
4247 				       TYPE_MODE (inner_mode), speed), 0);
4248       }
4249       break;
4250 
4251     case MULT_EXPR:
4252       if (cst_and_fits_in_hwi (op0))
4253 	cost = comp_cost (mult_by_coeff_cost (int_cst_value (op0),
4254 					     mode, speed), 0);
4255       else if (cst_and_fits_in_hwi (op1))
4256 	cost = comp_cost (mult_by_coeff_cost (int_cst_value (op1),
4257 					     mode, speed), 0);
4258       else
4259 	return comp_cost (target_spill_cost [speed], 0);
4260       break;
4261 
4262     case TRUNC_DIV_EXPR:
4263       /* Division by power of two is usually cheap, so we allow it.  Forbid
4264 	 anything else.  */
4265       if (integer_pow2p (TREE_OPERAND (expr, 1)))
4266 	cost = comp_cost (add_cost (speed, mode), 0);
4267       else
4268 	cost = comp_cost (target_spill_cost[speed], 0);
4269       break;
4270 
4271     case BIT_AND_EXPR:
4272     case BIT_IOR_EXPR:
4273     case BIT_NOT_EXPR:
4274     case LSHIFT_EXPR:
4275     case RSHIFT_EXPR:
4276       cost = comp_cost (add_cost (speed, mode), 0);
4277       break;
4278 
4279     default:
4280       gcc_unreachable ();
4281     }
4282 
4283   cost += cost0;
4284   cost += cost1;
4285   return cost;
4286 }
4287 
4288 /* Estimates cost of forcing EXPR into a variable.  INV_VARS is a set of the
4289    invariants the computation depends on.  */
4290 
4291 static comp_cost
4292 force_var_cost (struct ivopts_data *data, tree expr, bitmap *inv_vars)
4293 {
4294   if (!expr)
4295     return no_cost;
4296 
4297   find_inv_vars (data, &expr, inv_vars);
4298   return force_expr_to_var_cost (expr, data->speed);
4299 }
4300 
4301 /* Returns cost of auto-modifying address expression in shape base + offset.
4302    AINC_STEP is step size of the address IV.  AINC_OFFSET is offset of the
4303    address expression.  The address expression has ADDR_MODE in addr space
4304    AS.  The memory access has MEM_MODE.  SPEED means we are optimizing for
4305    speed or size.  */
4306 
4307 enum ainc_type
4308 {
4309   AINC_PRE_INC,		/* Pre increment.  */
4310   AINC_PRE_DEC,		/* Pre decrement.  */
4311   AINC_POST_INC,	/* Post increment.  */
4312   AINC_POST_DEC,	/* Post decrement.  */
4313   AINC_NONE		/* Also the number of auto increment types.  */
4314 };
4315 
4316 struct ainc_cost_data
4317 {
4318   unsigned costs[AINC_NONE];
4319 };
4320 
4321 static comp_cost
4322 get_address_cost_ainc (poly_int64 ainc_step, poly_int64 ainc_offset,
4323 		       machine_mode addr_mode, machine_mode mem_mode,
4324 		       addr_space_t as, bool speed)
4325 {
4326   if (!USE_LOAD_PRE_DECREMENT (mem_mode)
4327       && !USE_STORE_PRE_DECREMENT (mem_mode)
4328       && !USE_LOAD_POST_DECREMENT (mem_mode)
4329       && !USE_STORE_POST_DECREMENT (mem_mode)
4330       && !USE_LOAD_PRE_INCREMENT (mem_mode)
4331       && !USE_STORE_PRE_INCREMENT (mem_mode)
4332       && !USE_LOAD_POST_INCREMENT (mem_mode)
4333       && !USE_STORE_POST_INCREMENT (mem_mode))
4334     return infinite_cost;
4335 
4336   static vec<ainc_cost_data *> ainc_cost_data_list;
4337   unsigned idx = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
4338   if (idx >= ainc_cost_data_list.length ())
4339     {
4340       unsigned nsize = ((unsigned) as + 1) * MAX_MACHINE_MODE;
4341 
4342       gcc_assert (nsize > idx);
4343       ainc_cost_data_list.safe_grow_cleared (nsize);
4344     }
4345 
4346   ainc_cost_data *data = ainc_cost_data_list[idx];
4347   if (data == NULL)
4348     {
4349       rtx reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
4350 
4351       data = (ainc_cost_data *) xcalloc (1, sizeof (*data));
4352       data->costs[AINC_PRE_DEC] = INFTY;
4353       data->costs[AINC_POST_DEC] = INFTY;
4354       data->costs[AINC_PRE_INC] = INFTY;
4355       data->costs[AINC_POST_INC] = INFTY;
4356       if (USE_LOAD_PRE_DECREMENT (mem_mode)
4357 	  || USE_STORE_PRE_DECREMENT (mem_mode))
4358 	{
4359 	  rtx addr = gen_rtx_PRE_DEC (addr_mode, reg);
4360 
4361 	  if (memory_address_addr_space_p (mem_mode, addr, as))
4362 	    data->costs[AINC_PRE_DEC]
4363 	      = address_cost (addr, mem_mode, as, speed);
4364 	}
4365       if (USE_LOAD_POST_DECREMENT (mem_mode)
4366 	  || USE_STORE_POST_DECREMENT (mem_mode))
4367 	{
4368 	  rtx addr = gen_rtx_POST_DEC (addr_mode, reg);
4369 
4370 	  if (memory_address_addr_space_p (mem_mode, addr, as))
4371 	    data->costs[AINC_POST_DEC]
4372 	      = address_cost (addr, mem_mode, as, speed);
4373 	}
4374       if (USE_LOAD_PRE_INCREMENT (mem_mode)
4375 	  || USE_STORE_PRE_INCREMENT (mem_mode))
4376 	{
4377 	  rtx addr = gen_rtx_PRE_INC (addr_mode, reg);
4378 
4379 	  if (memory_address_addr_space_p (mem_mode, addr, as))
4380 	    data->costs[AINC_PRE_INC]
4381 	      = address_cost (addr, mem_mode, as, speed);
4382 	}
4383       if (USE_LOAD_POST_INCREMENT (mem_mode)
4384 	  || USE_STORE_POST_INCREMENT (mem_mode))
4385 	{
4386 	  rtx addr = gen_rtx_POST_INC (addr_mode, reg);
4387 
4388 	  if (memory_address_addr_space_p (mem_mode, addr, as))
4389 	    data->costs[AINC_POST_INC]
4390 	      = address_cost (addr, mem_mode, as, speed);
4391 	}
4392       ainc_cost_data_list[idx] = data;
4393     }
4394 
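  /* An illustrative mapping (not part of the original source): for a 4-byte
     access with ainc_step == 4, offset 0 matches POST_INC and offset 4
     matches PRE_INC; with ainc_step == -4, offset 0 matches POST_DEC and
     offset -4 matches PRE_DEC.  */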
4395   poly_int64 msize = GET_MODE_SIZE (mem_mode);
4396   if (known_eq (ainc_offset, 0) && known_eq (msize, ainc_step))
4397     return comp_cost (data->costs[AINC_POST_INC], 0);
4398   if (known_eq (ainc_offset, 0) && known_eq (msize, -ainc_step))
4399     return comp_cost (data->costs[AINC_POST_DEC], 0);
4400   if (known_eq (ainc_offset, msize) && known_eq (msize, ainc_step))
4401     return comp_cost (data->costs[AINC_PRE_INC], 0);
4402   if (known_eq (ainc_offset, -msize) && known_eq (msize, -ainc_step))
4403     return comp_cost (data->costs[AINC_PRE_DEC], 0);
4404 
4405   return infinite_cost;
4406 }
4407 
4408 /* Return cost of computing USE's address expression by using CAND.
4409    AFF_INV and AFF_VAR represent invariant and variant parts of the
4410    address expression, respectively.  If AFF_INV is simple, store
4411    the loop invariant variables it depends on in INV_VARS;
4412    if AFF_INV is complicated, handle it as a new invariant expression
4413    and record it in INV_EXPR.  RATIO is the ratio between the steps
4414    of USE and CAND.  If CAN_AUTOINC is non-NULL, store a boolean
4415    value in it indicating whether this is an auto-increment address.  */
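/* As a hypothetical example, for a use of a[i] where the candidate steps by
   the element size, AFF_VAR carries the candidate-based part, AFF_INV carries
   "&a" plus any constant offset, and the code below first tries to fold the
   whole sum into a legitimate addressing mode such as
   "symbol + base + index << scale + offset" before computing parts of it
   outside the memory reference.  */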
4416 
4417 static comp_cost
4418 get_address_cost (struct ivopts_data *data, struct iv_use *use,
4419 		  struct iv_cand *cand, aff_tree *aff_inv,
4420 		  aff_tree *aff_var, HOST_WIDE_INT ratio,
4421 		  bitmap *inv_vars, iv_inv_expr_ent **inv_expr,
4422 		  bool *can_autoinc, bool speed)
4423 {
4424   rtx addr;
4425   bool simple_inv = true;
4426   tree comp_inv = NULL_TREE, type = aff_var->type;
4427   comp_cost var_cost = no_cost, cost = no_cost;
4428   struct mem_address parts = {NULL_TREE, integer_one_node,
4429 			      NULL_TREE, NULL_TREE, NULL_TREE};
4430   machine_mode addr_mode = TYPE_MODE (type);
4431   machine_mode mem_mode = TYPE_MODE (use->mem_type);
4432   addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
4433   /* Only true if ratio != 1.  */
4434   bool ok_with_ratio_p = false;
4435   bool ok_without_ratio_p = false;
4436 
4437   if (!aff_combination_const_p (aff_inv))
4438     {
4439       parts.index = integer_one_node;
4440       /* Addressing mode "base + index".  */
4441       ok_without_ratio_p = valid_mem_ref_p (mem_mode, as, &parts);
4442       if (ratio != 1)
4443 	{
4444 	  parts.step = wide_int_to_tree (type, ratio);
4445 	  /* Addressing mode "base + index << scale".  */
4446 	  ok_with_ratio_p = valid_mem_ref_p (mem_mode, as, &parts);
4447 	  if (!ok_with_ratio_p)
4448 	    parts.step = NULL_TREE;
4449 	}
4450       if (ok_with_ratio_p || ok_without_ratio_p)
4451 	{
4452 	  if (maybe_ne (aff_inv->offset, 0))
4453 	    {
4454 	      parts.offset = wide_int_to_tree (sizetype, aff_inv->offset);
4455 	      /* Addressing mode "base + index [<< scale] + offset".  */
4456 	      if (!valid_mem_ref_p (mem_mode, as, &parts))
4457 		parts.offset = NULL_TREE;
4458 	      else
4459 		aff_inv->offset = 0;
4460 	    }
4461 
4462 	  move_fixed_address_to_symbol (&parts, aff_inv);
4463 	  /* Base is fixed address and is moved to symbol part.  */
4464 	  if (parts.symbol != NULL_TREE && aff_combination_zero_p (aff_inv))
4465 	    parts.base = NULL_TREE;
4466 
4467 	  /* Addressing mode "symbol + base + index [<< scale] [+ offset]".  */
4468 	  if (parts.symbol != NULL_TREE
4469 	      && !valid_mem_ref_p (mem_mode, as, &parts))
4470 	    {
4471 	      aff_combination_add_elt (aff_inv, parts.symbol, 1);
4472 	      parts.symbol = NULL_TREE;
4473 	      /* Reset SIMPLE_INV since symbol address needs to be computed
4474 		 outside of address expression in this case.  */
4475 	      simple_inv = false;
4476 	      /* Symbol part is moved back to base part, it can't be NULL.  */
4477 	      parts.base = integer_one_node;
4478 	    }
4479 	}
4480       else
4481 	parts.index = NULL_TREE;
4482     }
4483   else
4484     {
4485       poly_int64 ainc_step;
4486       if (can_autoinc
4487 	  && ratio == 1
4488 	  && ptrdiff_tree_p (cand->iv->step, &ainc_step))
4489 	{
4490 	  poly_int64 ainc_offset = (aff_inv->offset).force_shwi ();
4491 
4492 	  if (stmt_after_increment (data->current_loop, cand, use->stmt))
4493 	    ainc_offset += ainc_step;
4494 	  cost = get_address_cost_ainc (ainc_step, ainc_offset,
4495 					addr_mode, mem_mode, as, speed);
4496 	  if (!cost.infinite_cost_p ())
4497 	    {
4498 	      *can_autoinc = true;
4499 	      return cost;
4500 	    }
4501 	  cost = no_cost;
4502 	}
4503       if (!aff_combination_zero_p (aff_inv))
4504 	{
4505 	  parts.offset = wide_int_to_tree (sizetype, aff_inv->offset);
4506 	  /* Addressing mode "base + offset".  */
4507 	  if (!valid_mem_ref_p (mem_mode, as, &parts))
4508 	    parts.offset = NULL_TREE;
4509 	  else
4510 	    aff_inv->offset = 0;
4511 	}
4512     }
4513 
4514   if (simple_inv)
4515     simple_inv = (aff_inv == NULL
4516 		  || aff_combination_const_p (aff_inv)
4517 		  || aff_combination_singleton_var_p (aff_inv));
4518   if (!aff_combination_zero_p (aff_inv))
4519     comp_inv = aff_combination_to_tree (aff_inv);
4520   if (comp_inv != NULL_TREE)
4521     cost = force_var_cost (data, comp_inv, inv_vars);
4522   if (ratio != 1 && parts.step == NULL_TREE)
4523     var_cost += mult_by_coeff_cost (ratio, addr_mode, speed);
4524   if (comp_inv != NULL_TREE && parts.index == NULL_TREE)
4525     var_cost += add_cost (speed, addr_mode);
4526 
4527   if (comp_inv && inv_expr && !simple_inv)
4528     {
4529       *inv_expr = get_loop_invariant_expr (data, comp_inv);
4530       /* Clear depends on.  */
4531       if (*inv_expr != NULL && inv_vars && *inv_vars)
4532 	bitmap_clear (*inv_vars);
4533 
4534       /* The cost of a small invariant expression adjusted against loop
4535 	 niters is usually zero, which makes it hard to differentiate
4536 	 from a candidate based on loop invariant variables.  Moreover, the
4537 	 generated invariant expression may not be hoisted out of the loop
4538 	 by a following pass.  We penalize the cost by rounding up in order
4539 	 to neutralize such effects.  */
4540       cost.cost = adjust_setup_cost (data, cost.cost, true);
4541       cost.scratch = cost.cost;
4542     }
4543 
4544   cost += var_cost;
4545   addr = addr_for_mem_ref (&parts, as, false);
4546   gcc_assert (memory_address_addr_space_p (mem_mode, addr, as));
4547   cost += address_cost (addr, mem_mode, as, speed);
4548 
4549   if (parts.symbol != NULL_TREE)
4550     cost.complexity += 1;
4551   /* Don't increase the complexity of adding a scaled index if it's
4552      the only kind of index that the target allows.  */
4553   if (parts.step != NULL_TREE && ok_without_ratio_p)
4554     cost.complexity += 1;
4555   if (parts.base != NULL_TREE && parts.index != NULL_TREE)
4556     cost.complexity += 1;
4557   if (parts.offset != NULL_TREE && !integer_zerop (parts.offset))
4558     cost.complexity += 1;
4559 
4560   return cost;
4561 }
4562 
4563 /* Scale (multiply) the computed COST (except the scratch part, which should
4564    be hoisted out of the loop) by AT->frequency / header->frequency, which
4565    makes the expected cost more accurate.  */
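/* E.g. (illustrative numbers only): a cost of 10 with scratch 2, in a block
   executed half as often as the loop header, is scaled to
   2 + (10 - 2) * 1 / 2 == 6; the scratch part is kept as-is because it is
   expected to be hoisted out of the loop.  */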
4566 
4567 static comp_cost
4568 get_scaled_computation_cost_at (ivopts_data *data, gimple *at, comp_cost cost)
4569 {
4570    int loop_freq = data->current_loop->header->count.to_frequency (cfun);
4571    int bb_freq = gimple_bb (at)->count.to_frequency (cfun);
4572    if (loop_freq != 0)
4573      {
4574        gcc_assert (cost.scratch <= cost.cost);
4575        int scaled_cost
4576 	 = cost.scratch + (cost.cost - cost.scratch) * bb_freq / loop_freq;
4577 
4578        if (dump_file && (dump_flags & TDF_DETAILS))
4579 	 fprintf (dump_file, "Scaling cost based on bb prob "
4580 		  "by %2.2f: %d (scratch: %d) -> %d (%d/%d)\n",
4581 		  1.0f * bb_freq / loop_freq, cost.cost,
4582 		  cost.scratch, scaled_cost, bb_freq, loop_freq);
4583 
4584        cost.cost = scaled_cost;
4585      }
4586 
4587   return cost;
4588 }
4589 
4590 /* Determines the cost of the computation by which USE is expressed
4591    from induction variable CAND.  If ADDRESS_P is true, we just need
4592    to create an address from it, otherwise we want to get it into a
4593    register.  A set of invariants we depend on is stored in INV_VARS.
4594    If CAN_AUTOINC is nonnull, use it to record whether autoinc
4595    addressing is likely.  If INV_EXPR is nonnull, record invariant
4596    expr entry in it.  */
4597 
4598 static comp_cost
4599 get_computation_cost (struct ivopts_data *data, struct iv_use *use,
4600 		      struct iv_cand *cand, bool address_p, bitmap *inv_vars,
4601 		      bool *can_autoinc, iv_inv_expr_ent **inv_expr)
4602 {
4603   gimple *at = use->stmt;
4604   tree ubase = use->iv->base, cbase = cand->iv->base;
4605   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
4606   tree comp_inv = NULL_TREE;
4607   HOST_WIDE_INT ratio, aratio;
4608   comp_cost cost;
4609   widest_int rat;
4610   aff_tree aff_inv, aff_var;
4611   bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4612 
4613   if (inv_vars)
4614     *inv_vars = NULL;
4615   if (can_autoinc)
4616     *can_autoinc = false;
4617   if (inv_expr)
4618     *inv_expr = NULL;
4619 
4620   /* Check if we have enough precision to express the values of use.  */
4621   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4622     return infinite_cost;
4623 
4624   if (address_p
4625       || (use->iv->base_object
4626 	  && cand->iv->base_object
4627 	  && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4628 	  && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4629     {
4630       /* Do not try to express address of an object with computation based
4631 	 on address of a different object.  This may cause problems in rtl
4632 	 level alias analysis (that does not expect this to be happening,
4633 	 as this is illegal in C), and would be unlikely to be useful
4634 	 anyway.  */
4635       if (use->iv->base_object
4636 	  && cand->iv->base_object
4637 	  && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4638 	return infinite_cost;
4639     }
4640 
4641   if (!get_computation_aff_1 (data->current_loop, at, use,
4642 			      cand, &aff_inv, &aff_var, &rat)
4643       || !wi::fits_shwi_p (rat))
4644     return infinite_cost;
4645 
4646   ratio = rat.to_shwi ();
4647   if (address_p)
4648     {
4649       cost = get_address_cost (data, use, cand, &aff_inv, &aff_var, ratio,
4650 			       inv_vars, inv_expr, can_autoinc, speed);
4651       return get_scaled_computation_cost_at (data, at, cost);
4652     }
4653 
4654   bool simple_inv = (aff_combination_const_p (&aff_inv)
4655 		     || aff_combination_singleton_var_p (&aff_inv));
4656   tree signed_type = signed_type_for (aff_combination_type (&aff_inv));
4657   aff_combination_convert (&aff_inv, signed_type);
4658   if (!aff_combination_zero_p (&aff_inv))
4659     comp_inv = aff_combination_to_tree (&aff_inv);
4660 
4661   cost = force_var_cost (data, comp_inv, inv_vars);
4662   if (comp_inv && inv_expr && !simple_inv)
4663     {
4664       *inv_expr = get_loop_invariant_expr (data, comp_inv);
4665       /* Clear depends on.  */
4666       if (*inv_expr != NULL && inv_vars && *inv_vars)
4667 	bitmap_clear (*inv_vars);
4668 
4669       cost.cost = adjust_setup_cost (data, cost.cost);
4670       /* Record setup cost in scratch field.  */
4671       cost.scratch = cost.cost;
4672     }
4673   /* Cost of constant integer can be covered when adding invariant part to
4674      variant part.  */
4675   else if (comp_inv && CONSTANT_CLASS_P (comp_inv))
4676     cost = no_cost;
4677 
4678   /* Need type narrowing to represent use with cand.  */
4679   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4680     {
4681       machine_mode outer_mode = TYPE_MODE (utype);
4682       machine_mode inner_mode = TYPE_MODE (ctype);
4683       cost += comp_cost (convert_cost (outer_mode, inner_mode, speed), 0);
4684     }
4685 
4686   /* Turn a + i * (-c) into a - i * c.  */
4687   if (ratio < 0 && comp_inv && !integer_zerop (comp_inv))
4688     aratio = -ratio;
4689   else
4690     aratio = ratio;
4691 
4692   if (ratio != 1)
4693     cost += mult_by_coeff_cost (aratio, TYPE_MODE (utype), speed);
4694 
4695   /* TODO: We may also need to check if we can compute  a + i * 4 in one
4696      instruction.  */
4697   /* Need to add up the invariant and variant parts.  */
4698   if (comp_inv && !integer_zerop (comp_inv))
4699     cost += add_cost (speed, TYPE_MODE (utype));
4700 
4701   return get_scaled_computation_cost_at (data, at, cost);
4702 }
4703 
4704 /* Determines cost of computing the use in GROUP with CAND in a generic
4705    expression.  */
4706 
4707 static bool
4708 determine_group_iv_cost_generic (struct ivopts_data *data,
4709 				 struct iv_group *group, struct iv_cand *cand)
4710 {
4711   comp_cost cost;
4712   iv_inv_expr_ent *inv_expr = NULL;
4713   bitmap inv_vars = NULL, inv_exprs = NULL;
4714   struct iv_use *use = group->vuses[0];
4715 
4716   /* The simple case first -- if we need to express value of the preserved
4717      original biv, the cost is 0.  This also prevents us from counting the
4718      cost of increment twice -- once at this use and once in the cost of
4719      the candidate.  */
4720   if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
4721     cost = no_cost;
4722   else
4723     cost = get_computation_cost (data, use, cand, false,
4724 				 &inv_vars, NULL, &inv_expr);
4725 
4726   if (inv_expr)
4727     {
4728       inv_exprs = BITMAP_ALLOC (NULL);
4729       bitmap_set_bit (inv_exprs, inv_expr->id);
4730     }
4731   set_group_iv_cost (data, group, cand, cost, inv_vars,
4732 		     NULL_TREE, ERROR_MARK, inv_exprs);
4733   return !cost.infinite_cost_p ();
4734 }
4735 
4736 /* Determines cost of computing uses in GROUP with CAND in addresses.  */
4737 
4738 static bool
4739 determine_group_iv_cost_address (struct ivopts_data *data,
4740 				 struct iv_group *group, struct iv_cand *cand)
4741 {
4742   unsigned i;
4743   bitmap inv_vars = NULL, inv_exprs = NULL;
4744   bool can_autoinc;
4745   iv_inv_expr_ent *inv_expr = NULL;
4746   struct iv_use *use = group->vuses[0];
4747   comp_cost sum_cost = no_cost, cost;
4748 
4749   cost = get_computation_cost (data, use, cand, true,
4750 			       &inv_vars, &can_autoinc, &inv_expr);
4751 
4752   if (inv_expr)
4753     {
4754       inv_exprs = BITMAP_ALLOC (NULL);
4755       bitmap_set_bit (inv_exprs, inv_expr->id);
4756     }
4757   sum_cost = cost;
4758   if (!sum_cost.infinite_cost_p () && cand->ainc_use == use)
4759     {
4760       if (can_autoinc)
4761 	sum_cost -= cand->cost_step;
4762       /* If we generated the candidate solely for exploiting autoincrement
4763 	 opportunities, and it turns out it can't be used, set the cost to
4764 	 infinity to make sure we ignore it.  */
4765       else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4766 	sum_cost = infinite_cost;
4767     }
4768 
4769   /* Uses in a group can share setup code, so only add setup cost once.  */
4770   cost -= cost.scratch;
4771   /* Compute and add costs for the remaining uses of this group.  */
4772   for (i = 1; i < group->vuses.length () && !sum_cost.infinite_cost_p (); i++)
4773     {
4774       struct iv_use *next = group->vuses[i];
4775 
4776       /* TODO: We could skip computing cost for sub iv_use when it has the
4777 	 same cost as the first iv_use, but the cost really depends on the
4778 	 offset and where the iv_use is.  */
4779       cost = get_computation_cost (data, next, cand, true,
4780 				   NULL, &can_autoinc, &inv_expr);
4781       if (inv_expr)
4782 	{
4783 	  if (!inv_exprs)
4784 	    inv_exprs = BITMAP_ALLOC (NULL);
4785 
4786 	  bitmap_set_bit (inv_exprs, inv_expr->id);
4787 	}
4788       sum_cost += cost;
4789     }
4790   set_group_iv_cost (data, group, cand, sum_cost, inv_vars,
4791 		     NULL_TREE, ERROR_MARK, inv_exprs);
4792 
4793   return !sum_cost.infinite_cost_p ();
4794 }
4795 
4796 /* Computes value of candidate CAND at position AT in iteration NITER, and
4797    stores it to VAL.  */
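/* For instance (illustrative only), for a candidate with base 16 and step 4
   the value after NITER == 5 iterations is 16 + 5 * 4 == 36, with one extra
   step added when AT is already past the increment of the candidate.  */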
4798 
4799 static void
4800 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple *at, tree niter,
4801 	       aff_tree *val)
4802 {
4803   aff_tree step, delta, nit;
4804   struct iv *iv = cand->iv;
4805   tree type = TREE_TYPE (iv->base);
4806   tree steptype;
4807   if (POINTER_TYPE_P (type))
4808     steptype = sizetype;
4809   else
4810     steptype = unsigned_type_for (type);
4811 
4812   tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
4813   aff_combination_convert (&step, steptype);
4814   tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4815   aff_combination_convert (&nit, steptype);
4816   aff_combination_mult (&nit, &step, &delta);
4817   if (stmt_after_increment (loop, cand, at))
4818     aff_combination_add (&delta, &step);
4819 
4820   tree_to_aff_combination (iv->base, type, val);
4821   if (!POINTER_TYPE_P (type))
4822     aff_combination_convert (val, steptype);
4823   aff_combination_add (val, &delta);
4824 }
4825 
4826 /* Returns period of induction variable iv.  */
4827 
4828 static tree
4829 iv_period (struct iv *iv)
4830 {
4831   tree step = iv->step, period, type;
4832   tree pow2div;
4833 
4834   gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4835 
4836   type = unsigned_type_for (TREE_TYPE (step));
4837   /* The period of the iv is lcm (step, type_range) / step - 1,
4838      i.e., N * type_range / step - 1.  Since type_range is a power
4839      of two, N == step >> num_of_ending_zeros_binary (step), so the
4840      final result is
4841 
4842        (type_range >> num_of_ending_zeros_binary (step)) - 1
4843 
4844   */
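  /* Illustrative example (not part of the original comment): for a 32-bit
     unsigned iv with step 4, num_ending_zeros (step) == 2 and the computed
     period is a mask of 30 low bits, i.e. 0x3fffffff.  */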
4845   pow2div = num_ending_zeros (step);
4846 
4847   period = build_low_bits_mask (type,
4848 				(TYPE_PRECISION (type)
4849 				 - tree_to_uhwi (pow2div)));
4850 
4851   return period;
4852 }
4853 
4854 /* Returns the comparison operator used when eliminating the iv USE.  */
4855 
4856 static enum tree_code
4857 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4858 {
4859   struct loop *loop = data->current_loop;
4860   basic_block ex_bb;
4861   edge exit;
4862 
4863   ex_bb = gimple_bb (use->stmt);
4864   exit = EDGE_SUCC (ex_bb, 0);
4865   if (flow_bb_inside_loop_p (loop, exit->dest))
4866     exit = EDGE_SUCC (ex_bb, 1);
4867 
4868   return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4869 }
4870 
4871 /* Returns true if we can prove that BASE - OFFSET does not overflow.  For now,
4872    we only detect the situation that BASE = SOMETHING + OFFSET, where the
4873    calculation is performed in non-wrapping type.
4874 
4875    TODO: More generally, we could test for the situation that
4876 	 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4877 	 This would require knowing the sign of OFFSET.  */
4878 
4879 static bool
4880 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
4881 {
4882   enum tree_code code;
4883   tree e1, e2;
4884   aff_tree aff_e1, aff_e2, aff_offset;
4885 
4886   if (!nowrap_type_p (TREE_TYPE (base)))
4887     return false;
4888 
4889   base = expand_simple_operations (base);
4890 
4891   if (TREE_CODE (base) == SSA_NAME)
4892     {
4893       gimple *stmt = SSA_NAME_DEF_STMT (base);
4894 
4895       if (gimple_code (stmt) != GIMPLE_ASSIGN)
4896 	return false;
4897 
4898       code = gimple_assign_rhs_code (stmt);
4899       if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4900 	return false;
4901 
4902       e1 = gimple_assign_rhs1 (stmt);
4903       e2 = gimple_assign_rhs2 (stmt);
4904     }
4905   else
4906     {
4907       code = TREE_CODE (base);
4908       if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4909 	return false;
4910       e1 = TREE_OPERAND (base, 0);
4911       e2 = TREE_OPERAND (base, 1);
4912     }
4913 
4914   /* Use affine expansion as deeper inspection to prove the equality.  */
4915   tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
4916 				  &aff_e2, &data->name_expansion_cache);
4917   tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
4918 				  &aff_offset, &data->name_expansion_cache);
4919   aff_combination_scale (&aff_offset, -1);
4920   switch (code)
4921     {
4922     case PLUS_EXPR:
4923       aff_combination_add (&aff_e2, &aff_offset);
4924       if (aff_combination_zero_p (&aff_e2))
4925 	return true;
4926 
4927       tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
4928 				      &aff_e1, &data->name_expansion_cache);
4929       aff_combination_add (&aff_e1, &aff_offset);
4930       return aff_combination_zero_p (&aff_e1);
4931 
4932     case POINTER_PLUS_EXPR:
4933       aff_combination_add (&aff_e2, &aff_offset);
4934       return aff_combination_zero_p (&aff_e2);
4935 
4936     default:
4937       return false;
4938     }
4939 }
4940 
4941 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4942    comparison with CAND.  NITER describes the number of iterations of
4943    the loop.  If successful, the comparison in COMP_P is altered accordingly.
4944 
4945    We aim to handle the following situation:
4946 
4947    sometype *base, *p;
4948    int a, b, i;
4949 
4950    i = a;
4951    p = p_0 = base + a;
4952 
4953    do
4954      {
4955        bla (*p);
4956        p++;
4957        i++;
4958      }
4959    while (i < b);
4960 
4961    Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4962    We aim to optimize this to
4963 
4964    p = p_0 = base + a;
4965    do
4966      {
4967        bla (*p);
4968        p++;
4969      }
4970    while (p < p_0 - a + b);
4971 
4972    This preserves correctness, since the pointer arithmetic does not
4973    overflow.  More precisely:
4974 
4975    1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4976       overflow in computing it or the values of p.
4977    2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4978       overflow.  To prove this, we use the fact that p_0 = base + a.  */
4979 
4980 static bool
4981 iv_elimination_compare_lt (struct ivopts_data *data,
4982 			   struct iv_cand *cand, enum tree_code *comp_p,
4983 			   struct tree_niter_desc *niter)
4984 {
4985   tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
4986   struct aff_tree nit, tmpa, tmpb;
4987   enum tree_code comp;
4988   HOST_WIDE_INT step;
4989 
4990   /* We need to know that the candidate induction variable does not overflow.
4991      While more complex analysis may be used to prove this, for now just
4992      check that the variable appears in the original program and that it
4993      is computed in a type that guarantees no overflows.  */
4994   cand_type = TREE_TYPE (cand->iv->base);
4995   if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
4996     return false;
4997 
4998   /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4999      the calculation of the BOUND could overflow, making the comparison
5000      invalid.  */
5001   if (!data->loop_single_exit_p)
5002     return false;
5003 
5004   /* We need to be able to decide whether candidate is increasing or decreasing
5005      in order to choose the right comparison operator.  */
5006   if (!cst_and_fits_in_hwi (cand->iv->step))
5007     return false;
5008   step = int_cst_value (cand->iv->step);
5009 
5010   /* Check that the number of iterations matches the expected pattern:
5011      a + 1 > b ? 0 : b - a - 1.  */
5012   mbz = niter->may_be_zero;
5013   if (TREE_CODE (mbz) == GT_EXPR)
5014     {
5015       /* Handle a + 1 > b.  */
5016       tree op0 = TREE_OPERAND (mbz, 0);
5017       if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
5018 	{
5019 	  a = TREE_OPERAND (op0, 0);
5020 	  b = TREE_OPERAND (mbz, 1);
5021 	}
5022       else
5023 	return false;
5024     }
5025   else if (TREE_CODE (mbz) == LT_EXPR)
5026     {
5027       tree op1 = TREE_OPERAND (mbz, 1);
5028 
5029       /* Handle b < a + 1.  */
5030       if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
5031 	{
5032 	  a = TREE_OPERAND (op1, 0);
5033 	  b = TREE_OPERAND (mbz, 0);
5034 	}
5035       else
5036 	return false;
5037     }
5038   else
5039     return false;
5040 
5041   /* Expected number of iterations is B - A - 1.  Check that it matches
5042      the actual number, i.e., that B - A - NITER = 1.  */
5043   tree_to_aff_combination (niter->niter, nit_type, &nit);
5044   tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
5045   tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
5046   aff_combination_scale (&nit, -1);
5047   aff_combination_scale (&tmpa, -1);
5048   aff_combination_add (&tmpb, &tmpa);
5049   aff_combination_add (&tmpb, &nit);
5050   if (tmpb.n != 0 || maybe_ne (tmpb.offset, 1))
5051     return false;
5052 
5053   /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
5054      overflow.  */
5055   offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
5056 			cand->iv->step,
5057 			fold_convert (TREE_TYPE (cand->iv->step), a));
5058   if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
5059     return false;
5060 
5061   /* Determine the new comparison operator.  */
5062   comp = step < 0 ? GT_EXPR : LT_EXPR;
5063   if (*comp_p == NE_EXPR)
5064     *comp_p = comp;
5065   else if (*comp_p == EQ_EXPR)
5066     *comp_p = invert_tree_comparison (comp, false);
5067   else
5068     gcc_unreachable ();
5069 
5070   return true;
5071 }
5072 
5073 /* Check whether it is possible to express the condition in USE by comparison
5074    of candidate CAND.  If so, store the value compared with to BOUND, and the
5075    comparison operator to COMP.  */
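/* A hypothetical example: for an exit condition "i != n" and a candidate
   "p = base + 4 * i", the exit test may be rewritten as a comparison of p
   against BOUND == base + 4 * n, provided the candidate's period is large
   enough that p cannot wrap before the exit is taken.  */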
5076 
5077 static bool
5078 may_eliminate_iv (struct ivopts_data *data,
5079 		  struct iv_use *use, struct iv_cand *cand, tree *bound,
5080 		  enum tree_code *comp)
5081 {
5082   basic_block ex_bb;
5083   edge exit;
5084   tree period;
5085   struct loop *loop = data->current_loop;
5086   aff_tree bnd;
5087   struct tree_niter_desc *desc = NULL;
5088 
5089   if (TREE_CODE (cand->iv->step) != INTEGER_CST)
5090     return false;
5091 
5092   /* For now works only for exits that dominate the loop latch.
5093      TODO: extend to other conditions inside loop body.  */
5094   ex_bb = gimple_bb (use->stmt);
5095   if (use->stmt != last_stmt (ex_bb)
5096       || gimple_code (use->stmt) != GIMPLE_COND
5097       || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
5098     return false;
5099 
5100   exit = EDGE_SUCC (ex_bb, 0);
5101   if (flow_bb_inside_loop_p (loop, exit->dest))
5102     exit = EDGE_SUCC (ex_bb, 1);
5103   if (flow_bb_inside_loop_p (loop, exit->dest))
5104     return false;
5105 
5106   desc = niter_for_exit (data, exit);
5107   if (!desc)
5108     return false;
5109 
5110   /* Determine whether we can use the variable to test the exit condition.
5111      This is the case iff the period of the induction variable is greater
5112      than the number of iterations for which the exit condition is true.  */
5113   period = iv_period (cand->iv);
5114 
5115   /* If the number of iterations is constant, compare against it directly.  */
5116   if (TREE_CODE (desc->niter) == INTEGER_CST)
5117     {
5118       /* See cand_value_at.  */
5119       if (stmt_after_increment (loop, cand, use->stmt))
5120 	{
5121 	  if (!tree_int_cst_lt (desc->niter, period))
5122 	    return false;
5123 	}
5124       else
5125 	{
5126 	  if (tree_int_cst_lt (period, desc->niter))
5127 	    return false;
5128 	}
5129     }
5130 
5131   /* If not, and if this is the only possible exit of the loop, see whether
5132      we can get a conservative estimate on the number of iterations of the
5133      entire loop and compare against that instead.  */
5134   else
5135     {
5136       widest_int period_value, max_niter;
5137 
5138       max_niter = desc->max;
5139       if (stmt_after_increment (loop, cand, use->stmt))
5140 	max_niter += 1;
5141       period_value = wi::to_widest (period);
5142       if (wi::gtu_p (max_niter, period_value))
5143 	{
5144 	  /* See if we can take advantage of inferred loop bound
5145 	     information.  */
5146 	  if (data->loop_single_exit_p)
5147 	    {
5148 	      if (!max_loop_iterations (loop, &max_niter))
5149 		return false;
5150 	      /* The loop bound is already adjusted by adding 1.  */
5151 	      if (wi::gtu_p (max_niter, period_value))
5152 		return false;
5153 	    }
5154 	  else
5155 	    return false;
5156 	}
5157     }
5158 
5159   cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
5160 
5161   *bound = fold_convert (TREE_TYPE (cand->iv->base),
5162 			 aff_combination_to_tree (&bnd));
5163   *comp = iv_elimination_compare (data, use);
5164 
5165   /* It is unlikely that computing the number of iterations using division
5166      would be more profitable than keeping the original induction variable.  */
5167   if (expression_expensive_p (*bound))
5168     return false;
5169 
5170   /* Sometimes it is possible to handle the situation in which the number of
5171      iterations may be zero unless additional assumptions hold, by using <
5172      instead of != in the exit condition.
5173 
5174      TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
5175 	   base the exit condition on it.  However, that is often too
5176 	   expensive.  */
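  /* A rough source-level sketch of that rewrite: for a loop such as

	 for (i = 0; i != n; i++)
	   ...

     where N may be zero, an equality-based exit test against the candidate's
     computed final value could fail to trigger on the very first check, while
     a "<" (or ">" for a decreasing candidate) test against the same bound
     still exits immediately; iv_elimination_compare_lt verifies that this
     form of the rewrite is actually safe here.  */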
5177   if (!integer_zerop (desc->may_be_zero))
5178     return iv_elimination_compare_lt (data, cand, comp, desc);
5179 
5180   return true;
5181 }
5182 
5183 /* Calculates the cost of BOUND if it is a PARM_DECL.  A PARM_DECL must
5184    be copied if it is used in the loop body and DATA->body_includes_call.  */
5185 
5186 static int
5187 parm_decl_cost (struct ivopts_data *data, tree bound)
5188 {
5189   tree sbound = bound;
5190   STRIP_NOPS (sbound);
5191 
5192   if (TREE_CODE (sbound) == SSA_NAME
5193       && SSA_NAME_IS_DEFAULT_DEF (sbound)
5194       && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
5195       && data->body_includes_call)
5196     return COSTS_N_INSNS (1);
5197 
5198   return 0;
5199 }
5200 
5201 /* Determines cost of computing the use in GROUP with CAND in a condition.  */
5202 
5203 static bool
5204 determine_group_iv_cost_cond (struct ivopts_data *data,
5205 			      struct iv_group *group, struct iv_cand *cand)
5206 {
5207   tree bound = NULL_TREE;
5208   struct iv *cmp_iv;
5209   bitmap inv_exprs = NULL;
5210   bitmap inv_vars_elim = NULL, inv_vars_express = NULL, inv_vars;
5211   comp_cost elim_cost = infinite_cost, express_cost, cost, bound_cost;
5212   enum comp_iv_rewrite rewrite_type;
5213   iv_inv_expr_ent *inv_expr_elim = NULL, *inv_expr_express = NULL, *inv_expr;
5214   tree *control_var, *bound_cst;
5215   enum tree_code comp = ERROR_MARK;
5216   struct iv_use *use = group->vuses[0];
5217 
5218   /* Extract condition operands.  */
5219   rewrite_type = extract_cond_operands (data, use->stmt, &control_var,
5220 					&bound_cst, NULL, &cmp_iv);
5221   gcc_assert (rewrite_type != COMP_IV_NA);
5222 
5223   /* Try iv elimination.  */
5224   if (rewrite_type == COMP_IV_ELIM
5225       && may_eliminate_iv (data, use, cand, &bound, &comp))
5226     {
5227       elim_cost = force_var_cost (data, bound, &inv_vars_elim);
5228       if (elim_cost.cost == 0)
5229 	elim_cost.cost = parm_decl_cost (data, bound);
5230       else if (TREE_CODE (bound) == INTEGER_CST)
5231 	elim_cost.cost = 0;
5232       /* If we replace a loop condition 'i < n' with 'p < base + n',
5233 	 inv_vars_elim will have 'base' and 'n' set, which implies that both
5234 	 'base' and 'n' will be live during the loop.	 More likely,
5235 	 'base + n' will be loop invariant, resulting in only one live value
5236 	 during the loop.  So in that case we clear inv_vars_elim and set
5237 	 inv_expr_elim instead.  */
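      /* A hedged source-level illustration of the comment above: eliminating
	 the condition of

	     for (i = 0; i < n; i++)
	       a[i] = 0;

	 in favour of a pointer candidate typically yields a test of the form
	 "p < a + n"; recording "a + n" as a single invariant expression keeps
	 one value live across the loop instead of both "a" and "n".  */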
5238       if (inv_vars_elim && bitmap_count_bits (inv_vars_elim) > 1)
5239 	{
5240 	  inv_expr_elim = get_loop_invariant_expr (data, bound);
5241 	  bitmap_clear (inv_vars_elim);
5242 	}
5243       /* The bound is a loop invariant, so it will be computed only
5244 	 once.  */
5245       elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
5246     }
5247 
5248   /* When the condition is a comparison of the candidate IV against
5249      zero, prefer this IV.
5250 
5251      TODO: The constant that we're subtracting from the cost should
5252      be target-dependent.  This information should be added to the
5253      target costs for each backend.  */
5254   if (!elim_cost.infinite_cost_p () /* Do not try to decrease infinite! */
5255       && integer_zerop (*bound_cst)
5256       && (operand_equal_p (*control_var, cand->var_after, 0)
5257 	  || operand_equal_p (*control_var, cand->var_before, 0)))
5258     elim_cost -= 1;
5259 
5260   express_cost = get_computation_cost (data, use, cand, false,
5261 				       &inv_vars_express, NULL,
5262 				       &inv_expr_express);
5263   if (cmp_iv != NULL)
5264     find_inv_vars (data, &cmp_iv->base, &inv_vars_express);
5265 
5266   /* Count the cost of the original bound as well.  */
5267   bound_cost = force_var_cost (data, *bound_cst, NULL);
5268   if (bound_cost.cost == 0)
5269     bound_cost.cost = parm_decl_cost (data, *bound_cst);
5270   else if (TREE_CODE (*bound_cst) == INTEGER_CST)
5271     bound_cost.cost = 0;
5272   express_cost += bound_cost;
5273 
5274   /* Choose the better approach, preferring the eliminated IV. */
5275   if (elim_cost <= express_cost)
5276     {
5277       cost = elim_cost;
5278       inv_vars = inv_vars_elim;
5279       inv_vars_elim = NULL;
5280       inv_expr = inv_expr_elim;
5281     }
5282   else
5283     {
5284       cost = express_cost;
5285       inv_vars = inv_vars_express;
5286       inv_vars_express = NULL;
5287       bound = NULL_TREE;
5288       comp = ERROR_MARK;
5289       inv_expr = inv_expr_express;
5290     }
5291 
5292   if (inv_expr)
5293     {
5294       inv_exprs = BITMAP_ALLOC (NULL);
5295       bitmap_set_bit (inv_exprs, inv_expr->id);
5296     }
5297   set_group_iv_cost (data, group, cand, cost,
5298 		     inv_vars, bound, comp, inv_exprs);
5299 
5300   if (inv_vars_elim)
5301     BITMAP_FREE (inv_vars_elim);
5302   if (inv_vars_express)
5303     BITMAP_FREE (inv_vars_express);
5304 
5305   return !cost.infinite_cost_p ();
5306 }
5307 
5308 /* Determines the cost of computing the uses in GROUP with CAND.  Returns false
5309    if GROUP cannot be represented with CAND.  */
5310 
5311 static bool
5312 determine_group_iv_cost (struct ivopts_data *data,
5313 			 struct iv_group *group, struct iv_cand *cand)
5314 {
5315   switch (group->type)
5316     {
5317     case USE_NONLINEAR_EXPR:
5318       return determine_group_iv_cost_generic (data, group, cand);
5319 
5320     case USE_REF_ADDRESS:
5321     case USE_PTR_ADDRESS:
5322       return determine_group_iv_cost_address (data, group, cand);
5323 
5324     case USE_COMPARE:
5325       return determine_group_iv_cost_cond (data, group, cand);
5326 
5327     default:
5328       gcc_unreachable ();
5329     }
5330 }
5331 
5332 /* Return true if get_computation_cost indicates that autoincrement is
5333    a possibility for the pair of USE and CAND, false otherwise.  */
5334 
5335 static bool
5336 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
5337 			   struct iv_cand *cand)
5338 {
5339   if (!address_p (use->type))
5340     return false;
5341 
5342   bool can_autoinc = false;
5343   get_computation_cost (data, use, cand, true, NULL, &can_autoinc, NULL);
5344   return can_autoinc;
5345 }
5346 
5347 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5348    use that allows autoincrement, and set their AINC_USE if possible.  */
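/* For instance (a target-dependent sketch, not taken from this file): on a
   target with post-increment addressing, a memory access "x = *p" that sits
   right next to the original increment "p = p + 4" can be emitted as a single
   auto-increment access, so recording the adjacent use in AINC_USE makes that
   addressing mode visible to the cost model.  */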
5349 
5350 static void
5351 set_autoinc_for_original_candidates (struct ivopts_data *data)
5352 {
5353   unsigned i, j;
5354 
5355   for (i = 0; i < data->vcands.length (); i++)
5356     {
5357       struct iv_cand *cand = data->vcands[i];
5358       struct iv_use *closest_before = NULL;
5359       struct iv_use *closest_after = NULL;
5360       if (cand->pos != IP_ORIGINAL)
5361 	continue;
5362 
5363       for (j = 0; j < data->vgroups.length (); j++)
5364 	{
5365 	  struct iv_group *group = data->vgroups[j];
5366 	  struct iv_use *use = group->vuses[0];
5367 	  unsigned uid = gimple_uid (use->stmt);
5368 
5369 	  if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
5370 	    continue;
5371 
5372 	  if (uid < gimple_uid (cand->incremented_at)
5373 	      && (closest_before == NULL
5374 		  || uid > gimple_uid (closest_before->stmt)))
5375 	    closest_before = use;
5376 
5377 	  if (uid > gimple_uid (cand->incremented_at)
5378 	      && (closest_after == NULL
5379 		  || uid < gimple_uid (closest_after->stmt)))
5380 	    closest_after = use;
5381 	}
5382 
5383       if (closest_before != NULL
5384 	  && autoinc_possible_for_pair (data, closest_before, cand))
5385 	cand->ainc_use = closest_before;
5386       else if (closest_after != NULL
5387 	       && autoinc_possible_for_pair (data, closest_after, cand))
5388 	cand->ainc_use = closest_after;
5389     }
5390 }
5391 
5392 /* Relate compare use with all candidates.  */
5393 
5394 static void
5395 relate_compare_use_with_all_cands (struct ivopts_data *data)
5396 {
5397   unsigned i, count = data->vcands.length ();
5398   for (i = 0; i < data->vgroups.length (); i++)
5399     {
5400       struct iv_group *group = data->vgroups[i];
5401 
5402       if (group->type == USE_COMPARE)
5403 	bitmap_set_range (group->related_cands, 0, count);
5404     }
5405 }
5406 
5407 /* Finds the candidates for the induction variables.  */
5408 
5409 static void
5410 find_iv_candidates (struct ivopts_data *data)
5411 {
5412   /* Add commonly used ivs.  */
5413   add_standard_iv_candidates (data);
5414 
5415   /* Add old induction variables.  */
5416   add_iv_candidate_for_bivs (data);
5417 
5418   /* Add induction variables derived from uses.  */
5419   add_iv_candidate_for_groups (data);
5420 
5421   set_autoinc_for_original_candidates (data);
5422 
5423   /* Record the important candidates.  */
5424   record_important_candidates (data);
5425 
5426   /* Relate compare iv_use with all candidates.  */
5427   if (!data->consider_all_candidates)
5428     relate_compare_use_with_all_cands (data);
5429 
5430   if (dump_file && (dump_flags & TDF_DETAILS))
5431     {
5432       unsigned i;
5433 
5434       fprintf (dump_file, "\n<Important Candidates>:\t");
5435       for (i = 0; i < data->vcands.length (); i++)
5436 	if (data->vcands[i]->important)
5437 	  fprintf (dump_file, " %d,", data->vcands[i]->id);
5438       fprintf (dump_file, "\n");
5439 
5440       fprintf (dump_file, "\n<Group, Cand> Related:\n");
5441       for (i = 0; i < data->vgroups.length (); i++)
5442 	{
5443 	  struct iv_group *group = data->vgroups[i];
5444 
5445 	  if (group->related_cands)
5446 	    {
5447 	      fprintf (dump_file, "  Group %d:\t", group->id);
5448 	      dump_bitmap (dump_file, group->related_cands);
5449 	    }
5450 	}
5451       fprintf (dump_file, "\n");
5452     }
5453 }
5454 
5455 /* Determines the cost of computing each group of uses with each iv candidate.  */
5456 
5457 static void
5458 determine_group_iv_costs (struct ivopts_data *data)
5459 {
5460   unsigned i, j;
5461   struct iv_cand *cand;
5462   struct iv_group *group;
5463   bitmap to_clear = BITMAP_ALLOC (NULL);
5464 
5465   alloc_use_cost_map (data);
5466 
5467   for (i = 0; i < data->vgroups.length (); i++)
5468     {
5469       group = data->vgroups[i];
5470 
5471       if (data->consider_all_candidates)
5472 	{
5473 	  for (j = 0; j < data->vcands.length (); j++)
5474 	    {
5475 	      cand = data->vcands[j];
5476 	      determine_group_iv_cost (data, group, cand);
5477 	    }
5478 	}
5479       else
5480 	{
5481 	  bitmap_iterator bi;
5482 
5483 	  EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, j, bi)
5484 	    {
5485 	      cand = data->vcands[j];
5486 	      if (!determine_group_iv_cost (data, group, cand))
5487 		bitmap_set_bit (to_clear, j);
5488 	    }
5489 
5490 	  /* Remove the candidates for which the cost is infinite from
5491 	     the list of related candidates.  */
5492 	  bitmap_and_compl_into (group->related_cands, to_clear);
5493 	  bitmap_clear (to_clear);
5494 	}
5495     }
5496 
5497   BITMAP_FREE (to_clear);
5498 
5499   if (dump_file && (dump_flags & TDF_DETAILS))
5500     {
5501       bitmap_iterator bi;
5502 
5503       /* Dump invariant variables.  */
5504       fprintf (dump_file, "\n<Invariant Vars>:\n");
5505       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5506 	{
5507 	  struct version_info *info = ver_info (data, i);
5508 	  if (info->inv_id)
5509 	    {
5510 	      fprintf (dump_file, "Inv %d:\t", info->inv_id);
5511 	      print_generic_expr (dump_file, info->name, TDF_SLIM);
5512 	      fprintf (dump_file, "%s\n",
5513 		       info->has_nonlin_use ? "" : "\t(eliminable)");
5514 	    }
5515 	}
5516 
5517       /* Dump invariant expressions.  */
5518       fprintf (dump_file, "\n<Invariant Expressions>:\n");
5519       auto_vec <iv_inv_expr_ent *> list (data->inv_expr_tab->elements ());
5520 
5521       for (hash_table<iv_inv_expr_hasher>::iterator it
5522 	   = data->inv_expr_tab->begin (); it != data->inv_expr_tab->end ();
5523 	   ++it)
5524 	list.safe_push (*it);
5525 
5526       list.qsort (sort_iv_inv_expr_ent);
5527 
5528       for (i = 0; i < list.length (); ++i)
5529 	{
5530 	  fprintf (dump_file, "inv_expr %d: \t", list[i]->id);
5531 	  print_generic_expr (dump_file, list[i]->expr, TDF_SLIM);
5532 	  fprintf (dump_file, "\n");
5533 	}
5534 
5535       fprintf (dump_file, "\n<Group-candidate Costs>:\n");
5536 
5537       for (i = 0; i < data->vgroups.length (); i++)
5538 	{
5539 	  group = data->vgroups[i];
5540 
5541 	  fprintf (dump_file, "Group %d:\n", i);
5542 	  fprintf (dump_file, "  cand\tcost\tcompl.\tinv.expr.\tinv.vars\n");
5543 	  for (j = 0; j < group->n_map_members; j++)
5544 	    {
5545 	      if (!group->cost_map[j].cand
5546 		  || group->cost_map[j].cost.infinite_cost_p ())
5547 		continue;
5548 
5549 	      fprintf (dump_file, "  %d\t%d\t%d\t",
5550 		       group->cost_map[j].cand->id,
5551 		       group->cost_map[j].cost.cost,
5552 		       group->cost_map[j].cost.complexity);
5553 	      if (!group->cost_map[j].inv_exprs
5554 		  || bitmap_empty_p (group->cost_map[j].inv_exprs))
5555 		fprintf (dump_file, "NIL;\t");
5556 	      else
5557 		bitmap_print (dump_file,
5558 			      group->cost_map[j].inv_exprs, "", ";\t");
5559 	      if (!group->cost_map[j].inv_vars
5560 		  || bitmap_empty_p (group->cost_map[j].inv_vars))
5561 		fprintf (dump_file, "NIL;\n");
5562 	      else
5563 		bitmap_print (dump_file,
5564 			      group->cost_map[j].inv_vars, "", "\n");
5565 	    }
5566 
5567 	  fprintf (dump_file, "\n");
5568 	}
5569       fprintf (dump_file, "\n");
5570     }
5571 }
5572 
5573 /* Determines cost of the candidate CAND.  */
5574 
5575 static void
5576 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5577 {
5578   comp_cost cost_base;
5579   unsigned cost, cost_step;
5580   tree base;
5581 
5582   gcc_assert (cand->iv != NULL);
5583 
5584   /* There are two costs associated with the candidate -- its increment
5585      and its initialization.  The second is almost negligible for any loop
5586      that rolls enough, so we take it into account only slightly.  */
5587 
5588   base = cand->iv->base;
5589   cost_base = force_var_cost (data, base, NULL);
5590   /* It is exceptional for the iv register to happen to be initialized with
5591      the proper value at no cost.  In general, there will at least be a regcopy
5592      or a const set.  */
5593   if (cost_base.cost == 0)
5594     cost_base.cost = COSTS_N_INSNS (1);
5595   cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5596 
5597   cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5598 
5599   /* Prefer the original ivs unless we may gain something by replacing them.
5600      The reason is to make debugging simpler, so this is not relevant for
5601      artificial ivs created by other optimization passes.  */
5602   if (cand->pos != IP_ORIGINAL
5603       || !SSA_NAME_VAR (cand->var_before)
5604       || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5605     cost++;
5606 
5607   /* Prefer not to insert statements into latch unless there are some
5608      already (so that we do not create unnecessary jumps).  */
5609   if (cand->pos == IP_END
5610       && empty_block_p (ip_end_pos (data->current_loop)))
5611     cost++;
5612 
5613   cand->cost = cost;
5614   cand->cost_step = cost_step;
5615 }
5616 
5617 /* Determines costs of computation of the candidates.  */
5618 
5619 static void
5620 determine_iv_costs (struct ivopts_data *data)
5621 {
5622   unsigned i;
5623 
5624   if (dump_file && (dump_flags & TDF_DETAILS))
5625     {
5626       fprintf (dump_file, "<Candidate Costs>:\n");
5627       fprintf (dump_file, "  cand\tcost\n");
5628     }
5629 
5630   for (i = 0; i < data->vcands.length (); i++)
5631     {
5632       struct iv_cand *cand = data->vcands[i];
5633 
5634       determine_iv_cost (data, cand);
5635 
5636       if (dump_file && (dump_flags & TDF_DETAILS))
5637 	fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
5638     }
5639 
5640   if (dump_file && (dump_flags & TDF_DETAILS))
5641     fprintf (dump_file, "\n");
5642 }
5643 
5644 /* Estimate register pressure for loop having N_INVS invariants and N_CANDS
5645    induction variables.  Note N_INVS includes both invariant variables and
5646    invariant expressions.  */
5647 
5648 static unsigned
5649 ivopts_estimate_reg_pressure (struct ivopts_data *data, unsigned n_invs,
5650 			      unsigned n_cands)
5651 {
5652   unsigned cost;
5653   unsigned n_old = data->regs_used, n_new = n_invs + n_cands;
5654   unsigned regs_needed = n_new + n_old, available_regs = target_avail_regs;
5655   bool speed = data->speed;
5656 
5657   /* If there is a call in the loop body, the call-clobbered registers
5658      are not available for loop invariants.  */
5659   if (data->body_includes_call)
5660     available_regs = available_regs - target_clobbered_regs;
5661 
5662   /* If we have enough registers.  */
5663   if (regs_needed + target_res_regs < available_regs)
5664     cost = n_new;
5665   /* If close to running out of registers, try to preserve them.  */
5666   else if (regs_needed <= available_regs)
5667     cost = target_reg_cost [speed] * regs_needed;
5668   /* If the registers needed exceed those available but the number of candidates
5669      does not, we penalize the extra registers using target_spill_cost.  */
5670   else if (n_cands <= available_regs)
5671     cost = target_reg_cost [speed] * available_regs
5672 	   + target_spill_cost [speed] * (regs_needed - available_regs);
5673   /* If even the number of candidates exceeds the available registers, we
5674      penalize the extra candidate registers using target_spill_cost * 2, because
5675      it is more expensive to spill an induction variable than an invariant.  */
5676   else
5677     cost = target_reg_cost [speed] * available_regs
5678 	   + target_spill_cost [speed] * (n_cands - available_regs) * 2
5679 	   + target_spill_cost [speed] * (regs_needed - n_cands);
5680 
5681   /* Finally, add the number of candidates, so that we prefer eliminating
5682      induction variables if possible.  */
5683   return cost + n_cands;
5684 }
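/* A rough worked example of the tiers above (all numbers are illustrative,
   not taken from any target): with available_regs = 16, no call in the loop
   body, n_old = 4, n_invs = 2 and n_cands = 3, we need 9 registers; assuming
   target_res_regs = 3, 9 + 3 is still below 16, so the cost is simply
   n_new = 5 and the function returns 5 + 3 = 8.  If instead 20 registers were
   needed, all 16 available ones would be charged target_reg_cost and the 4
   excess values target_spill_cost each.  */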
5685 
5686 /* For each size of the induction variable set determine the penalty.  */
5687 
5688 static void
5689 determine_set_costs (struct ivopts_data *data)
5690 {
5691   unsigned j, n;
5692   gphi *phi;
5693   gphi_iterator psi;
5694   tree op;
5695   struct loop *loop = data->current_loop;
5696   bitmap_iterator bi;
5697 
5698   if (dump_file && (dump_flags & TDF_DETAILS))
5699     {
5700       fprintf (dump_file, "<Global Costs>:\n");
5701       fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
5702       fprintf (dump_file, "  target_clobbered_regs %d\n", target_clobbered_regs);
5703       fprintf (dump_file, "  target_reg_cost %d\n", target_reg_cost[data->speed]);
5704       fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost[data->speed]);
5705     }
5706 
5707   n = 0;
5708   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5709     {
5710       phi = psi.phi ();
5711       op = PHI_RESULT (phi);
5712 
5713       if (virtual_operand_p (op))
5714 	continue;
5715 
5716       if (get_iv (data, op))
5717 	continue;
5718 
5719       if (!POINTER_TYPE_P (TREE_TYPE (op))
5720 	  && !INTEGRAL_TYPE_P (TREE_TYPE (op)))
5721 	continue;
5722 
5723       n++;
5724     }
5725 
5726   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5727     {
5728       struct version_info *info = ver_info (data, j);
5729 
5730       if (info->inv_id && info->has_nonlin_use)
5731 	n++;
5732     }
5733 
5734   data->regs_used = n;
5735   if (dump_file && (dump_flags & TDF_DETAILS))
5736     fprintf (dump_file, "  regs_used %d\n", n);
5737 
5738   if (dump_file && (dump_flags & TDF_DETAILS))
5739     {
5740       fprintf (dump_file, "  cost for size:\n");
5741       fprintf (dump_file, "  ivs\tcost\n");
5742       for (j = 0; j <= 2 * target_avail_regs; j++)
5743 	fprintf (dump_file, "  %d\t%d\n", j,
5744 		 ivopts_estimate_reg_pressure (data, 0, j));
5745       fprintf (dump_file, "\n");
5746     }
5747 }
5748 
5749 /* Returns true if A is a cheaper cost pair than B.  */
5750 
5751 static bool
5752 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5753 {
5754   if (!a)
5755     return false;
5756 
5757   if (!b)
5758     return true;
5759 
5760   if (a->cost < b->cost)
5761     return true;
5762 
5763   if (b->cost < a->cost)
5764     return false;
5765 
5766   /* In case the costs are the same, prefer the cheaper candidate.  */
5767   if (a->cand->cost < b->cand->cost)
5768     return true;
5769 
5770   return false;
5771 }
5772 
5773 /* Compare if A is a more expensive cost pair than B.  Return 1, 0 and -1
5774    for more expensive, equal and cheaper respectively.  */
5775 
5776 static int
5777 compare_cost_pair (struct cost_pair *a, struct cost_pair *b)
5778 {
5779   if (cheaper_cost_pair (a, b))
5780     return -1;
5781   if (cheaper_cost_pair (b, a))
5782     return 1;
5783 
5784   return 0;
5785 }
5786 
5787 /* Returns the cost pair of the candidate by which GROUP is expressed in IVS.  */
5788 
5789 static struct cost_pair *
5790 iv_ca_cand_for_group (struct iv_ca *ivs, struct iv_group *group)
5791 {
5792   return ivs->cand_for_group[group->id];
5793 }
5794 
5795 /* Computes the cost field of IVS structure.  */
5796 
5797 static void
5798 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5799 {
5800   comp_cost cost = ivs->cand_use_cost;
5801 
5802   cost += ivs->cand_cost;
5803   cost += ivopts_estimate_reg_pressure (data, ivs->n_invs, ivs->n_cands);
5804   ivs->cost = cost;
5805 }
5806 
5807 /* Remove a use of the invariants in set INVS by decreasing their counters in
5808    N_INV_USES and updating IVS.  */
5809 
5810 static void
5811 iv_ca_set_remove_invs (struct iv_ca *ivs, bitmap invs, unsigned *n_inv_uses)
5812 {
5813   bitmap_iterator bi;
5814   unsigned iid;
5815 
5816   if (!invs)
5817     return;
5818 
5819   gcc_assert (n_inv_uses != NULL);
5820   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5821     {
5822       n_inv_uses[iid]--;
5823       if (n_inv_uses[iid] == 0)
5824 	ivs->n_invs--;
5825     }
5826 }
5827 
5828 /* Set GROUP not to be expressed by any candidate in IVS.  */
5829 
5830 static void
5831 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5832 		 struct iv_group *group)
5833 {
5834   unsigned gid = group->id, cid;
5835   struct cost_pair *cp;
5836 
5837   cp = ivs->cand_for_group[gid];
5838   if (!cp)
5839     return;
5840   cid = cp->cand->id;
5841 
5842   ivs->bad_groups++;
5843   ivs->cand_for_group[gid] = NULL;
5844   ivs->n_cand_uses[cid]--;
5845 
5846   if (ivs->n_cand_uses[cid] == 0)
5847     {
5848       bitmap_clear_bit (ivs->cands, cid);
5849       ivs->n_cands--;
5850       ivs->cand_cost -= cp->cand->cost;
5851       iv_ca_set_remove_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses);
5852       iv_ca_set_remove_invs (ivs, cp->cand->inv_exprs, ivs->n_inv_expr_uses);
5853     }
5854 
5855   ivs->cand_use_cost -= cp->cost;
5856   iv_ca_set_remove_invs (ivs, cp->inv_vars, ivs->n_inv_var_uses);
5857   iv_ca_set_remove_invs (ivs, cp->inv_exprs, ivs->n_inv_expr_uses);
5858   iv_ca_recount_cost (data, ivs);
5859 }
5860 
5861 /* Add a use of the invariants in set INVS by increasing their counters in
5862    N_INV_USES and updating IVS.  */
5863 
5864 static void
5865 iv_ca_set_add_invs (struct iv_ca *ivs, bitmap invs, unsigned *n_inv_uses)
5866 {
5867   bitmap_iterator bi;
5868   unsigned iid;
5869 
5870   if (!invs)
5871     return;
5872 
5873   gcc_assert (n_inv_uses != NULL);
5874   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5875     {
5876       n_inv_uses[iid]++;
5877       if (n_inv_uses[iid] == 1)
5878 	ivs->n_invs++;
5879     }
5880 }
5881 
5882 /* Set cost pair for GROUP in set IVS to CP.  */
5883 
5884 static void
5885 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5886 	      struct iv_group *group, struct cost_pair *cp)
5887 {
5888   unsigned gid = group->id, cid;
5889 
5890   if (ivs->cand_for_group[gid] == cp)
5891     return;
5892 
5893   if (ivs->cand_for_group[gid])
5894     iv_ca_set_no_cp (data, ivs, group);
5895 
5896   if (cp)
5897     {
5898       cid = cp->cand->id;
5899 
5900       ivs->bad_groups--;
5901       ivs->cand_for_group[gid] = cp;
5902       ivs->n_cand_uses[cid]++;
5903       if (ivs->n_cand_uses[cid] == 1)
5904 	{
5905 	  bitmap_set_bit (ivs->cands, cid);
5906 	  ivs->n_cands++;
5907 	  ivs->cand_cost += cp->cand->cost;
5908 	  iv_ca_set_add_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses);
5909 	  iv_ca_set_add_invs (ivs, cp->cand->inv_exprs, ivs->n_inv_expr_uses);
5910 	}
5911 
5912       ivs->cand_use_cost += cp->cost;
5913       iv_ca_set_add_invs (ivs, cp->inv_vars, ivs->n_inv_var_uses);
5914       iv_ca_set_add_invs (ivs, cp->inv_exprs, ivs->n_inv_expr_uses);
5915       iv_ca_recount_cost (data, ivs);
5916     }
5917 }
5918 
5919 /* Extend set IVS by expressing GROUP by some of the candidates in it
5920    if possible.  Consider all important candidates if the candidates in
5921    set IVS don't give any result.  */
5922 
5923 static void
5924 iv_ca_add_group (struct ivopts_data *data, struct iv_ca *ivs,
5925 	       struct iv_group *group)
5926 {
5927   struct cost_pair *best_cp = NULL, *cp;
5928   bitmap_iterator bi;
5929   unsigned i;
5930   struct iv_cand *cand;
5931 
5932   gcc_assert (ivs->upto >= group->id);
5933   ivs->upto++;
5934   ivs->bad_groups++;
5935 
5936   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5937     {
5938       cand = data->vcands[i];
5939       cp = get_group_iv_cost (data, group, cand);
5940       if (cheaper_cost_pair (cp, best_cp))
5941 	best_cp = cp;
5942     }
5943 
5944   if (best_cp == NULL)
5945     {
5946       EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5947 	{
5948 	  cand = data->vcands[i];
5949 	  cp = get_group_iv_cost (data, group, cand);
5950 	  if (cheaper_cost_pair (cp, best_cp))
5951 	    best_cp = cp;
5952 	}
5953     }
5954 
5955   iv_ca_set_cp (data, ivs, group, best_cp);
5956 }
5957 
5958 /* Get cost for assignment IVS.  */
5959 
5960 static comp_cost
5961 iv_ca_cost (struct iv_ca *ivs)
5962 {
5963   /* This was a conditional expression but it triggered a bug in
5964      Sun C 5.5.  */
5965   if (ivs->bad_groups)
5966     return infinite_cost;
5967   else
5968     return ivs->cost;
5969 }
5970 
5971 /* Compare if applying NEW_CP to GROUP for IVS introduces more invariants
5972    than OLD_CP.  Return 1, 0 and -1 for more, equal and fewer invariants
5973    respectively.  */
5974 
5975 static int
5976 iv_ca_compare_deps (struct ivopts_data *data, struct iv_ca *ivs,
5977 		    struct iv_group *group, struct cost_pair *old_cp,
5978 		    struct cost_pair *new_cp)
5979 {
5980   gcc_assert (old_cp && new_cp && old_cp != new_cp);
5981   unsigned old_n_invs = ivs->n_invs;
5982   iv_ca_set_cp (data, ivs, group, new_cp);
5983   unsigned new_n_invs = ivs->n_invs;
5984   iv_ca_set_cp (data, ivs, group, old_cp);
5985 
5986   return new_n_invs > old_n_invs ? 1 : (new_n_invs < old_n_invs ? -1 : 0);
5987 }
5988 
5989 /* Creates change of expressing GROUP by NEW_CP instead of OLD_CP and chains
5990    it before NEXT.  */
5991 
5992 static struct iv_ca_delta *
5993 iv_ca_delta_add (struct iv_group *group, struct cost_pair *old_cp,
5994 		 struct cost_pair *new_cp, struct iv_ca_delta *next)
5995 {
5996   struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5997 
5998   change->group = group;
5999   change->old_cp = old_cp;
6000   change->new_cp = new_cp;
6001   change->next = next;
6002 
6003   return change;
6004 }
6005 
6006 /* Joins two lists of changes L1 and L2.  Destructive -- old lists
6007    are rewritten.  */
6008 
6009 static struct iv_ca_delta *
6010 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
6011 {
6012   struct iv_ca_delta *last;
6013 
6014   if (!l2)
6015     return l1;
6016 
6017   if (!l1)
6018     return l2;
6019 
6020   for (last = l1; last->next; last = last->next)
6021     continue;
6022   last->next = l2;
6023 
6024   return l1;
6025 }
6026 
6027 /* Reverse the list of changes DELTA, forming the inverse to it.  */
6028 
6029 static struct iv_ca_delta *
6030 iv_ca_delta_reverse (struct iv_ca_delta *delta)
6031 {
6032   struct iv_ca_delta *act, *next, *prev = NULL;
6033 
6034   for (act = delta; act; act = next)
6035     {
6036       next = act->next;
6037       act->next = prev;
6038       prev = act;
6039 
6040       std::swap (act->old_cp, act->new_cp);
6041     }
6042 
6043   return prev;
6044 }
6045 
6046 /* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
6047    reverted instead.  */
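/* The typical usage pattern in this file (see e.g. iv_ca_extend below) is to
   build a delta tentatively, commit it to measure the resulting cost, and
   then commit it again with FORWARD false to undo it:

     iv_ca_delta_commit (data, ivs, delta, true);
     cost = iv_ca_cost (ivs);
     iv_ca_delta_commit (data, ivs, delta, false);

   so a delta behaves like a reversible transaction on the assignment.  */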
6048 
6049 static void
6050 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
6051 		    struct iv_ca_delta *delta, bool forward)
6052 {
6053   struct cost_pair *from, *to;
6054   struct iv_ca_delta *act;
6055 
6056   if (!forward)
6057     delta = iv_ca_delta_reverse (delta);
6058 
6059   for (act = delta; act; act = act->next)
6060     {
6061       from = act->old_cp;
6062       to = act->new_cp;
6063       gcc_assert (iv_ca_cand_for_group (ivs, act->group) == from);
6064       iv_ca_set_cp (data, ivs, act->group, to);
6065     }
6066 
6067   if (!forward)
6068     iv_ca_delta_reverse (delta);
6069 }
6070 
6071 /* Returns true if CAND is used in IVS.  */
6072 
6073 static bool
6074 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
6075 {
6076   return ivs->n_cand_uses[cand->id] > 0;
6077 }
6078 
6079 /* Returns number of induction variable candidates in the set IVS.  */
6080 
6081 static unsigned
6082 iv_ca_n_cands (struct iv_ca *ivs)
6083 {
6084   return ivs->n_cands;
6085 }
6086 
6087 /* Free the list of changes DELTA.  */
6088 
6089 static void
6090 iv_ca_delta_free (struct iv_ca_delta **delta)
6091 {
6092   struct iv_ca_delta *act, *next;
6093 
6094   for (act = *delta; act; act = next)
6095     {
6096       next = act->next;
6097       free (act);
6098     }
6099 
6100   *delta = NULL;
6101 }
6102 
6103 /* Allocates a new iv candidate assignment.  */
6104 
6105 static struct iv_ca *
6106 iv_ca_new (struct ivopts_data *data)
6107 {
6108   struct iv_ca *nw = XNEW (struct iv_ca);
6109 
6110   nw->upto = 0;
6111   nw->bad_groups = 0;
6112   nw->cand_for_group = XCNEWVEC (struct cost_pair *,
6113 				 data->vgroups.length ());
6114   nw->n_cand_uses = XCNEWVEC (unsigned, data->vcands.length ());
6115   nw->cands = BITMAP_ALLOC (NULL);
6116   nw->n_cands = 0;
6117   nw->n_invs = 0;
6118   nw->cand_use_cost = no_cost;
6119   nw->cand_cost = 0;
6120   nw->n_inv_var_uses = XCNEWVEC (unsigned, data->max_inv_var_id + 1);
6121   nw->n_inv_expr_uses = XCNEWVEC (unsigned, data->max_inv_expr_id + 1);
6122   nw->cost = no_cost;
6123 
6124   return nw;
6125 }
6126 
6127 /* Free memory occupied by the set IVS.  */
6128 
6129 static void
6130 iv_ca_free (struct iv_ca **ivs)
6131 {
6132   free ((*ivs)->cand_for_group);
6133   free ((*ivs)->n_cand_uses);
6134   BITMAP_FREE ((*ivs)->cands);
6135   free ((*ivs)->n_inv_var_uses);
6136   free ((*ivs)->n_inv_expr_uses);
6137   free (*ivs);
6138   *ivs = NULL;
6139 }
6140 
6141 /* Dumps IVS to FILE.  */
6142 
6143 static void
6144 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
6145 {
6146   unsigned i;
6147   comp_cost cost = iv_ca_cost (ivs);
6148 
6149   fprintf (file, "  cost: %d (complexity %d)\n", cost.cost,
6150 	   cost.complexity);
6151   fprintf (file, "  cand_cost: %d\n  cand_group_cost: %d (complexity %d)\n",
6152 	   ivs->cand_cost, ivs->cand_use_cost.cost,
6153 	   ivs->cand_use_cost.complexity);
6154   bitmap_print (file, ivs->cands, "  candidates: ","\n");
6155 
6156   for (i = 0; i < ivs->upto; i++)
6157     {
6158       struct iv_group *group = data->vgroups[i];
6159       struct cost_pair *cp = iv_ca_cand_for_group (ivs, group);
6160       if (cp)
6161         fprintf (file, "   group:%d --> iv_cand:%d, cost=(%d,%d)\n",
6162 		 group->id, cp->cand->id, cp->cost.cost,
6163 		 cp->cost.complexity);
6164       else
6165 	fprintf (file, "   group:%d --> ??\n", group->id);
6166     }
6167 
6168   const char *pref = "";
6169   fprintf (file, "  invariant variables: ");
6170   for (i = 1; i <= data->max_inv_var_id; i++)
6171     if (ivs->n_inv_var_uses[i])
6172       {
6173 	fprintf (file, "%s%d", pref, i);
6174 	pref = ", ";
6175       }
6176 
6177   pref = "";
6178   fprintf (file, "\n  invariant expressions: ");
6179   for (i = 1; i <= data->max_inv_expr_id; i++)
6180     if (ivs->n_inv_expr_uses[i])
6181       {
6182 	fprintf (file, "%s%d", pref, i);
6183 	pref = ", ";
6184       }
6185 
6186   fprintf (file, "\n\n");
6187 }
6188 
6189 /* Try changing the candidate in IVS to CAND for each use.  Return the cost
6190    of the new set, and store the differences in DELTA.  The number of induction
6191    variables in the new set is stored in N_IVS.  MIN_NCAND is a flag; when it is
6192    true, the function tries to find a solution with minimal iv candidates.  */
6193 
6194 static comp_cost
6195 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
6196 	      struct iv_cand *cand, struct iv_ca_delta **delta,
6197 	      unsigned *n_ivs, bool min_ncand)
6198 {
6199   unsigned i;
6200   comp_cost cost;
6201   struct iv_group *group;
6202   struct cost_pair *old_cp, *new_cp;
6203 
6204   *delta = NULL;
6205   for (i = 0; i < ivs->upto; i++)
6206     {
6207       group = data->vgroups[i];
6208       old_cp = iv_ca_cand_for_group (ivs, group);
6209 
6210       if (old_cp
6211 	  && old_cp->cand == cand)
6212 	continue;
6213 
6214       new_cp = get_group_iv_cost (data, group, cand);
6215       if (!new_cp)
6216 	continue;
6217 
6218       if (!min_ncand)
6219 	{
6220 	  int cmp_invs = iv_ca_compare_deps (data, ivs, group, old_cp, new_cp);
6221 	  /* Skip if new_cp depends on more invariants.  */
6222 	  if (cmp_invs > 0)
6223 	    continue;
6224 
6225 	  int cmp_cost = compare_cost_pair (new_cp, old_cp);
6226 	  /* Skip if new_cp is not cheaper.  */
6227 	  if (cmp_cost > 0 || (cmp_cost == 0 && cmp_invs == 0))
6228 	    continue;
6229 	}
6230 
6231       *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
6232     }
6233 
6234   iv_ca_delta_commit (data, ivs, *delta, true);
6235   cost = iv_ca_cost (ivs);
6236   if (n_ivs)
6237     *n_ivs = iv_ca_n_cands (ivs);
6238   iv_ca_delta_commit (data, ivs, *delta, false);
6239 
6240   return cost;
6241 }
6242 
6243 /* Try narrowing set IVS by removing CAND.  Return the cost of
6244    the new set and store the differences in DELTA.  START is
6245    the candidate with which we start narrowing.  */
6246 
6247 static comp_cost
6248 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
6249 	      struct iv_cand *cand, struct iv_cand *start,
6250 	      struct iv_ca_delta **delta)
6251 {
6252   unsigned i, ci;
6253   struct iv_group *group;
6254   struct cost_pair *old_cp, *new_cp, *cp;
6255   bitmap_iterator bi;
6256   struct iv_cand *cnd;
6257   comp_cost cost, best_cost, acost;
6258 
6259   *delta = NULL;
6260   for (i = 0; i < data->vgroups.length (); i++)
6261     {
6262       group = data->vgroups[i];
6263 
6264       old_cp = iv_ca_cand_for_group (ivs, group);
6265       if (old_cp->cand != cand)
6266 	continue;
6267 
6268       best_cost = iv_ca_cost (ivs);
6269       /* Start narrowing with START.  */
6270       new_cp = get_group_iv_cost (data, group, start);
6271 
6272       if (data->consider_all_candidates)
6273 	{
6274 	  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
6275 	    {
6276 	      if (ci == cand->id || (start && ci == start->id))
6277 		continue;
6278 
6279 	      cnd = data->vcands[ci];
6280 
6281 	      cp = get_group_iv_cost (data, group, cnd);
6282 	      if (!cp)
6283 		continue;
6284 
6285 	      iv_ca_set_cp (data, ivs, group, cp);
6286 	      acost = iv_ca_cost (ivs);
6287 
6288 	      if (acost < best_cost)
6289 		{
6290 		  best_cost = acost;
6291 		  new_cp = cp;
6292 		}
6293 	    }
6294 	}
6295       else
6296 	{
6297 	  EXECUTE_IF_AND_IN_BITMAP (group->related_cands, ivs->cands, 0, ci, bi)
6298 	    {
6299 	      if (ci == cand->id || (start && ci == start->id))
6300 		continue;
6301 
6302 	      cnd = data->vcands[ci];
6303 
6304 	      cp = get_group_iv_cost (data, group, cnd);
6305 	      if (!cp)
6306 		continue;
6307 
6308 	      iv_ca_set_cp (data, ivs, group, cp);
6309 	      acost = iv_ca_cost (ivs);
6310 
6311 	      if (acost < best_cost)
6312 		{
6313 		  best_cost = acost;
6314 		  new_cp = cp;
6315 		}
6316 	    }
6317 	}
6318       /* Restore to old cp for use.  */
6319       iv_ca_set_cp (data, ivs, group, old_cp);
6320 
6321       if (!new_cp)
6322 	{
6323 	  iv_ca_delta_free (delta);
6324 	  return infinite_cost;
6325 	}
6326 
6327       *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
6328     }
6329 
6330   iv_ca_delta_commit (data, ivs, *delta, true);
6331   cost = iv_ca_cost (ivs);
6332   iv_ca_delta_commit (data, ivs, *delta, false);
6333 
6334   return cost;
6335 }
6336 
6337 /* Try optimizing the set of candidates IVS by removing candidates other than
6338    EXCEPT_CAND from it.  Return the cost of the new set, and store the
6339    differences in DELTA.  */
6340 
6341 static comp_cost
6342 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
6343 	     struct iv_cand *except_cand, struct iv_ca_delta **delta)
6344 {
6345   bitmap_iterator bi;
6346   struct iv_ca_delta *act_delta, *best_delta;
6347   unsigned i;
6348   comp_cost best_cost, acost;
6349   struct iv_cand *cand;
6350 
6351   best_delta = NULL;
6352   best_cost = iv_ca_cost (ivs);
6353 
6354   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6355     {
6356       cand = data->vcands[i];
6357 
6358       if (cand == except_cand)
6359 	continue;
6360 
6361       acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
6362 
6363       if (acost < best_cost)
6364 	{
6365 	  best_cost = acost;
6366 	  iv_ca_delta_free (&best_delta);
6367 	  best_delta = act_delta;
6368 	}
6369       else
6370 	iv_ca_delta_free (&act_delta);
6371     }
6372 
6373   if (!best_delta)
6374     {
6375       *delta = NULL;
6376       return best_cost;
6377     }
6378 
6379   /* Recurse to possibly remove other unnecessary ivs.  */
6380   iv_ca_delta_commit (data, ivs, best_delta, true);
6381   best_cost = iv_ca_prune (data, ivs, except_cand, delta);
6382   iv_ca_delta_commit (data, ivs, best_delta, false);
6383   *delta = iv_ca_delta_join (best_delta, *delta);
6384   return best_cost;
6385 }
6386 
6387 /* Check if CAND_IDX is a candidate other than OLD_CAND that has a
6388    cheaper local cost for GROUP than BEST_CP.  If so, return a pointer to
6389    the corresponding cost_pair; otherwise just return BEST_CP.  */
6390 
6391 static struct cost_pair*
6392 cheaper_cost_with_cand (struct ivopts_data *data, struct iv_group *group,
6393 			unsigned int cand_idx, struct iv_cand *old_cand,
6394 			struct cost_pair *best_cp)
6395 {
6396   struct iv_cand *cand;
6397   struct cost_pair *cp;
6398 
6399   gcc_assert (old_cand != NULL && best_cp != NULL);
6400   if (cand_idx == old_cand->id)
6401     return best_cp;
6402 
6403   cand = data->vcands[cand_idx];
6404   cp = get_group_iv_cost (data, group, cand);
6405   if (cp != NULL && cheaper_cost_pair (cp, best_cp))
6406     return cp;
6407 
6408   return best_cp;
6409 }
6410 
6411 /* Try breaking the locally optimal fixed point for IVS by replacing candidates
6412    which are used by more than one iv use.  For each of those candidates,
6413    this function tries to represent the iv uses under that candidate using
6414    other ones with lower local cost, then tries to prune the new set.
6415    If the new set has a lower cost, it returns the new cost after recording
6416    the candidate replacement in list DELTA.  */
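/* For example (an illustrative sketch): if candidate C is currently chosen
   for two groups but each of those groups has some other candidate with a
   cheaper local cost, replacing C's uses and then pruning may remove C from
   the set entirely -- a move that the one-candidate-at-a-time extension step
   cannot discover by itself.  */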
6417 
6418 static comp_cost
6419 iv_ca_replace (struct ivopts_data *data, struct iv_ca *ivs,
6420 	       struct iv_ca_delta **delta)
6421 {
6422   bitmap_iterator bi, bj;
6423   unsigned int i, j, k;
6424   struct iv_cand *cand;
6425   comp_cost orig_cost, acost;
6426   struct iv_ca_delta *act_delta, *tmp_delta;
6427   struct cost_pair *old_cp, *best_cp = NULL;
6428 
6429   *delta = NULL;
6430   orig_cost = iv_ca_cost (ivs);
6431 
6432   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6433     {
6434       if (ivs->n_cand_uses[i] == 1
6435 	  || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
6436 	continue;
6437 
6438       cand = data->vcands[i];
6439 
6440       act_delta = NULL;
6441       /*  Represent uses under current candidate using other ones with
6442 	  lower local cost.  */
6443       for (j = 0; j < ivs->upto; j++)
6444 	{
6445 	  struct iv_group *group = data->vgroups[j];
6446 	  old_cp = iv_ca_cand_for_group (ivs, group);
6447 
6448 	  if (old_cp->cand != cand)
6449 	    continue;
6450 
6451 	  best_cp = old_cp;
6452 	  if (data->consider_all_candidates)
6453 	    for (k = 0; k < data->vcands.length (); k++)
6454 	      best_cp = cheaper_cost_with_cand (data, group, k,
6455 						old_cp->cand, best_cp);
6456 	  else
6457 	    EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, k, bj)
6458 	      best_cp = cheaper_cost_with_cand (data, group, k,
6459 						old_cp->cand, best_cp);
6460 
6461 	  if (best_cp == old_cp)
6462 	    continue;
6463 
6464 	  act_delta = iv_ca_delta_add (group, old_cp, best_cp, act_delta);
6465 	}
6466       /* No need for further pruning.  */
6467       if (!act_delta)
6468 	continue;
6469 
6470       /* Prune the new candidate set.  */
6471       iv_ca_delta_commit (data, ivs, act_delta, true);
6472       acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
6473       iv_ca_delta_commit (data, ivs, act_delta, false);
6474       act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6475 
6476       if (acost < orig_cost)
6477 	{
6478 	  *delta = act_delta;
6479 	  return acost;
6480 	}
6481       else
6482 	iv_ca_delta_free (&act_delta);
6483     }
6484 
6485   return orig_cost;
6486 }
6487 
6488 /* Tries to extend the set IVS in the best possible way in order to
6489    express GROUP.  If ORIGINALP is true, prefer candidates from
6490    the original set of IVs; otherwise favor important candidates not
6491    based on any memory object.  */
6492 
6493 static bool
6494 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
6495 		  struct iv_group *group, bool originalp)
6496 {
6497   comp_cost best_cost, act_cost;
6498   unsigned i;
6499   bitmap_iterator bi;
6500   struct iv_cand *cand;
6501   struct iv_ca_delta *best_delta = NULL, *act_delta;
6502   struct cost_pair *cp;
6503 
6504   iv_ca_add_group (data, ivs, group);
6505   best_cost = iv_ca_cost (ivs);
6506   cp = iv_ca_cand_for_group (ivs, group);
6507   if (cp)
6508     {
6509       best_delta = iv_ca_delta_add (group, NULL, cp, NULL);
6510       iv_ca_set_no_cp (data, ivs, group);
6511     }
6512 
6513   /* If ORIGINALP is true, try to find the original IV for the use.  Otherwise
6514      first try important candidates not based on any memory object.  Only if
6515      this fails, try the specific ones.  Rationale -- in loops with many
6516      variables the best choice often is to use just one generic biv.  If we
6517      added here many ivs specific to the uses, the optimization algorithm later
6518      would be likely to get stuck in a local minimum, thus causing us to create
6519      too many ivs.  The approach from few ivs to more seems more likely to be
6520      successful -- starting from few ivs, replacing an expensive use by a
6521      specific iv should always be a win.  */
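  /* As a hedged illustration of the rationale above: in a loop that touches
     a[i], b[i] and c[i], a single generic biv "i" whose offsets are folded
     into the addressing of each access is usually preferable to three
     separate pointer ivs, and the search below is biased accordingly.  */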
6522   EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, i, bi)
6523     {
6524       cand = data->vcands[i];
6525 
6526       if (originalp && cand->pos != IP_ORIGINAL)
6527 	continue;
6528 
6529       if (!originalp && cand->iv->base_object != NULL_TREE)
6530 	continue;
6531 
6532       if (iv_ca_cand_used_p (ivs, cand))
6533 	continue;
6534 
6535       cp = get_group_iv_cost (data, group, cand);
6536       if (!cp)
6537 	continue;
6538 
6539       iv_ca_set_cp (data, ivs, group, cp);
6540       act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
6541 			       true);
6542       iv_ca_set_no_cp (data, ivs, group);
6543       act_delta = iv_ca_delta_add (group, NULL, cp, act_delta);
6544 
6545       if (act_cost < best_cost)
6546 	{
6547 	  best_cost = act_cost;
6548 
6549 	  iv_ca_delta_free (&best_delta);
6550 	  best_delta = act_delta;
6551 	}
6552       else
6553 	iv_ca_delta_free (&act_delta);
6554     }
6555 
6556   if (best_cost.infinite_cost_p ())
6557     {
6558       for (i = 0; i < group->n_map_members; i++)
6559 	{
6560 	  cp = group->cost_map + i;
6561 	  cand = cp->cand;
6562 	  if (!cand)
6563 	    continue;
6564 
6565 	  /* Already tried this.  */
6566 	  if (cand->important)
6567 	    {
6568 	      if (originalp && cand->pos == IP_ORIGINAL)
6569 		continue;
6570 	      if (!originalp && cand->iv->base_object == NULL_TREE)
6571 		continue;
6572 	    }
6573 
6574 	  if (iv_ca_cand_used_p (ivs, cand))
6575 	    continue;
6576 
6577 	  act_delta = NULL;
6578 	  iv_ca_set_cp (data, ivs, group, cp);
6579 	  act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
6580 	  iv_ca_set_no_cp (data, ivs, group);
6581 	  act_delta = iv_ca_delta_add (group,
6582 				       iv_ca_cand_for_group (ivs, group),
6583 				       cp, act_delta);
6584 
6585 	  if (act_cost < best_cost)
6586 	    {
6587 	      best_cost = act_cost;
6588 
6589 	      if (best_delta)
6590 		iv_ca_delta_free (&best_delta);
6591 	      best_delta = act_delta;
6592 	    }
6593 	  else
6594 	    iv_ca_delta_free (&act_delta);
6595 	}
6596     }
6597 
6598   iv_ca_delta_commit (data, ivs, best_delta, true);
6599   iv_ca_delta_free (&best_delta);
6600 
6601   return !best_cost.infinite_cost_p ();
6602 }
6603 
6604 /* Finds an initial assignment of candidates to uses.  */
6605 
6606 static struct iv_ca *
6607 get_initial_solution (struct ivopts_data *data, bool originalp)
6608 {
6609   unsigned i;
6610   struct iv_ca *ivs = iv_ca_new (data);
6611 
6612   for (i = 0; i < data->vgroups.length (); i++)
6613     if (!try_add_cand_for (data, ivs, data->vgroups[i], originalp))
6614       {
6615 	iv_ca_free (&ivs);
6616 	return NULL;
6617       }
6618 
6619   return ivs;
6620 }
6621 
6622 /* Tries to improve the set of induction variables IVS.  TRY_REPLACE_P
6623    points to a bool variable; if it is true, this function tries to break
6624    a locally optimal fixed point by replacing candidates in IVS.  */
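/* Roughly, a single improvement step below therefore looks like:

     for each candidate C not yet in the set:
       tentatively extend the set with C (pruning if it stays small enough);
     if no extension lowered the cost, try pruning alone;
     if that also failed and replacement has not been tried yet,
       fall back to iv_ca_replace once;

   and the cheapest delta found, if any, is committed.  */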
6625 
6626 static bool
6627 try_improve_iv_set (struct ivopts_data *data,
6628 		    struct iv_ca *ivs, bool *try_replace_p)
6629 {
6630   unsigned i, n_ivs;
6631   comp_cost acost, best_cost = iv_ca_cost (ivs);
6632   struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
6633   struct iv_cand *cand;
6634 
6635   /* Try extending the set of induction variables by one.  */
6636   for (i = 0; i < data->vcands.length (); i++)
6637     {
6638       cand = data->vcands[i];
6639 
6640       if (iv_ca_cand_used_p (ivs, cand))
6641 	continue;
6642 
6643       acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6644       if (!act_delta)
6645 	continue;
6646 
6647       /* If we successfully added the candidate and the set is small enough,
6648 	 try optimizing it by removing other candidates.  */
6649       if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6650       	{
6651 	  iv_ca_delta_commit (data, ivs, act_delta, true);
6652 	  acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6653 	  iv_ca_delta_commit (data, ivs, act_delta, false);
6654 	  act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6655 	}
6656 
6657       if (acost < best_cost)
6658 	{
6659 	  best_cost = acost;
6660 	  iv_ca_delta_free (&best_delta);
6661 	  best_delta = act_delta;
6662 	}
6663       else
6664 	iv_ca_delta_free (&act_delta);
6665     }
6666 
6667   if (!best_delta)
6668     {
6669       /* Try removing the candidates from the set instead.  */
6670       best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6671 
6672       if (!best_delta && *try_replace_p)
6673 	{
6674 	  *try_replace_p = false;
6675 	  /* So far the candidate selection algorithm tends to choose fewer IVs
6676 	     so that it can handle cases in which loops have many variables but
6677 	     the best choice is often to use only one general biv.  One weakness
6678 	     is that it can't handle the opposite cases, in which different
6679 	     candidates should be chosen with respect to each use.  To solve
6680 	     the problem, we replace candidates in the manner described in the
6681 	     comments of iv_ca_replace, thus giving the general algorithm a
6682 	     chance to break the locally optimal fixed point in these cases.  */
6683 	  best_cost = iv_ca_replace (data, ivs, &best_delta);
6684 	}
6685 
6686       if (!best_delta)
6687 	return false;
6688     }
6689 
6690   iv_ca_delta_commit (data, ivs, best_delta, true);
6691   gcc_assert (best_cost == iv_ca_cost (ivs));
6692   iv_ca_delta_free (&best_delta);
6693   return true;
6694 }
6695 
6696 /* Attempts to find the optimal set of induction variables.  We use a simple
6697    greedy heuristic -- we try to replace at most one candidate in the selected
6698    solution and remove the unused ivs as long as this improves the cost.  */
6699 
6700 static struct iv_ca *
6701 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
6702 {
6703   struct iv_ca *set;
6704   bool try_replace_p = true;
6705 
6706   /* Get the initial solution.  */
6707   set = get_initial_solution (data, originalp);
6708   if (!set)
6709     {
6710       if (dump_file && (dump_flags & TDF_DETAILS))
6711 	fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
6712       return NULL;
6713     }
6714 
6715   if (dump_file && (dump_flags & TDF_DETAILS))
6716     {
6717       fprintf (dump_file, "Initial set of candidates:\n");
6718       iv_ca_dump (data, dump_file, set);
6719     }
6720 
6721   while (try_improve_iv_set (data, set, &try_replace_p))
6722     {
6723       if (dump_file && (dump_flags & TDF_DETAILS))
6724 	{
6725 	  fprintf (dump_file, "Improved to:\n");
6726 	  iv_ca_dump (data, dump_file, set);
6727 	}
6728     }
6729 
6730   return set;
6731 }
6732 
6733 static struct iv_ca *
6734 find_optimal_iv_set (struct ivopts_data *data)
6735 {
6736   unsigned i;
6737   comp_cost cost, origcost;
6738   struct iv_ca *set, *origset;
6739 
6740   /* Determine the cost based on a strategy that starts with the original IVs,
6741      and then try again using a strategy that prefers candidates not based
6742      on any IVs.  */
6743   origset = find_optimal_iv_set_1 (data, true);
6744   set = find_optimal_iv_set_1 (data, false);
6745 
6746   if (!origset && !set)
6747     return NULL;
6748 
6749   origcost = origset ? iv_ca_cost (origset) : infinite_cost;
6750   cost = set ? iv_ca_cost (set) : infinite_cost;
6751 
6752   if (dump_file && (dump_flags & TDF_DETAILS))
6753     {
6754       fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
6755 	       origcost.cost, origcost.complexity);
6756       fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
6757 	       cost.cost, cost.complexity);
6758     }
6759 
6760   /* Choose the one with the best cost.  */
6761   if (origcost <= cost)
6762     {
6763       if (set)
6764 	iv_ca_free (&set);
6765       set = origset;
6766     }
6767   else if (origset)
6768     iv_ca_free (&origset);
6769 
6770   for (i = 0; i < data->vgroups.length (); i++)
6771     {
6772       struct iv_group *group = data->vgroups[i];
6773       group->selected = iv_ca_cand_for_group (set, group)->cand;
6774     }
6775 
6776   return set;
6777 }
6778 
6779 /* Creates a new induction variable corresponding to CAND.  */
6780 
6781 static void
6782 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
6783 {
6784   gimple_stmt_iterator incr_pos;
6785   tree base;
6786   struct iv_use *use;
6787   struct iv_group *group;
6788   bool after = false;
6789 
6790   gcc_assert (cand->iv != NULL);
6791 
6792   switch (cand->pos)
6793     {
6794     case IP_NORMAL:
6795       incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
6796       break;
6797 
6798     case IP_END:
6799       incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
6800       after = true;
6801       break;
6802 
6803     case IP_AFTER_USE:
6804       after = true;
6805       /* fall through */
6806     case IP_BEFORE_USE:
6807       incr_pos = gsi_for_stmt (cand->incremented_at);
6808       break;
6809 
6810     case IP_ORIGINAL:
6811       /* Mark that the iv is preserved.  */
6812       name_info (data, cand->var_before)->preserve_biv = true;
6813       name_info (data, cand->var_after)->preserve_biv = true;
6814 
6815       /* Rewrite the increment so that it uses var_before directly.  */
6816       use = find_interesting_uses_op (data, cand->var_after);
6817       group = data->vgroups[use->group_id];
6818       group->selected = cand;
6819       return;
6820     }
6821 
6822   gimple_add_tmp_var (cand->var_before);
6823 
6824   base = unshare_expr (cand->iv->base);
6825 
6826   create_iv (base, unshare_expr (cand->iv->step),
6827 	     cand->var_before, data->current_loop,
6828 	     &incr_pos, after, &cand->var_before, &cand->var_after);
6829 }
6830 
6831 /* Creates new induction variables described in SET.  */
6832 
6833 static void
6834 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
6835 {
6836   unsigned i;
6837   struct iv_cand *cand;
6838   bitmap_iterator bi;
6839 
6840   EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6841     {
6842       cand = data->vcands[i];
6843       create_new_iv (data, cand);
6844     }
6845 
6846   if (dump_file && (dump_flags & TDF_DETAILS))
6847     {
6848       fprintf (dump_file, "Selected IV set for loop %d",
6849 	       data->current_loop->num);
6850       if (data->loop_loc != UNKNOWN_LOCATION)
6851 	fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
6852 		 LOCATION_LINE (data->loop_loc));
6853       fprintf (dump_file, ", " HOST_WIDE_INT_PRINT_DEC " avg niters",
6854 	       avg_loop_niter (data->current_loop));
6855       fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
6856       EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6857 	{
6858 	  cand = data->vcands[i];
6859 	  dump_cand (dump_file, cand);
6860 	}
6861       fprintf (dump_file, "\n");
6862     }
6863 }
6864 
6865 /* Rewrites USE (definition of iv used in a nonlinear expression)
6866    using candidate CAND.  */
6867 
6868 static void
6869 rewrite_use_nonlinear_expr (struct ivopts_data *data,
6870 			    struct iv_use *use, struct iv_cand *cand)
6871 {
6872   gassign *ass;
6873   gimple_stmt_iterator bsi;
6874   tree comp, type = get_use_type (use), tgt;
6875 
6876   /* An important special case -- if we are asked to express value of
6877      the original iv by itself, just exit; there is no need to
6878      introduce a new computation (that might also need casting the
6879      variable to unsigned and back).  */
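  /* For instance (sketch): if the use is the original increment
     "i_7 = i_4 + 1" and the chosen candidate is that very biv, rewriting the
     statement in terms of itself would only add a copy, so it is left alone
     below whenever its other operand is loop-invariant (or an iv with zero
     step).  */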
6880   if (cand->pos == IP_ORIGINAL
6881       && cand->incremented_at == use->stmt)
6882     {
6883       tree op = NULL_TREE;
6884       enum tree_code stmt_code;
6885 
6886       gcc_assert (is_gimple_assign (use->stmt));
6887       gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
6888 
6889       /* Check whether we may leave the computation unchanged.
6890 	 This is the case only if it does not rely on other
6891 	 computations in the loop -- otherwise, the computation
6892 	 we rely upon may be removed in remove_unused_ivs,
6893 	 thus leading to ICE.  */
6894       stmt_code = gimple_assign_rhs_code (use->stmt);
6895       if (stmt_code == PLUS_EXPR
6896 	  || stmt_code == MINUS_EXPR
6897 	  || stmt_code == POINTER_PLUS_EXPR)
6898 	{
6899 	  if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
6900 	    op = gimple_assign_rhs2 (use->stmt);
6901 	  else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
6902 	    op = gimple_assign_rhs1 (use->stmt);
6903 	}
6904 
6905       if (op != NULL_TREE)
6906 	{
6907 	  if (expr_invariant_in_loop_p (data->current_loop, op))
6908 	    return;
6909 	  if (TREE_CODE (op) == SSA_NAME)
6910 	    {
6911 	      struct iv *iv = get_iv (data, op);
6912 	      if (iv != NULL && integer_zerop (iv->step))
6913 		return;
6914 	    }
6915 	}
6916     }
6917 
6918   switch (gimple_code (use->stmt))
6919     {
6920     case GIMPLE_PHI:
6921       tgt = PHI_RESULT (use->stmt);
6922 
6923       /* If we should keep the biv, do not replace it.  */
6924       if (name_info (data, tgt)->preserve_biv)
6925 	return;
6926 
6927       bsi = gsi_after_labels (gimple_bb (use->stmt));
6928       break;
6929 
6930     case GIMPLE_ASSIGN:
6931       tgt = gimple_assign_lhs (use->stmt);
6932       bsi = gsi_for_stmt (use->stmt);
6933       break;
6934 
6935     default:
6936       gcc_unreachable ();
6937     }
6938 
6939   aff_tree aff_inv, aff_var;
6940   if (!get_computation_aff_1 (data->current_loop, use->stmt,
6941 			      use, cand, &aff_inv, &aff_var))
6942     gcc_unreachable ();
6943 
6944   unshare_aff_combination (&aff_inv);
6945   unshare_aff_combination (&aff_var);
6946   /* Prefer CSE opportunities over loop invariants by adding the offset
6947      last, so that iv_uses with different offsets can be CSEed.  */
6948   poly_widest_int offset = aff_inv.offset;
6949   aff_inv.offset = 0;
6950 
6951   gimple_seq stmt_list = NULL, seq = NULL;
6952   tree comp_op1 = aff_combination_to_tree (&aff_inv);
6953   tree comp_op2 = aff_combination_to_tree (&aff_var);
6954   gcc_assert (comp_op1 && comp_op2);
6955 
6956   comp_op1 = force_gimple_operand (comp_op1, &seq, true, NULL);
6957   gimple_seq_add_seq (&stmt_list, seq);
6958   comp_op2 = force_gimple_operand (comp_op2, &seq, true, NULL);
6959   gimple_seq_add_seq (&stmt_list, seq);
6960 
6961   if (POINTER_TYPE_P (TREE_TYPE (comp_op2)))
6962     std::swap (comp_op1, comp_op2);
6963 
6964   if (POINTER_TYPE_P (TREE_TYPE (comp_op1)))
6965     {
6966       comp = fold_build_pointer_plus (comp_op1,
6967 				      fold_convert (sizetype, comp_op2));
6968       comp = fold_build_pointer_plus (comp,
6969 				      wide_int_to_tree (sizetype, offset));
6970     }
6971   else
6972     {
6973       comp = fold_build2 (PLUS_EXPR, TREE_TYPE (comp_op1), comp_op1,
6974 			  fold_convert (TREE_TYPE (comp_op1), comp_op2));
6975       comp = fold_build2 (PLUS_EXPR, TREE_TYPE (comp_op1), comp,
6976 			  wide_int_to_tree (TREE_TYPE (comp_op1), offset));
6977     }
6978 
6979   comp = fold_convert (type, comp);
6980   if (!valid_gimple_rhs_p (comp)
6981       || (gimple_code (use->stmt) != GIMPLE_PHI
6982 	  /* We can't allow re-allocating the stmt as it might be pointed
6983 	     to still.  */
6984 	  && (get_gimple_rhs_num_ops (TREE_CODE (comp))
6985 	      >= gimple_num_ops (gsi_stmt (bsi)))))
6986     {
6987       comp = force_gimple_operand (comp, &seq, true, NULL);
6988       gimple_seq_add_seq (&stmt_list, seq);
6989       if (POINTER_TYPE_P (TREE_TYPE (tgt)))
6990 	{
6991 	  duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
6992 	  /* As this isn't a plain copy we have to reset alignment
6993 	     information.  */
6994 	  if (SSA_NAME_PTR_INFO (comp))
6995 	    mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
6996 	}
6997     }
6998 
6999   gsi_insert_seq_before (&bsi, stmt_list, GSI_SAME_STMT);
7000   if (gimple_code (use->stmt) == GIMPLE_PHI)
7001     {
7002       ass = gimple_build_assign (tgt, comp);
7003       gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
7004 
7005       bsi = gsi_for_stmt (use->stmt);
7006       remove_phi_node (&bsi, false);
7007     }
7008   else
7009     {
7010       gimple_assign_set_rhs_from_tree (&bsi, comp);
7011       use->stmt = gsi_stmt (bsi);
7012     }
7013 }
7014 
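/* Illustrative sketch (not part of the pass): given a nonlinear use such as

     a_5 = i_1 * 4;

   and a selected candidate whose value at the use is ivtmp_2 (all names
   hypothetical), the rewrite above re-expresses the right-hand side roughly as

     a_5 = inv_6 + ivtmp_2;

   where inv_6 holds the loop-invariant part (aff_inv) forced out of the
   expression, and the constant offset is added last to expose CSE between
   uses that differ only in that offset.  */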
7015 /* Performs a peephole optimization to reorder the iv update statement with
7016    a mem ref to enable instruction combining in later phases. The mem ref uses
7017    the iv value before the update, so the reordering transformation requires
7018    adjustment of the offset. CAND is the selected IV_CAND.
7019 
7020    Example:
7021 
7022    t = MEM_REF (base, iv1, 8, 16);  // base, index, stride, offset
7023    iv2 = iv1 + 1;
7024 
7025    if (t < val)      (1)
7026      goto L;
7027    goto Head;
7028 
7029 
7030    Directly propagating t over to (1) would introduce an overlapping live
7031    range and thus increase register pressure.  This peephole transforms it into:
7032 
7033 
7034    iv2 = iv1 + 1;
7035    t = MEM_REF (base, iv2, 8, 8);
7036    if (t < val)
7037      goto L;
7038    goto Head;
7039 */
7040 
7041 static void
7042 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
7043 {
7044   tree var_after;
7045   gimple *iv_update, *stmt;
7046   basic_block bb;
7047   gimple_stmt_iterator gsi, gsi_iv;
7048 
7049   if (cand->pos != IP_NORMAL)
7050     return;
7051 
7052   var_after = cand->var_after;
7053   iv_update = SSA_NAME_DEF_STMT (var_after);
7054 
7055   bb = gimple_bb (iv_update);
7056   gsi = gsi_last_nondebug_bb (bb);
7057   stmt = gsi_stmt (gsi);
7058 
7059   /* Only handle conditional statement for now.  */
7060   if (gimple_code (stmt) != GIMPLE_COND)
7061     return;
7062 
7063   gsi_prev_nondebug (&gsi);
7064   stmt = gsi_stmt (gsi);
7065   if (stmt != iv_update)
7066     return;
7067 
7068   gsi_prev_nondebug (&gsi);
7069   if (gsi_end_p (gsi))
7070     return;
7071 
7072   stmt = gsi_stmt (gsi);
7073   if (gimple_code (stmt) != GIMPLE_ASSIGN)
7074     return;
7075 
7076   if (stmt != use->stmt)
7077     return;
7078 
7079   if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
7080     return;
7081 
7082   if (dump_file && (dump_flags & TDF_DETAILS))
7083     {
7084       fprintf (dump_file, "Reordering \n");
7085       print_gimple_stmt (dump_file, iv_update, 0);
7086       print_gimple_stmt (dump_file, use->stmt, 0);
7087       fprintf (dump_file, "\n");
7088     }
7089 
7090   gsi = gsi_for_stmt (use->stmt);
7091   gsi_iv = gsi_for_stmt (iv_update);
7092   gsi_move_before (&gsi_iv, &gsi);
7093 
7094   cand->pos = IP_BEFORE_USE;
7095   cand->incremented_at = use->stmt;
7096 }
7097 
7098 /* Return the alias pointer type that should be used for a MEM_REF
7099    associated with USE, which has type USE_PTR_ADDRESS.  */
7100 
7101 static tree
7102 get_alias_ptr_type_for_ptr_address (iv_use *use)
7103 {
7104   gcall *call = as_a <gcall *> (use->stmt);
7105   switch (gimple_call_internal_fn (call))
7106     {
7107     case IFN_MASK_LOAD:
7108     case IFN_MASK_STORE:
7109       /* The second argument contains the correct alias type.  */
7110       gcc_assert (use->op_p == gimple_call_arg_ptr (call, 0));
7111       return TREE_TYPE (gimple_call_arg (call, 1));
7112 
7113     default:
7114       gcc_unreachable ();
7115     }
7116 }
7117 
7118 
7119 /* Rewrites USE (address that is an iv) using candidate CAND.  */
7120 
7121 static void
7122 rewrite_use_address (struct ivopts_data *data,
7123 		     struct iv_use *use, struct iv_cand *cand)
7124 {
7125   aff_tree aff;
7126   bool ok;
7127 
7128   adjust_iv_update_pos (cand, use);
7129   ok = get_computation_aff (data->current_loop, use->stmt, use, cand, &aff);
7130   gcc_assert (ok);
7131   unshare_aff_combination (&aff);
7132 
7133   /* To avoid undefined overflow problems, all IV candidates use unsigned
7134      integer types.  The drawback is that this makes it impossible for
7135      create_mem_ref to distinguish an IV that is based on a memory object
7136      from one that represents simply an offset.
7137 
7138      To work around this problem, we pass a hint to create_mem_ref that
7139      indicates which variable (if any) in aff is an IV based on a memory
7140      object.  Note that we only consider the candidate.  If this is not
7141      based on an object, the base of the reference is in some subexpression
7142      of the use -- but these will use pointer types, so they are recognized
7143      by the create_mem_ref heuristics anyway.  */
7144   tree iv = var_at_stmt (data->current_loop, cand, use->stmt);
7145   tree base_hint = (cand->iv->base_object) ? iv : NULL_TREE;
7146   gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7147   tree type = use->mem_type;
7148   tree alias_ptr_type;
7149   if (use->type == USE_PTR_ADDRESS)
7150     alias_ptr_type = get_alias_ptr_type_for_ptr_address (use);
7151   else
7152     {
7153       gcc_assert (type == TREE_TYPE (*use->op_p));
7154       unsigned int align = get_object_alignment (*use->op_p);
7155       if (align != TYPE_ALIGN (type))
7156 	type = build_aligned_type (type, align);
7157       alias_ptr_type = reference_alias_ptr_type (*use->op_p);
7158     }
7159   tree ref = create_mem_ref (&bsi, type, &aff, alias_ptr_type,
7160 			     iv, base_hint, data->speed);
7161 
7162   if (use->type == USE_PTR_ADDRESS)
7163     {
7164       ref = fold_build1 (ADDR_EXPR, build_pointer_type (use->mem_type), ref);
7165       ref = fold_convert (get_use_type (use), ref);
7166       ref = force_gimple_operand_gsi (&bsi, ref, true, NULL_TREE,
7167 				      true, GSI_SAME_STMT);
7168     }
7169   else
7170     copy_ref_info (ref, *use->op_p);
7171 
7172   *use->op_p = ref;
7173 }
7174 
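/* Illustrative sketch (not part of the pass): for an address use originating
   from something like

     ... = a[i_1];

   create_mem_ref above may fold the whole address computation into the
   memory reference, e.g. (hypothetical names)

     ... = MEM[base: ivtmp_3, offset: 0B];

   where ivtmp_3 is the selected candidate advanced by the element size, so
   no separate address arithmetic remains in the loop body.  */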
7175 /* Rewrites USE (a condition in which one of the arguments is an iv) using
7176    candidate CAND.  */
7177 
7178 static void
7179 rewrite_use_compare (struct ivopts_data *data,
7180 		     struct iv_use *use, struct iv_cand *cand)
7181 {
7182   tree comp, op, bound;
7183   gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7184   enum tree_code compare;
7185   struct iv_group *group = data->vgroups[use->group_id];
7186   struct cost_pair *cp = get_group_iv_cost (data, group, cand);
7187 
7188   bound = cp->value;
7189   if (bound)
7190     {
7191       tree var = var_at_stmt (data->current_loop, cand, use->stmt);
7192       tree var_type = TREE_TYPE (var);
7193       gimple_seq stmts;
7194 
7195       if (dump_file && (dump_flags & TDF_DETAILS))
7196 	{
7197 	  fprintf (dump_file, "Replacing exit test: ");
7198 	  print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
7199 	}
7200       compare = cp->comp;
7201       bound = unshare_expr (fold_convert (var_type, bound));
7202       op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
7203       if (stmts)
7204 	gsi_insert_seq_on_edge_immediate (
7205 		loop_preheader_edge (data->current_loop),
7206 		stmts);
7207 
7208       gcond *cond_stmt = as_a <gcond *> (use->stmt);
7209       gimple_cond_set_lhs (cond_stmt, var);
7210       gimple_cond_set_code (cond_stmt, compare);
7211       gimple_cond_set_rhs (cond_stmt, op);
7212       return;
7213     }
7214 
7215   /* The induction variable elimination failed; just express the original
7216      giv.  */
7217   comp = get_computation_at (data->current_loop, use->stmt, use, cand);
7218   gcc_assert (comp != NULL_TREE);
7219   gcc_assert (use->op_p != NULL);
7220   *use->op_p = force_gimple_operand_gsi (&bsi, comp, true,
7221 					 SSA_NAME_VAR (*use->op_p),
7222 					 true, GSI_SAME_STMT);
7223 }
7224 
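/* Illustrative sketch (not part of the pass): when elimination succeeds and
   cp->value provides a bound, an exit test such as

     if (i_1 < n_2)

   is replaced by a comparison of the candidate against that bound, e.g.
   (hypothetical names)

     if (ivtmp_3 != bound_7)

   with bound_7 computed once on the preheader edge by the
   gsi_insert_seq_on_edge_immediate call above.  */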
7225 /* Rewrite the groups using the selected induction variables.  */
7226 
7227 static void
7228 rewrite_groups (struct ivopts_data *data)
7229 {
7230   unsigned i, j;
7231 
7232   for (i = 0; i < data->vgroups.length (); i++)
7233     {
7234       struct iv_group *group = data->vgroups[i];
7235       struct iv_cand *cand = group->selected;
7236 
7237       gcc_assert (cand);
7238 
7239       if (group->type == USE_NONLINEAR_EXPR)
7240 	{
7241 	  for (j = 0; j < group->vuses.length (); j++)
7242 	    {
7243 	      rewrite_use_nonlinear_expr (data, group->vuses[j], cand);
7244 	      update_stmt (group->vuses[j]->stmt);
7245 	    }
7246 	}
7247       else if (address_p (group->type))
7248 	{
7249 	  for (j = 0; j < group->vuses.length (); j++)
7250 	    {
7251 	      rewrite_use_address (data, group->vuses[j], cand);
7252 	      update_stmt (group->vuses[j]->stmt);
7253 	    }
7254 	}
7255       else
7256 	{
7257 	  gcc_assert (group->type == USE_COMPARE);
7258 
7259 	  for (j = 0; j < group->vuses.length (); j++)
7260 	    {
7261 	      rewrite_use_compare (data, group->vuses[j], cand);
7262 	      update_stmt (group->vuses[j]->stmt);
7263 	    }
7264 	}
7265     }
7266 }
7267 
7268 /* Removes the ivs that are not used after rewriting.  */
7269 
7270 static void
7271 remove_unused_ivs (struct ivopts_data *data, bitmap toremove)
7272 {
7273   unsigned j;
7274   bitmap_iterator bi;
7275 
7276   /* Figure out an order in which to release SSA DEFs so that we don't
7277      release something that we'd have to propagate into a debug stmt
7278      afterwards.  */
7279   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
7280     {
7281       struct version_info *info;
7282 
7283       info = ver_info (data, j);
7284       if (info->iv
7285 	  && !integer_zerop (info->iv->step)
7286 	  && !info->inv_id
7287 	  && !info->iv->nonlin_use
7288 	  && !info->preserve_biv)
7289 	{
7290 	  bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
7291 
7292 	  tree def = info->iv->ssa_name;
7293 
7294 	  if (MAY_HAVE_DEBUG_BIND_STMTS && SSA_NAME_DEF_STMT (def))
7295 	    {
7296 	      imm_use_iterator imm_iter;
7297 	      use_operand_p use_p;
7298 	      gimple *stmt;
7299 	      int count = 0;
7300 
7301 	      FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7302 		{
7303 		  if (!gimple_debug_bind_p (stmt))
7304 		    continue;
7305 
7306 		  /* We just want to determine whether to do nothing
7307 		     (count == 0), to substitute the computed
7308 		     expression into a single use of the SSA DEF by
7309 		     itself (count == 1), or to use a debug temp
7310 		     because the SSA DEF is used multiple times or as
7311 		     part of a larger expression (count > 1). */
7312 		  count++;
7313 		  if (gimple_debug_bind_get_value (stmt) != def)
7314 		    count++;
7315 
7316 		  if (count > 1)
7317 		    BREAK_FROM_IMM_USE_STMT (imm_iter);
7318 		}
7319 
7320 	      if (!count)
7321 		continue;
7322 
7323 	      struct iv_use dummy_use;
7324 	      struct iv_cand *best_cand = NULL, *cand;
7325 	      unsigned i, best_pref = 0, cand_pref;
7326 
7327 	      memset (&dummy_use, 0, sizeof (dummy_use));
7328 	      dummy_use.iv = info->iv;
7329 	      for (i = 0; i < data->vgroups.length () && i < 64; i++)
7330 		{
7331 		  cand = data->vgroups[i]->selected;
7332 		  if (cand == best_cand)
7333 		    continue;
7334 		  cand_pref = operand_equal_p (cand->iv->step,
7335 					       info->iv->step, 0)
7336 		    ? 4 : 0;
7337 		  cand_pref
7338 		    += TYPE_MODE (TREE_TYPE (cand->iv->base))
7339 		    == TYPE_MODE (TREE_TYPE (info->iv->base))
7340 		    ? 2 : 0;
7341 		  cand_pref
7342 		    += TREE_CODE (cand->iv->base) == INTEGER_CST
7343 		    ? 1 : 0;
7344 		  if (best_cand == NULL || best_pref < cand_pref)
7345 		    {
7346 		      best_cand = cand;
7347 		      best_pref = cand_pref;
7348 		    }
7349 		}
7350 
7351 	      if (!best_cand)
7352 		continue;
7353 
7354 	      tree comp = get_computation_at (data->current_loop,
7355 					      SSA_NAME_DEF_STMT (def),
7356 					      &dummy_use, best_cand);
7357 	      if (!comp)
7358 		continue;
7359 
7360 	      if (count > 1)
7361 		{
7362 		  tree vexpr = make_node (DEBUG_EXPR_DECL);
7363 		  DECL_ARTIFICIAL (vexpr) = 1;
7364 		  TREE_TYPE (vexpr) = TREE_TYPE (comp);
7365 		  if (SSA_NAME_VAR (def))
7366 		    SET_DECL_MODE (vexpr, DECL_MODE (SSA_NAME_VAR (def)));
7367 		  else
7368 		    SET_DECL_MODE (vexpr, TYPE_MODE (TREE_TYPE (vexpr)));
7369 		  gdebug *def_temp
7370 		    = gimple_build_debug_bind (vexpr, comp, NULL);
7371 		  gimple_stmt_iterator gsi;
7372 
7373 		  if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
7374 		    gsi = gsi_after_labels (gimple_bb
7375 					    (SSA_NAME_DEF_STMT (def)));
7376 		  else
7377 		    gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
7378 
7379 		  gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
7380 		  comp = vexpr;
7381 		}
7382 
7383 	      FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7384 		{
7385 		  if (!gimple_debug_bind_p (stmt))
7386 		    continue;
7387 
7388 		  FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
7389 		    SET_USE (use_p, comp);
7390 
7391 		  update_stmt (stmt);
7392 		}
7393 	    }
7394 	}
7395     }
7396 }
7397 
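/* Illustrative sketch (not part of the pass): if a released iv i_1 is still
   referenced by more than one debug bind, e.g.

     # DEBUG i => i_1
     # DEBUG j => i_1 + 1

   the code above introduces a debug temporary bound to an equivalent
   expression of the best surviving candidate (names hypothetical):

     # DEBUG D#1 => ivtmp_3 * 4 + base_9
     # DEBUG i => D#1
     # DEBUG j => D#1 + 1

   so debug information remains meaningful after the iv's definition is
   removed.  */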
7398 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
7399    for hash_map::traverse.  */
7400 
7401 bool
7402 free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
7403 {
7404   free (value);
7405   return true;
7406 }
7407 
7408 /* Frees data allocated by the optimization of a single loop.  */
7409 
7410 static void
7411 free_loop_data (struct ivopts_data *data)
7412 {
7413   unsigned i, j;
7414   bitmap_iterator bi;
7415   tree obj;
7416 
7417   if (data->niters)
7418     {
7419       data->niters->traverse<void *, free_tree_niter_desc> (NULL);
7420       delete data->niters;
7421       data->niters = NULL;
7422     }
7423 
7424   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
7425     {
7426       struct version_info *info;
7427 
7428       info = ver_info (data, i);
7429       info->iv = NULL;
7430       info->has_nonlin_use = false;
7431       info->preserve_biv = false;
7432       info->inv_id = 0;
7433     }
7434   bitmap_clear (data->relevant);
7435   bitmap_clear (data->important_candidates);
7436 
7437   for (i = 0; i < data->vgroups.length (); i++)
7438     {
7439       struct iv_group *group = data->vgroups[i];
7440 
7441       for (j = 0; j < group->vuses.length (); j++)
7442 	free (group->vuses[j]);
7443       group->vuses.release ();
7444 
7445       BITMAP_FREE (group->related_cands);
7446       for (j = 0; j < group->n_map_members; j++)
7447 	{
7448 	  if (group->cost_map[j].inv_vars)
7449 	    BITMAP_FREE (group->cost_map[j].inv_vars);
7450 	  if (group->cost_map[j].inv_exprs)
7451 	    BITMAP_FREE (group->cost_map[j].inv_exprs);
7452 	}
7453 
7454       free (group->cost_map);
7455       free (group);
7456     }
7457   data->vgroups.truncate (0);
7458 
7459   for (i = 0; i < data->vcands.length (); i++)
7460     {
7461       struct iv_cand *cand = data->vcands[i];
7462 
7463       if (cand->inv_vars)
7464 	BITMAP_FREE (cand->inv_vars);
7465       if (cand->inv_exprs)
7466 	BITMAP_FREE (cand->inv_exprs);
7467       free (cand);
7468     }
7469   data->vcands.truncate (0);
7470 
7471   if (data->version_info_size < num_ssa_names)
7472     {
7473       data->version_info_size = 2 * num_ssa_names;
7474       free (data->version_info);
7475       data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
7476     }
7477 
7478   data->max_inv_var_id = 0;
7479   data->max_inv_expr_id = 0;
7480 
7481   FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
7482     SET_DECL_RTL (obj, NULL_RTX);
7483 
7484   decl_rtl_to_reset.truncate (0);
7485 
7486   data->inv_expr_tab->empty ();
7487 
7488   data->iv_common_cand_tab->empty ();
7489   data->iv_common_cands.truncate (0);
7490 }
7491 
7492 /* Finalizes the data structures used by the iv optimization pass,
7493    releasing the per-loop and per-pass allocations in DATA.  */
7494 
7495 static void
7496 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
7497 {
7498   free_loop_data (data);
7499   free (data->version_info);
7500   BITMAP_FREE (data->relevant);
7501   BITMAP_FREE (data->important_candidates);
7502 
7503   decl_rtl_to_reset.release ();
7504   data->vgroups.release ();
7505   data->vcands.release ();
7506   delete data->inv_expr_tab;
7507   data->inv_expr_tab = NULL;
7508   free_affine_expand_cache (&data->name_expansion_cache);
7509   if (data->base_object_map)
7510     delete data->base_object_map;
7511   delete data->iv_common_cand_tab;
7512   data->iv_common_cand_tab = NULL;
7513   data->iv_common_cands.release ();
7514   obstack_free (&data->iv_obstack, NULL);
7515 }
7516 
7517 /* Returns true if the loop body BODY includes any function calls.  */
7518 
7519 static bool
7520 loop_body_includes_call (basic_block *body, unsigned num_nodes)
7521 {
7522   gimple_stmt_iterator gsi;
7523   unsigned i;
7524 
7525   for (i = 0; i < num_nodes; i++)
7526     for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
7527       {
7528 	gimple *stmt = gsi_stmt (gsi);
7529 	if (is_gimple_call (stmt)
7530 	    && !gimple_call_internal_p (stmt)
7531 	    && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
7532 	  return true;
7533       }
7534   return false;
7535 }
7536 
7537 /* Optimizes the LOOP.  Returns true if anything changed.  */
7538 
7539 static bool
7540 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop,
7541 			   bitmap toremove)
7542 {
7543   bool changed = false;
7544   struct iv_ca *iv_ca;
7545   edge exit = single_dom_exit (loop);
7546   basic_block *body;
7547 
7548   gcc_assert (!data->niters);
7549   data->current_loop = loop;
7550   data->loop_loc = find_loop_location (loop).get_location_t ();
7551   data->speed = optimize_loop_for_speed_p (loop);
7552 
7553   if (dump_file && (dump_flags & TDF_DETAILS))
7554     {
7555       fprintf (dump_file, "Processing loop %d", loop->num);
7556       if (data->loop_loc != UNKNOWN_LOCATION)
7557 	fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7558 		 LOCATION_LINE (data->loop_loc));
7559       fprintf (dump_file, "\n");
7560 
7561       if (exit)
7562 	{
7563 	  fprintf (dump_file, "  single exit %d -> %d, exit condition ",
7564 		   exit->src->index, exit->dest->index);
7565 	  print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
7566 	  fprintf (dump_file, "\n");
7567 	}
7568 
7569       fprintf (dump_file, "\n");
7570     }
7571 
7572   body = get_loop_body (loop);
7573   data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
7574   renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
7575   free (body);
7576 
7577   data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
7578 
7579   /* For each ssa name determines whether it behaves as an induction variable
7580      in some loop.  */
7581   if (!find_induction_variables (data))
7582     goto finish;
7583 
7584   /* Finds interesting uses (item 1).  */
7585   find_interesting_uses (data);
7586   if (data->vgroups.length () > MAX_CONSIDERED_GROUPS)
7587     goto finish;
7588 
7589   /* Finds candidates for the induction variables (item 2).  */
7590   find_iv_candidates (data);
7591 
7592   /* Calculates the costs (item 3, part 1).  */
7593   determine_iv_costs (data);
7594   determine_group_iv_costs (data);
7595   determine_set_costs (data);
7596 
7597   /* Find the optimal set of induction variables (item 3, part 2).  */
7598   iv_ca = find_optimal_iv_set (data);
7599   if (!iv_ca)
7600     goto finish;
7601   changed = true;
7602 
7603   /* Create the new induction variables (item 4, part 1).  */
7604   create_new_ivs (data, iv_ca);
7605   iv_ca_free (&iv_ca);
7606 
7607   /* Rewrite the uses (item 4, part 2).  */
7608   rewrite_groups (data);
7609 
7610   /* Remove the ivs that are unused after rewriting.  */
7611   remove_unused_ivs (data, toremove);
7612 
7613 finish:
7614   free_loop_data (data);
7615 
7616   return changed;
7617 }
7618 
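/* Illustrative sketch (not part of the pass): at the source level the overall
   effect of the steps above on a simple loop is roughly

     // before ivopts
     for (i = 0; i < n; i++)
       sum += a[i];

     // after ivopts (conceptual; the chosen form is cost- and target-dependent)
     for (p = a; p != a + n; p++)
       sum += *p;

   i.e. the indexed access is strength-reduced to a pointer increment and the
   original counter is removed when it has no other uses.  */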
7619 /* Main entry point.  Optimizes induction variables in loops.  */
7620 
7621 void
7622 tree_ssa_iv_optimize (void)
7623 {
7624   struct loop *loop;
7625   struct ivopts_data data;
7626   auto_bitmap toremove;
7627 
7628   tree_ssa_iv_optimize_init (&data);
7629 
7630   /* Optimize the loops starting with the innermost ones.  */
7631   FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
7632     {
7633       if (dump_file && (dump_flags & TDF_DETAILS))
7634 	flow_loop_dump (loop, dump_file, NULL, 1);
7635 
7636       tree_ssa_iv_optimize_loop (&data, loop, toremove);
7637     }
7638 
7639   /* Remove eliminated IV defs.  */
7640   release_defs_bitset (toremove);
7641 
7642   /* We have changed the structure of induction variables; it might happen
7643      that definitions in the scev database refer to some of them that were
7644      eliminated.  */
7645   scev_reset_htab ();
7646   /* Likewise niter and control-IV information.  */
7647   free_numbers_of_iterations_estimates (cfun);
7648 
7649   tree_ssa_iv_optimize_finalize (&data);
7650 }
7651 
7652 #include "gt-tree-ssa-loop-ivopts.h"
7653