xref: /netbsd-src/external/gpl3/gcc.old/dist/gcc/tree-ssa-loop-ivopts.c (revision e6c7e151de239c49d2e38720a061ed9d1fa99309)
1 /* Induction variable optimizations.
2    Copyright (C) 2003-2017 Free Software Foundation, Inc.
3 
4 This file is part of GCC.
5 
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
10 
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3.  If not see
18 <http://www.gnu.org/licenses/>.  */
19 
20 /* This pass tries to find the optimal set of induction variables for the loop.
21    It optimizes just the basic linear induction variables (although adding
22    support for other types should not be too hard).  It includes the
23    optimizations commonly known as strength reduction, induction variable
24    coalescing and induction variable elimination.  It does it in the
25    following steps:
26 
27    1) The interesting uses of induction variables are found.  This includes
28 
29       -- uses of induction variables in non-linear expressions
30       -- addresses of arrays
31       -- comparisons of induction variables
32 
33       Note the interesting uses are categorized and handled in group.
34       Generally, address type uses are grouped together if their iv bases
35       are different in constant offset.
36 
37    2) Candidates for the induction variables are found.  This includes
38 
39       -- old induction variables
40       -- the variables defined by expressions derived from the "interesting
41 	 groups/uses" above
42 
43    3) The optimal (w.r. to a cost function) set of variables is chosen.  The
44       cost function assigns a cost to sets of induction variables and consists
45       of three parts:
46 
47       -- The group/use costs.  Each of the interesting groups/uses chooses
48 	 the best induction variable in the set and adds its cost to the sum.
49 	 The cost reflects the time spent on modifying the induction variables
50 	 value to be usable for the given purpose (adding base and offset for
51 	 arrays, etc.).
52       -- The variable costs.  Each of the variables has a cost assigned that
53 	 reflects the costs associated with incrementing the value of the
54 	 variable.  The original variables are somewhat preferred.
55       -- The set cost.  Depending on the size of the set, extra cost may be
56 	 added to reflect register pressure.
57 
58       All the costs are defined in a machine-specific way, using the target
59       hooks and machine descriptions to determine them.
60 
61    4) The trees are transformed to use the new variables, the dead code is
62       removed.
63 
64    All of this is done loop by loop.  Doing it globally is theoretically
65    possible, it might give a better performance and it might enable us
66    to decide costs more precisely, but getting all the interactions right
67    would be complicated.  */
68 
69 #include "config.h"
70 #include "system.h"
71 #include "coretypes.h"
72 #include "backend.h"
73 #include "rtl.h"
74 #include "tree.h"
75 #include "gimple.h"
76 #include "cfghooks.h"
77 #include "tree-pass.h"
78 #include "memmodel.h"
79 #include "tm_p.h"
80 #include "ssa.h"
81 #include "expmed.h"
82 #include "insn-config.h"
83 #include "emit-rtl.h"
84 #include "recog.h"
85 #include "cgraph.h"
86 #include "gimple-pretty-print.h"
87 #include "alias.h"
88 #include "fold-const.h"
89 #include "stor-layout.h"
90 #include "tree-eh.h"
91 #include "gimplify.h"
92 #include "gimple-iterator.h"
93 #include "gimplify-me.h"
94 #include "tree-cfg.h"
95 #include "tree-ssa-loop-ivopts.h"
96 #include "tree-ssa-loop-manip.h"
97 #include "tree-ssa-loop-niter.h"
98 #include "tree-ssa-loop.h"
99 #include "explow.h"
100 #include "expr.h"
101 #include "tree-dfa.h"
102 #include "tree-ssa.h"
103 #include "cfgloop.h"
104 #include "tree-scalar-evolution.h"
105 #include "params.h"
106 #include "tree-affine.h"
107 #include "tree-ssa-propagate.h"
108 #include "tree-ssa-address.h"
109 #include "builtins.h"
110 #include "tree-vectorizer.h"
111 
112 /* FIXME: Expressions are expanded to RTL in this pass to determine the
113    cost of different addressing modes.  This should be moved to a TBD
114    interface between the GIMPLE and RTL worlds.  */
115 
116 /* The infinite cost.  */
117 #define INFTY 10000000
118 
119 /* Returns the expected number of loop iterations for LOOP.
120    The average trip count is computed from profile data if it
121    exists. */
122 
123 static inline HOST_WIDE_INT
124 avg_loop_niter (struct loop *loop)
125 {
126   HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
127   if (niter == -1)
128     {
129       niter = likely_max_stmt_executions_int (loop);
130 
131       if (niter == -1 || niter > PARAM_VALUE (PARAM_AVG_LOOP_NITER))
132 	return PARAM_VALUE (PARAM_AVG_LOOP_NITER);
133     }
134 
135   return niter;
136 }
137 
138 struct iv_use;
139 
140 /* Representation of the induction variable.  */
141 struct iv
142 {
143   tree base;		/* Initial value of the iv.  */
144   tree base_object;	/* A memory object to that the induction variable points.  */
145   tree step;		/* Step of the iv (constant only).  */
146   tree ssa_name;	/* The ssa name with the value.  */
147   struct iv_use *nonlin_use;	/* The identifier in the use if it is the case.  */
148   bool biv_p;		/* Is it a biv?  */
149   bool no_overflow;	/* True if the iv doesn't overflow.  */
150   bool have_address_use;/* For biv, indicate if it's used in any address
151 			   type use.  */
152 };
153 
154 /* Per-ssa version information (induction variable descriptions, etc.).  */
155 struct version_info
156 {
157   tree name;		/* The ssa name.  */
158   struct iv *iv;	/* Induction variable description.  */
159   bool has_nonlin_use;	/* For a loop-level invariant, whether it is used in
160 			   an expression that is not an induction variable.  */
161   bool preserve_biv;	/* For the original biv, whether to preserve it.  */
162   unsigned inv_id;	/* Id of an invariant.  */
163 };
164 
165 /* Types of uses.  */
166 enum use_type
167 {
168   USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
169   USE_ADDRESS,		/* Use in an address.  */
170   USE_COMPARE		/* Use is a compare.  */
171 };
172 
173 /* Cost of a computation.  */
174 struct comp_cost
175 {
176   comp_cost (): cost (0), complexity (0), scratch (0)
177   {}
178 
179   comp_cost (int cost, unsigned complexity, int scratch = 0)
180     : cost (cost), complexity (complexity), scratch (scratch)
181   {}
182 
183   /* Returns true if COST is infinite.  */
184   bool infinite_cost_p ();
185 
186   /* Adds costs COST1 and COST2.  */
187   friend comp_cost operator+ (comp_cost cost1, comp_cost cost2);
188 
189   /* Adds COST to the comp_cost.  */
190   comp_cost operator+= (comp_cost cost);
191 
192   /* Adds constant C to this comp_cost.  */
193   comp_cost operator+= (HOST_WIDE_INT c);
194 
195   /* Subtracts constant C to this comp_cost.  */
196   comp_cost operator-= (HOST_WIDE_INT c);
197 
198   /* Divide the comp_cost by constant C.  */
199   comp_cost operator/= (HOST_WIDE_INT c);
200 
201   /* Multiply the comp_cost by constant C.  */
202   comp_cost operator*= (HOST_WIDE_INT c);
203 
204   /* Subtracts costs COST1 and COST2.  */
205   friend comp_cost operator- (comp_cost cost1, comp_cost cost2);
206 
207   /* Subtracts COST from this comp_cost.  */
208   comp_cost operator-= (comp_cost cost);
209 
210   /* Returns true if COST1 is smaller than COST2.  */
211   friend bool operator< (comp_cost cost1, comp_cost cost2);
212 
213   /* Returns true if COST1 and COST2 are equal.  */
214   friend bool operator== (comp_cost cost1, comp_cost cost2);
215 
216   /* Returns true if COST1 is smaller or equal than COST2.  */
217   friend bool operator<= (comp_cost cost1, comp_cost cost2);
218 
219   int cost;		/* The runtime cost.  */
220   unsigned complexity;  /* The estimate of the complexity of the code for
221 			   the computation (in no concrete units --
222 			   complexity field should be larger for more
223 			   complex expressions and addressing modes).  */
224   int scratch;		/* Scratch used during cost computation.  */
225 };
226 
227 static const comp_cost no_cost;
228 static const comp_cost infinite_cost (INFTY, INFTY, INFTY);
229 
230 bool
231 comp_cost::infinite_cost_p ()
232 {
233   return cost == INFTY;
234 }
235 
236 comp_cost
237 operator+ (comp_cost cost1, comp_cost cost2)
238 {
239   if (cost1.infinite_cost_p () || cost2.infinite_cost_p ())
240     return infinite_cost;
241 
242   cost1.cost += cost2.cost;
243   cost1.complexity += cost2.complexity;
244 
245   return cost1;
246 }
247 
248 comp_cost
249 operator- (comp_cost cost1, comp_cost cost2)
250 {
251   if (cost1.infinite_cost_p ())
252     return infinite_cost;
253 
254   gcc_assert (!cost2.infinite_cost_p ());
255 
256   cost1.cost -= cost2.cost;
257   cost1.complexity -= cost2.complexity;
258 
259   return cost1;
260 }
261 
262 comp_cost
263 comp_cost::operator+= (comp_cost cost)
264 {
265   *this = *this + cost;
266   return *this;
267 }
268 
269 comp_cost
270 comp_cost::operator+= (HOST_WIDE_INT c)
271 {
272   if (infinite_cost_p ())
273     return *this;
274 
275   this->cost += c;
276 
277   return *this;
278 }
279 
280 comp_cost
281 comp_cost::operator-= (HOST_WIDE_INT c)
282 {
283   if (infinite_cost_p ())
284     return *this;
285 
286   this->cost -= c;
287 
288   return *this;
289 }
290 
291 comp_cost
292 comp_cost::operator/= (HOST_WIDE_INT c)
293 {
294   if (infinite_cost_p ())
295     return *this;
296 
297   this->cost /= c;
298 
299   return *this;
300 }
301 
302 comp_cost
303 comp_cost::operator*= (HOST_WIDE_INT c)
304 {
305   if (infinite_cost_p ())
306     return *this;
307 
308   this->cost *= c;
309 
310   return *this;
311 }
312 
313 comp_cost
314 comp_cost::operator-= (comp_cost cost)
315 {
316   *this = *this - cost;
317   return *this;
318 }
319 
320 bool
321 operator< (comp_cost cost1, comp_cost cost2)
322 {
323   if (cost1.cost == cost2.cost)
324     return cost1.complexity < cost2.complexity;
325 
326   return cost1.cost < cost2.cost;
327 }
328 
329 bool
330 operator== (comp_cost cost1, comp_cost cost2)
331 {
332   return cost1.cost == cost2.cost
333     && cost1.complexity == cost2.complexity;
334 }
335 
336 bool
337 operator<= (comp_cost cost1, comp_cost cost2)
338 {
339   return cost1 < cost2 || cost1 == cost2;
340 }
341 
342 struct iv_inv_expr_ent;
343 
344 /* The candidate - cost pair.  */
345 struct cost_pair
346 {
347   struct iv_cand *cand;	/* The candidate.  */
348   comp_cost cost;	/* The cost.  */
349   enum tree_code comp;	/* For iv elimination, the comparison.  */
350   bitmap depends_on;	/* The list of invariants that have to be
351 			   preserved.  */
352   tree value;		/* For final value elimination, the expression for
353 			   the final value of the iv.  For iv elimination,
354 			   the new bound to compare with.  */
355   iv_inv_expr_ent *inv_expr; /* Loop invariant expression.  */
356 };
357 
358 /* Use.  */
359 struct iv_use
360 {
361   unsigned id;		/* The id of the use.  */
362   unsigned group_id;	/* The group id the use belongs to.  */
363   enum use_type type;	/* Type of the use.  */
364   struct iv *iv;	/* The induction variable it is based on.  */
365   gimple *stmt;		/* Statement in that it occurs.  */
366   tree *op_p;		/* The place where it occurs.  */
367 
368   tree addr_base;	/* Base address with const offset stripped.  */
369   unsigned HOST_WIDE_INT addr_offset;
370 			/* Const offset stripped from base address.  */
371 };
372 
373 /* Group of uses.  */
374 struct iv_group
375 {
376   /* The id of the group.  */
377   unsigned id;
378   /* Uses of the group are of the same type.  */
379   enum use_type type;
380   /* The set of "related" IV candidates, plus the important ones.  */
381   bitmap related_cands;
382   /* Number of IV candidates in the cost_map.  */
383   unsigned n_map_members;
384   /* The costs wrto the iv candidates.  */
385   struct cost_pair *cost_map;
386   /* The selected candidate for the group.  */
387   struct iv_cand *selected;
388   /* Uses in the group.  */
389   vec<struct iv_use *> vuses;
390 };
391 
392 /* The position where the iv is computed.  */
393 enum iv_position
394 {
395   IP_NORMAL,		/* At the end, just before the exit condition.  */
396   IP_END,		/* At the end of the latch block.  */
397   IP_BEFORE_USE,	/* Immediately before a specific use.  */
398   IP_AFTER_USE,		/* Immediately after a specific use.  */
399   IP_ORIGINAL		/* The original biv.  */
400 };
401 
402 /* The induction variable candidate.  */
403 struct iv_cand
404 {
405   unsigned id;		/* The number of the candidate.  */
406   bool important;	/* Whether this is an "important" candidate, i.e. such
407 			   that it should be considered by all uses.  */
408   ENUM_BITFIELD(iv_position) pos : 8;	/* Where it is computed.  */
409   gimple *incremented_at;/* For original biv, the statement where it is
410 			   incremented.  */
411   tree var_before;	/* The variable used for it before increment.  */
412   tree var_after;	/* The variable used for it after increment.  */
413   struct iv *iv;	/* The value of the candidate.  NULL for
414 			   "pseudocandidate" used to indicate the possibility
415 			   to replace the final value of an iv by direct
416 			   computation of the value.  */
417   unsigned cost;	/* Cost of the candidate.  */
418   unsigned cost_step;	/* Cost of the candidate's increment operation.  */
419   struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
420 			      where it is incremented.  */
421   bitmap depends_on;	/* The list of invariants that are used in step of the
422 			   biv.  */
423   struct iv *orig_iv;	/* The original iv if this cand is added from biv with
424 			   smaller type.  */
425 };
426 
427 /* Hashtable entry for common candidate derived from iv uses.  */
428 struct iv_common_cand
429 {
430   tree base;
431   tree step;
432   /* IV uses from which this common candidate is derived.  */
433   auto_vec<struct iv_use *> uses;
434   hashval_t hash;
435 };
436 
437 /* Hashtable helpers.  */
438 
439 struct iv_common_cand_hasher : delete_ptr_hash <iv_common_cand>
440 {
441   static inline hashval_t hash (const iv_common_cand *);
442   static inline bool equal (const iv_common_cand *, const iv_common_cand *);
443 };
444 
445 /* Hash function for possible common candidates.  */
446 
447 inline hashval_t
448 iv_common_cand_hasher::hash (const iv_common_cand *ccand)
449 {
450   return ccand->hash;
451 }
452 
453 /* Hash table equality function for common candidates.  */
454 
455 inline bool
456 iv_common_cand_hasher::equal (const iv_common_cand *ccand1,
457 			      const iv_common_cand *ccand2)
458 {
459   return (ccand1->hash == ccand2->hash
460 	  && operand_equal_p (ccand1->base, ccand2->base, 0)
461 	  && operand_equal_p (ccand1->step, ccand2->step, 0)
462 	  && (TYPE_PRECISION (TREE_TYPE (ccand1->base))
463 	      == TYPE_PRECISION (TREE_TYPE (ccand2->base))));
464 }
465 
466 /* Loop invariant expression hashtable entry.  */
467 
468 struct iv_inv_expr_ent
469 {
470   /* Tree expression of the entry.  */
471   tree expr;
472   /* Unique indentifier.  */
473   int id;
474   /* Hash value.  */
475   hashval_t hash;
476 };
477 
478 /* Sort iv_inv_expr_ent pair A and B by id field.  */
479 
480 static int
481 sort_iv_inv_expr_ent (const void *a, const void *b)
482 {
483   const iv_inv_expr_ent * const *e1 = (const iv_inv_expr_ent * const *) (a);
484   const iv_inv_expr_ent * const *e2 = (const iv_inv_expr_ent * const *) (b);
485 
486   unsigned id1 = (*e1)->id;
487   unsigned id2 = (*e2)->id;
488 
489   if (id1 < id2)
490     return -1;
491   else if (id1 > id2)
492     return 1;
493   else
494     return 0;
495 }
496 
497 /* Hashtable helpers.  */
498 
499 struct iv_inv_expr_hasher : free_ptr_hash <iv_inv_expr_ent>
500 {
501   static inline hashval_t hash (const iv_inv_expr_ent *);
502   static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
503 };
504 
505 /* Hash function for loop invariant expressions.  */
506 
507 inline hashval_t
508 iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
509 {
510   return expr->hash;
511 }
512 
513 /* Hash table equality function for expressions.  */
514 
515 inline bool
516 iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
517 			   const iv_inv_expr_ent *expr2)
518 {
519   return expr1->hash == expr2->hash
520 	 && operand_equal_p (expr1->expr, expr2->expr, 0);
521 }
522 
523 struct ivopts_data
524 {
525   /* The currently optimized loop.  */
526   struct loop *current_loop;
527   source_location loop_loc;
528 
529   /* Numbers of iterations for all exits of the current loop.  */
530   hash_map<edge, tree_niter_desc *> *niters;
531 
532   /* Number of registers used in it.  */
533   unsigned regs_used;
534 
535   /* The size of version_info array allocated.  */
536   unsigned version_info_size;
537 
538   /* The array of information for the ssa names.  */
539   struct version_info *version_info;
540 
541   /* The hashtable of loop invariant expressions created
542      by ivopt.  */
543   hash_table<iv_inv_expr_hasher> *inv_expr_tab;
544 
545   /* Loop invariant expression id.  */
546   int max_inv_expr_id;
547 
548   /* The bitmap of indices in version_info whose value was changed.  */
549   bitmap relevant;
550 
551   /* The uses of induction variables.  */
552   vec<iv_group *> vgroups;
553 
554   /* The candidates.  */
555   vec<iv_cand *> vcands;
556 
557   /* A bitmap of important candidates.  */
558   bitmap important_candidates;
559 
560   /* Cache used by tree_to_aff_combination_expand.  */
561   hash_map<tree, name_expansion *> *name_expansion_cache;
562 
563   /* The hashtable of common candidates derived from iv uses.  */
564   hash_table<iv_common_cand_hasher> *iv_common_cand_tab;
565 
566   /* The common candidates.  */
567   vec<iv_common_cand *> iv_common_cands;
568 
569   /* The maximum invariant id.  */
570   unsigned max_inv_id;
571 
572   /* Number of no_overflow BIVs which are not used in memory address.  */
573   unsigned bivs_not_used_in_addr;
574 
575   /* Obstack for iv structure.  */
576   struct obstack iv_obstack;
577 
578   /* Whether to consider just related and important candidates when replacing a
579      use.  */
580   bool consider_all_candidates;
581 
582   /* Are we optimizing for speed?  */
583   bool speed;
584 
585   /* Whether the loop body includes any function calls.  */
586   bool body_includes_call;
587 
588   /* Whether the loop body can only be exited via single exit.  */
589   bool loop_single_exit_p;
590 };
591 
592 /* An assignment of iv candidates to uses.  */
593 
594 struct iv_ca
595 {
596   /* The number of uses covered by the assignment.  */
597   unsigned upto;
598 
599   /* Number of uses that cannot be expressed by the candidates in the set.  */
600   unsigned bad_groups;
601 
602   /* Candidate assigned to a use, together with the related costs.  */
603   struct cost_pair **cand_for_group;
604 
605   /* Number of times each candidate is used.  */
606   unsigned *n_cand_uses;
607 
608   /* The candidates used.  */
609   bitmap cands;
610 
611   /* The number of candidates in the set.  */
612   unsigned n_cands;
613 
614   /* Total number of registers needed.  */
615   unsigned n_regs;
616 
617   /* Total cost of expressing uses.  */
618   comp_cost cand_use_cost;
619 
620   /* Total cost of candidates.  */
621   unsigned cand_cost;
622 
623   /* Number of times each invariant is used.  */
624   unsigned *n_invariant_uses;
625 
626   /* Hash set with used invariant expression.  */
627   hash_map <iv_inv_expr_ent *, unsigned> *used_inv_exprs;
628 
629   /* Total cost of the assignment.  */
630   comp_cost cost;
631 };
632 
633 /* Difference of two iv candidate assignments.  */
634 
635 struct iv_ca_delta
636 {
637   /* Changed group.  */
638   struct iv_group *group;
639 
640   /* An old assignment (for rollback purposes).  */
641   struct cost_pair *old_cp;
642 
643   /* A new assignment.  */
644   struct cost_pair *new_cp;
645 
646   /* Next change in the list.  */
647   struct iv_ca_delta *next;
648 };
649 
650 /* Bound on number of candidates below that all candidates are considered.  */
651 
652 #define CONSIDER_ALL_CANDIDATES_BOUND \
653   ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
654 
655 /* If there are more iv occurrences, we just give up (it is quite unlikely that
656    optimizing such a loop would help, and it would take ages).  */
657 
658 #define MAX_CONSIDERED_GROUPS \
659   ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
660 
661 /* If there are at most this number of ivs in the set, try removing unnecessary
662    ivs from the set always.  */
663 
664 #define ALWAYS_PRUNE_CAND_SET_BOUND \
665   ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
666 
667 /* The list of trees for that the decl_rtl field must be reset is stored
668    here.  */
669 
670 static vec<tree> decl_rtl_to_reset;
671 
672 static comp_cost force_expr_to_var_cost (tree, bool);
673 
674 /* The single loop exit if it dominates the latch, NULL otherwise.  */
675 
676 edge
677 single_dom_exit (struct loop *loop)
678 {
679   edge exit = single_exit (loop);
680 
681   if (!exit)
682     return NULL;
683 
684   if (!just_once_each_iteration_p (loop, exit->src))
685     return NULL;
686 
687   return exit;
688 }
689 
690 /* Dumps information about the induction variable IV to FILE.  Don't dump
691    variable's name if DUMP_NAME is FALSE.  The information is dumped with
692    preceding spaces indicated by INDENT_LEVEL.  */
693 
694 void
695 dump_iv (FILE *file, struct iv *iv, bool dump_name, unsigned indent_level)
696 {
697   const char *p;
698   const char spaces[9] = {' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '\0'};
699 
700   if (indent_level > 4)
701     indent_level = 4;
702   p = spaces + 8 - (indent_level << 1);
703 
704   fprintf (file, "%sIV struct:\n", p);
705   if (iv->ssa_name && dump_name)
706     {
707       fprintf (file, "%s  SSA_NAME:\t", p);
708       print_generic_expr (file, iv->ssa_name, TDF_SLIM);
709       fprintf (file, "\n");
710     }
711 
712   fprintf (file, "%s  Type:\t", p);
713   print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
714   fprintf (file, "\n");
715 
716   fprintf (file, "%s  Base:\t", p);
717   print_generic_expr (file, iv->base, TDF_SLIM);
718   fprintf (file, "\n");
719 
720   fprintf (file, "%s  Step:\t", p);
721   print_generic_expr (file, iv->step, TDF_SLIM);
722   fprintf (file, "\n");
723 
724   if (iv->base_object)
725     {
726       fprintf (file, "%s  Object:\t", p);
727       print_generic_expr (file, iv->base_object, TDF_SLIM);
728       fprintf (file, "\n");
729     }
730 
731   fprintf (file, "%s  Biv:\t%c\n", p, iv->biv_p ? 'Y' : 'N');
732 
733   fprintf (file, "%s  Overflowness wrto loop niter:\t%s\n",
734 	   p, iv->no_overflow ? "No-overflow" : "Overflow");
735 }
736 
737 /* Dumps information about the USE to FILE.  */
738 
739 void
740 dump_use (FILE *file, struct iv_use *use)
741 {
742   fprintf (file, "  Use %d.%d:\n", use->group_id, use->id);
743   fprintf (file, "    At stmt:\t");
744   print_gimple_stmt (file, use->stmt, 0, 0);
745   fprintf (file, "    At pos:\t");
746   if (use->op_p)
747     print_generic_expr (file, *use->op_p, TDF_SLIM);
748   fprintf (file, "\n");
749   dump_iv (file, use->iv, false, 2);
750 }
751 
752 /* Dumps information about the uses to FILE.  */
753 
754 void
755 dump_groups (FILE *file, struct ivopts_data *data)
756 {
757   unsigned i, j;
758   struct iv_group *group;
759 
760   for (i = 0; i < data->vgroups.length (); i++)
761     {
762       group = data->vgroups[i];
763       fprintf (file, "Group %d:\n", group->id);
764       if (group->type == USE_NONLINEAR_EXPR)
765 	fprintf (file, "  Type:\tGENERIC\n");
766       else if (group->type == USE_ADDRESS)
767 	fprintf (file, "  Type:\tADDRESS\n");
768       else
769 	{
770 	  gcc_assert (group->type == USE_COMPARE);
771 	  fprintf (file, "  Type:\tCOMPARE\n");
772 	}
773       for (j = 0; j < group->vuses.length (); j++)
774 	dump_use (file, group->vuses[j]);
775     }
776 }
777 
778 /* Dumps information about induction variable candidate CAND to FILE.  */
779 
780 void
781 dump_cand (FILE *file, struct iv_cand *cand)
782 {
783   struct iv *iv = cand->iv;
784 
785   fprintf (file, "Candidate %d:\n", cand->id);
786   if (cand->depends_on)
787     {
788       fprintf (file, "  Depend on: ");
789       dump_bitmap (file, cand->depends_on);
790     }
791 
792   if (cand->var_before)
793     {
794       fprintf (file, "  Var befor: ");
795       print_generic_expr (file, cand->var_before, TDF_SLIM);
796       fprintf (file, "\n");
797     }
798   if (cand->var_after)
799     {
800       fprintf (file, "  Var after: ");
801       print_generic_expr (file, cand->var_after, TDF_SLIM);
802       fprintf (file, "\n");
803     }
804 
805   switch (cand->pos)
806     {
807     case IP_NORMAL:
808       fprintf (file, "  Incr POS: before exit test\n");
809       break;
810 
811     case IP_BEFORE_USE:
812       fprintf (file, "  Incr POS: before use %d\n", cand->ainc_use->id);
813       break;
814 
815     case IP_AFTER_USE:
816       fprintf (file, "  Incr POS: after use %d\n", cand->ainc_use->id);
817       break;
818 
819     case IP_END:
820       fprintf (file, "  Incr POS: at end\n");
821       break;
822 
823     case IP_ORIGINAL:
824       fprintf (file, "  Incr POS: orig biv\n");
825       break;
826     }
827 
828   dump_iv (file, iv, false, 1);
829 }
830 
831 /* Returns the info for ssa version VER.  */
832 
833 static inline struct version_info *
834 ver_info (struct ivopts_data *data, unsigned ver)
835 {
836   return data->version_info + ver;
837 }
838 
839 /* Returns the info for ssa name NAME.  */
840 
841 static inline struct version_info *
842 name_info (struct ivopts_data *data, tree name)
843 {
844   return ver_info (data, SSA_NAME_VERSION (name));
845 }
846 
847 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
848    emitted in LOOP.  */
849 
850 static bool
851 stmt_after_ip_normal_pos (struct loop *loop, gimple *stmt)
852 {
853   basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
854 
855   gcc_assert (bb);
856 
857   if (sbb == loop->latch)
858     return true;
859 
860   if (sbb != bb)
861     return false;
862 
863   return stmt == last_stmt (bb);
864 }
865 
866 /* Returns true if STMT if after the place where the original induction
867    variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
868    if the positions are identical.  */
869 
870 static bool
871 stmt_after_inc_pos (struct iv_cand *cand, gimple *stmt, bool true_if_equal)
872 {
873   basic_block cand_bb = gimple_bb (cand->incremented_at);
874   basic_block stmt_bb = gimple_bb (stmt);
875 
876   if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
877     return false;
878 
879   if (stmt_bb != cand_bb)
880     return true;
881 
882   if (true_if_equal
883       && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
884     return true;
885   return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
886 }
887 
888 /* Returns true if STMT if after the place where the induction variable
889    CAND is incremented in LOOP.  */
890 
891 static bool
892 stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple *stmt)
893 {
894   switch (cand->pos)
895     {
896     case IP_END:
897       return false;
898 
899     case IP_NORMAL:
900       return stmt_after_ip_normal_pos (loop, stmt);
901 
902     case IP_ORIGINAL:
903     case IP_AFTER_USE:
904       return stmt_after_inc_pos (cand, stmt, false);
905 
906     case IP_BEFORE_USE:
907       return stmt_after_inc_pos (cand, stmt, true);
908 
909     default:
910       gcc_unreachable ();
911     }
912 }
913 
914 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node.  */
915 
916 static bool
917 abnormal_ssa_name_p (tree exp)
918 {
919   if (!exp)
920     return false;
921 
922   if (TREE_CODE (exp) != SSA_NAME)
923     return false;
924 
925   return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
926 }
927 
928 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
929    abnormal phi node.  Callback for for_each_index.  */
930 
931 static bool
932 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
933 				  void *data ATTRIBUTE_UNUSED)
934 {
935   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
936     {
937       if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
938 	return false;
939       if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
940 	return false;
941     }
942 
943   return !abnormal_ssa_name_p (*index);
944 }
945 
946 /* Returns true if EXPR contains a ssa name that occurs in an
947    abnormal phi node.  */
948 
949 bool
950 contains_abnormal_ssa_name_p (tree expr)
951 {
952   enum tree_code code;
953   enum tree_code_class codeclass;
954 
955   if (!expr)
956     return false;
957 
958   code = TREE_CODE (expr);
959   codeclass = TREE_CODE_CLASS (code);
960 
961   if (code == SSA_NAME)
962     return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
963 
964   if (code == INTEGER_CST
965       || is_gimple_min_invariant (expr))
966     return false;
967 
968   if (code == ADDR_EXPR)
969     return !for_each_index (&TREE_OPERAND (expr, 0),
970 			    idx_contains_abnormal_ssa_name_p,
971 			    NULL);
972 
973   if (code == COND_EXPR)
974     return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
975       || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
976       || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));
977 
978   switch (codeclass)
979     {
980     case tcc_binary:
981     case tcc_comparison:
982       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
983 	return true;
984 
985       /* Fallthru.  */
986     case tcc_unary:
987       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
988 	return true;
989 
990       break;
991 
992     default:
993       gcc_unreachable ();
994     }
995 
996   return false;
997 }
998 
999 /*  Returns the structure describing number of iterations determined from
1000     EXIT of DATA->current_loop, or NULL if something goes wrong.  */
1001 
1002 static struct tree_niter_desc *
1003 niter_for_exit (struct ivopts_data *data, edge exit)
1004 {
1005   struct tree_niter_desc *desc;
1006   tree_niter_desc **slot;
1007 
1008   if (!data->niters)
1009     {
1010       data->niters = new hash_map<edge, tree_niter_desc *>;
1011       slot = NULL;
1012     }
1013   else
1014     slot = data->niters->get (exit);
1015 
1016   if (!slot)
1017     {
1018       /* Try to determine number of iterations.  We cannot safely work with ssa
1019 	 names that appear in phi nodes on abnormal edges, so that we do not
1020 	 create overlapping life ranges for them (PR 27283).  */
1021       desc = XNEW (struct tree_niter_desc);
1022       if (!number_of_iterations_exit (data->current_loop,
1023 				      exit, desc, true)
1024      	  || contains_abnormal_ssa_name_p (desc->niter))
1025 	{
1026 	  XDELETE (desc);
1027 	  desc = NULL;
1028 	}
1029       data->niters->put (exit, desc);
1030     }
1031   else
1032     desc = *slot;
1033 
1034   return desc;
1035 }
1036 
1037 /* Returns the structure describing number of iterations determined from
1038    single dominating exit of DATA->current_loop, or NULL if something
1039    goes wrong.  */
1040 
1041 static struct tree_niter_desc *
1042 niter_for_single_dom_exit (struct ivopts_data *data)
1043 {
1044   edge exit = single_dom_exit (data->current_loop);
1045 
1046   if (!exit)
1047     return NULL;
1048 
1049   return niter_for_exit (data, exit);
1050 }
1051 
1052 /* Initializes data structures used by the iv optimization pass, stored
1053    in DATA.  */
1054 
1055 static void
1056 tree_ssa_iv_optimize_init (struct ivopts_data *data)
1057 {
1058   data->version_info_size = 2 * num_ssa_names;
1059   data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
1060   data->relevant = BITMAP_ALLOC (NULL);
1061   data->important_candidates = BITMAP_ALLOC (NULL);
1062   data->max_inv_id = 0;
1063   data->niters = NULL;
1064   data->vgroups.create (20);
1065   data->vcands.create (20);
1066   data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
1067   data->max_inv_expr_id = 0;
1068   data->name_expansion_cache = NULL;
1069   data->iv_common_cand_tab = new hash_table<iv_common_cand_hasher> (10);
1070   data->iv_common_cands.create (20);
1071   decl_rtl_to_reset.create (20);
1072   gcc_obstack_init (&data->iv_obstack);
1073 }
1074 
1075 /* Returns a memory object to that EXPR points.  In case we are able to
1076    determine that it does not point to any such object, NULL is returned.  */
1077 
1078 static tree
1079 determine_base_object (tree expr)
1080 {
1081   enum tree_code code = TREE_CODE (expr);
1082   tree base, obj;
1083 
1084   /* If this is a pointer casted to any type, we need to determine
1085      the base object for the pointer; so handle conversions before
1086      throwing away non-pointer expressions.  */
1087   if (CONVERT_EXPR_P (expr))
1088     return determine_base_object (TREE_OPERAND (expr, 0));
1089 
1090   if (!POINTER_TYPE_P (TREE_TYPE (expr)))
1091     return NULL_TREE;
1092 
1093   switch (code)
1094     {
1095     case INTEGER_CST:
1096       return NULL_TREE;
1097 
1098     case ADDR_EXPR:
1099       obj = TREE_OPERAND (expr, 0);
1100       base = get_base_address (obj);
1101 
1102       if (!base)
1103 	return expr;
1104 
1105       if (TREE_CODE (base) == MEM_REF)
1106 	return determine_base_object (TREE_OPERAND (base, 0));
1107 
1108       return fold_convert (ptr_type_node,
1109 			   build_fold_addr_expr (base));
1110 
1111     case POINTER_PLUS_EXPR:
1112       return determine_base_object (TREE_OPERAND (expr, 0));
1113 
1114     case PLUS_EXPR:
1115     case MINUS_EXPR:
1116       /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
1117       gcc_unreachable ();
1118 
1119     default:
1120       return fold_convert (ptr_type_node, expr);
1121     }
1122 }
1123 
1124 /* Return true if address expression with non-DECL_P operand appears
1125    in EXPR.  */
1126 
1127 static bool
1128 contain_complex_addr_expr (tree expr)
1129 {
1130   bool res = false;
1131 
1132   STRIP_NOPS (expr);
1133   switch (TREE_CODE (expr))
1134     {
1135     case POINTER_PLUS_EXPR:
1136     case PLUS_EXPR:
1137     case MINUS_EXPR:
1138       res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
1139       res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
1140       break;
1141 
1142     case ADDR_EXPR:
1143       return (!DECL_P (TREE_OPERAND (expr, 0)));
1144 
1145     default:
1146       return false;
1147     }
1148 
1149   return res;
1150 }
1151 
1152 /* Allocates an induction variable with given initial value BASE and step STEP
1153    for loop LOOP.  NO_OVERFLOW implies the iv doesn't overflow.  */
1154 
1155 static struct iv *
1156 alloc_iv (struct ivopts_data *data, tree base, tree step,
1157 	  bool no_overflow = false)
1158 {
1159   tree expr = base;
1160   struct iv *iv = (struct iv*) obstack_alloc (&data->iv_obstack,
1161 					      sizeof (struct iv));
1162   gcc_assert (step != NULL_TREE);
1163 
1164   /* Lower address expression in base except ones with DECL_P as operand.
1165      By doing this:
1166        1) More accurate cost can be computed for address expressions;
1167        2) Duplicate candidates won't be created for bases in different
1168 	  forms, like &a[0] and &a.  */
1169   STRIP_NOPS (expr);
1170   if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
1171       || contain_complex_addr_expr (expr))
1172     {
1173       aff_tree comb;
1174       tree_to_aff_combination (expr, TREE_TYPE (expr), &comb);
1175       base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
1176     }
1177 
1178   iv->base = base;
1179   iv->base_object = determine_base_object (base);
1180   iv->step = step;
1181   iv->biv_p = false;
1182   iv->nonlin_use = NULL;
1183   iv->ssa_name = NULL_TREE;
1184   if (!no_overflow
1185        && !iv_can_overflow_p (data->current_loop, TREE_TYPE (base),
1186 			      base, step))
1187     no_overflow = true;
1188   iv->no_overflow = no_overflow;
1189   iv->have_address_use = false;
1190 
1191   return iv;
1192 }
1193 
1194 /* Sets STEP and BASE for induction variable IV.  NO_OVERFLOW implies the IV
1195    doesn't overflow.  */
1196 
1197 static void
1198 set_iv (struct ivopts_data *data, tree iv, tree base, tree step,
1199 	bool no_overflow)
1200 {
1201   struct version_info *info = name_info (data, iv);
1202 
1203   gcc_assert (!info->iv);
1204 
1205   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
1206   info->iv = alloc_iv (data, base, step, no_overflow);
1207   info->iv->ssa_name = iv;
1208 }
1209 
1210 /* Finds induction variable declaration for VAR.  */
1211 
1212 static struct iv *
1213 get_iv (struct ivopts_data *data, tree var)
1214 {
1215   basic_block bb;
1216   tree type = TREE_TYPE (var);
1217 
1218   if (!POINTER_TYPE_P (type)
1219       && !INTEGRAL_TYPE_P (type))
1220     return NULL;
1221 
1222   if (!name_info (data, var)->iv)
1223     {
1224       bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1225 
1226       if (!bb
1227 	  || !flow_bb_inside_loop_p (data->current_loop, bb))
1228 	set_iv (data, var, var, build_int_cst (type, 0), true);
1229     }
1230 
1231   return name_info (data, var)->iv;
1232 }
1233 
1234 /* Return the first non-invariant ssa var found in EXPR.  */
1235 
1236 static tree
1237 extract_single_var_from_expr (tree expr)
1238 {
1239   int i, n;
1240   tree tmp;
1241   enum tree_code code;
1242 
1243   if (!expr || is_gimple_min_invariant (expr))
1244     return NULL;
1245 
1246   code = TREE_CODE (expr);
1247   if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1248     {
1249       n = TREE_OPERAND_LENGTH (expr);
1250       for (i = 0; i < n; i++)
1251 	{
1252 	  tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));
1253 
1254 	  if (tmp)
1255 	    return tmp;
1256 	}
1257     }
1258   return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
1259 }
1260 
1261 /* Finds basic ivs.  */
1262 
1263 static bool
1264 find_bivs (struct ivopts_data *data)
1265 {
1266   gphi *phi;
1267   affine_iv iv;
1268   tree step, type, base, stop;
1269   bool found = false;
1270   struct loop *loop = data->current_loop;
1271   gphi_iterator psi;
1272 
1273   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1274     {
1275       phi = psi.phi ();
1276 
1277       if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
1278 	continue;
1279 
1280       if (virtual_operand_p (PHI_RESULT (phi)))
1281 	continue;
1282 
1283       if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true))
1284 	continue;
1285 
1286       if (integer_zerop (iv.step))
1287 	continue;
1288 
1289       step = iv.step;
1290       base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1291       /* Stop expanding iv base at the first ssa var referred by iv step.
1292 	 Ideally we should stop at any ssa var, because that's expensive
1293 	 and unusual to happen, we just do it on the first one.
1294 
1295 	 See PR64705 for the rationale.  */
1296       stop = extract_single_var_from_expr (step);
1297       base = expand_simple_operations (base, stop);
1298       if (contains_abnormal_ssa_name_p (base)
1299 	  || contains_abnormal_ssa_name_p (step))
1300 	continue;
1301 
1302       type = TREE_TYPE (PHI_RESULT (phi));
1303       base = fold_convert (type, base);
1304       if (step)
1305 	{
1306 	  if (POINTER_TYPE_P (type))
1307 	    step = convert_to_ptrofftype (step);
1308 	  else
1309 	    step = fold_convert (type, step);
1310 	}
1311 
1312       set_iv (data, PHI_RESULT (phi), base, step, iv.no_overflow);
1313       found = true;
1314     }
1315 
1316   return found;
1317 }
1318 
1319 /* Marks basic ivs.  */
1320 
1321 static void
1322 mark_bivs (struct ivopts_data *data)
1323 {
1324   gphi *phi;
1325   gimple *def;
1326   tree var;
1327   struct iv *iv, *incr_iv;
1328   struct loop *loop = data->current_loop;
1329   basic_block incr_bb;
1330   gphi_iterator psi;
1331 
1332   data->bivs_not_used_in_addr = 0;
1333   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1334     {
1335       phi = psi.phi ();
1336 
1337       iv = get_iv (data, PHI_RESULT (phi));
1338       if (!iv)
1339 	continue;
1340 
1341       var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1342       def = SSA_NAME_DEF_STMT (var);
1343       /* Don't mark iv peeled from other one as biv.  */
1344       if (def
1345 	  && gimple_code (def) == GIMPLE_PHI
1346 	  && gimple_bb (def) == loop->header)
1347 	continue;
1348 
1349       incr_iv = get_iv (data, var);
1350       if (!incr_iv)
1351 	continue;
1352 
1353       /* If the increment is in the subloop, ignore it.  */
1354       incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1355       if (incr_bb->loop_father != data->current_loop
1356 	  || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1357 	continue;
1358 
1359       iv->biv_p = true;
1360       incr_iv->biv_p = true;
1361       if (iv->no_overflow)
1362 	data->bivs_not_used_in_addr++;
1363       if (incr_iv->no_overflow)
1364 	data->bivs_not_used_in_addr++;
1365     }
1366 }
1367 
1368 /* Checks whether STMT defines a linear induction variable and stores its
1369    parameters to IV.  */
1370 
1371 static bool
1372 find_givs_in_stmt_scev (struct ivopts_data *data, gimple *stmt, affine_iv *iv)
1373 {
1374   tree lhs, stop;
1375   struct loop *loop = data->current_loop;
1376 
1377   iv->base = NULL_TREE;
1378   iv->step = NULL_TREE;
1379 
1380   if (gimple_code (stmt) != GIMPLE_ASSIGN)
1381     return false;
1382 
1383   lhs = gimple_assign_lhs (stmt);
1384   if (TREE_CODE (lhs) != SSA_NAME)
1385     return false;
1386 
1387   if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1388     return false;
1389 
1390   /* Stop expanding iv base at the first ssa var referred by iv step.
1391      Ideally we should stop at any ssa var, because that's expensive
1392      and unusual to happen, we just do it on the first one.
1393 
1394      See PR64705 for the rationale.  */
1395   stop = extract_single_var_from_expr (iv->step);
1396   iv->base = expand_simple_operations (iv->base, stop);
1397   if (contains_abnormal_ssa_name_p (iv->base)
1398       || contains_abnormal_ssa_name_p (iv->step))
1399     return false;
1400 
1401   /* If STMT could throw, then do not consider STMT as defining a GIV.
1402      While this will suppress optimizations, we can not safely delete this
1403      GIV and associated statements, even if it appears it is not used.  */
1404   if (stmt_could_throw_p (stmt))
1405     return false;
1406 
1407   return true;
1408 }
1409 
1410 /* Finds general ivs in statement STMT.  */
1411 
1412 static void
1413 find_givs_in_stmt (struct ivopts_data *data, gimple *stmt)
1414 {
1415   affine_iv iv;
1416 
1417   if (!find_givs_in_stmt_scev (data, stmt, &iv))
1418     return;
1419 
1420   set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step, iv.no_overflow);
1421 }
1422 
1423 /* Finds general ivs in basic block BB.  */
1424 
1425 static void
1426 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1427 {
1428   gimple_stmt_iterator bsi;
1429 
1430   for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1431     find_givs_in_stmt (data, gsi_stmt (bsi));
1432 }
1433 
1434 /* Finds general ivs.  */
1435 
1436 static void
1437 find_givs (struct ivopts_data *data)
1438 {
1439   struct loop *loop = data->current_loop;
1440   basic_block *body = get_loop_body_in_dom_order (loop);
1441   unsigned i;
1442 
1443   for (i = 0; i < loop->num_nodes; i++)
1444     find_givs_in_bb (data, body[i]);
1445   free (body);
1446 }
1447 
1448 /* For each ssa name defined in LOOP determines whether it is an induction
1449    variable and if so, its initial value and step.  */
1450 
1451 static bool
1452 find_induction_variables (struct ivopts_data *data)
1453 {
1454   unsigned i;
1455   bitmap_iterator bi;
1456 
1457   if (!find_bivs (data))
1458     return false;
1459 
1460   find_givs (data);
1461   mark_bivs (data);
1462 
1463   if (dump_file && (dump_flags & TDF_DETAILS))
1464     {
1465       struct tree_niter_desc *niter = niter_for_single_dom_exit (data);
1466 
1467       if (niter)
1468 	{
1469 	  fprintf (dump_file, "  number of iterations ");
1470 	  print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1471 	  if (!integer_zerop (niter->may_be_zero))
1472 	    {
1473 	      fprintf (dump_file, "; zero if ");
1474 	      print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1475 	    }
1476 	  fprintf (dump_file, "\n");
1477 	};
1478 
1479       fprintf (dump_file, "\n<Induction Vars>:\n");
1480       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1481 	{
1482 	  struct version_info *info = ver_info (data, i);
1483 	  if (info->iv && info->iv->step && !integer_zerop (info->iv->step))
1484 	    dump_iv (dump_file, ver_info (data, i)->iv, true, 0);
1485 	}
1486     }
1487 
1488   return true;
1489 }
1490 
1491 /* Records a use of TYPE at *USE_P in STMT whose value is IV in GROUP.
1492    For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
1493    is the const offset stripped from IV base; for other types use, both
1494    are zero by default.  */
1495 
1496 static struct iv_use *
1497 record_use (struct iv_group *group, tree *use_p, struct iv *iv,
1498 	    gimple *stmt, enum use_type type, tree addr_base,
1499 	    unsigned HOST_WIDE_INT addr_offset)
1500 {
1501   struct iv_use *use = XCNEW (struct iv_use);
1502 
1503   use->id = group->vuses.length ();
1504   use->group_id = group->id;
1505   use->type = type;
1506   use->iv = iv;
1507   use->stmt = stmt;
1508   use->op_p = use_p;
1509   use->addr_base = addr_base;
1510   use->addr_offset = addr_offset;
1511 
1512   group->vuses.safe_push (use);
1513   return use;
1514 }
1515 
1516 /* Checks whether OP is a loop-level invariant and if so, records it.
1517    NONLINEAR_USE is true if the invariant is used in a way we do not
1518    handle specially.  */
1519 
1520 static void
1521 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1522 {
1523   basic_block bb;
1524   struct version_info *info;
1525 
1526   if (TREE_CODE (op) != SSA_NAME
1527       || virtual_operand_p (op))
1528     return;
1529 
1530   bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1531   if (bb
1532       && flow_bb_inside_loop_p (data->current_loop, bb))
1533     return;
1534 
1535   info = name_info (data, op);
1536   info->name = op;
1537   info->has_nonlin_use |= nonlinear_use;
1538   if (!info->inv_id)
1539     info->inv_id = ++data->max_inv_id;
1540   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1541 }
1542 
1543 static tree
1544 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset);
1545 
1546 /* Record a group of TYPE.  */
1547 
1548 static struct iv_group *
1549 record_group (struct ivopts_data *data, enum use_type type)
1550 {
1551   struct iv_group *group = XCNEW (struct iv_group);
1552 
1553   group->id = data->vgroups.length ();
1554   group->type = type;
1555   group->related_cands = BITMAP_ALLOC (NULL);
1556   group->vuses.create (1);
1557 
1558   data->vgroups.safe_push (group);
1559   return group;
1560 }
1561 
1562 /* Record a use of TYPE at *USE_P in STMT whose value is IV in a group.
1563    New group will be created if there is no existing group for the use.  */
1564 
1565 static struct iv_use *
1566 record_group_use (struct ivopts_data *data, tree *use_p,
1567 		  struct iv *iv, gimple *stmt, enum use_type type)
1568 {
1569   tree addr_base = NULL;
1570   struct iv_group *group = NULL;
1571   unsigned HOST_WIDE_INT addr_offset = 0;
1572 
1573   /* Record non address type use in a new group.  */
1574   if (type == USE_ADDRESS && iv->base_object)
1575     {
1576       unsigned int i;
1577 
1578       addr_base = strip_offset (iv->base, &addr_offset);
1579       for (i = 0; i < data->vgroups.length (); i++)
1580 	{
1581 	  struct iv_use *use;
1582 
1583 	  group = data->vgroups[i];
1584 	  use = group->vuses[0];
1585 	  if (use->type != USE_ADDRESS || !use->iv->base_object)
1586 	    continue;
1587 
1588 	  /* Check if it has the same stripped base and step.  */
1589 	  if (operand_equal_p (iv->base_object, use->iv->base_object, 0)
1590 	      && operand_equal_p (iv->step, use->iv->step, 0)
1591 	      && operand_equal_p (addr_base, use->addr_base, 0))
1592 	    break;
1593 	}
1594       if (i == data->vgroups.length ())
1595 	group = NULL;
1596     }
1597 
1598   if (!group)
1599     group = record_group (data, type);
1600 
1601   return record_use (group, use_p, iv, stmt, type, addr_base, addr_offset);
1602 }
1603 
1604 /* Checks whether the use OP is interesting and if so, records it.  */
1605 
1606 static struct iv_use *
1607 find_interesting_uses_op (struct ivopts_data *data, tree op)
1608 {
1609   struct iv *iv;
1610   gimple *stmt;
1611   struct iv_use *use;
1612 
1613   if (TREE_CODE (op) != SSA_NAME)
1614     return NULL;
1615 
1616   iv = get_iv (data, op);
1617   if (!iv)
1618     return NULL;
1619 
1620   if (iv->nonlin_use)
1621     {
1622       gcc_assert (iv->nonlin_use->type == USE_NONLINEAR_EXPR);
1623       return iv->nonlin_use;
1624     }
1625 
1626   if (integer_zerop (iv->step))
1627     {
1628       record_invariant (data, op, true);
1629       return NULL;
1630     }
1631 
1632   stmt = SSA_NAME_DEF_STMT (op);
1633   gcc_assert (gimple_code (stmt) == GIMPLE_PHI || is_gimple_assign (stmt));
1634 
1635   use = record_group_use (data, NULL, iv, stmt, USE_NONLINEAR_EXPR);
1636   iv->nonlin_use = use;
1637   return use;
1638 }
1639 
1640 /* Given a condition in statement STMT, checks whether it is a compare
1641    of an induction variable and an invariant.  If this is the case,
1642    CONTROL_VAR is set to location of the iv, BOUND to the location of
1643    the invariant, IV_VAR and IV_BOUND are set to the corresponding
1644    induction variable descriptions, and true is returned.  If this is not
1645    the case, CONTROL_VAR and BOUND are set to the arguments of the
1646    condition and false is returned.  */
1647 
1648 static bool
1649 extract_cond_operands (struct ivopts_data *data, gimple *stmt,
1650 		       tree **control_var, tree **bound,
1651 		       struct iv **iv_var, struct iv **iv_bound)
1652 {
1653   /* The objects returned when COND has constant operands.  */
1654   static struct iv const_iv;
1655   static tree zero;
1656   tree *op0 = &zero, *op1 = &zero;
1657   struct iv *iv0 = &const_iv, *iv1 = &const_iv;
1658   bool ret = false;
1659 
1660   if (gimple_code (stmt) == GIMPLE_COND)
1661     {
1662       gcond *cond_stmt = as_a <gcond *> (stmt);
1663       op0 = gimple_cond_lhs_ptr (cond_stmt);
1664       op1 = gimple_cond_rhs_ptr (cond_stmt);
1665     }
1666   else
1667     {
1668       op0 = gimple_assign_rhs1_ptr (stmt);
1669       op1 = gimple_assign_rhs2_ptr (stmt);
1670     }
1671 
1672   zero = integer_zero_node;
1673   const_iv.step = integer_zero_node;
1674 
1675   if (TREE_CODE (*op0) == SSA_NAME)
1676     iv0 = get_iv (data, *op0);
1677   if (TREE_CODE (*op1) == SSA_NAME)
1678     iv1 = get_iv (data, *op1);
1679 
1680   /* Exactly one of the compared values must be an iv, and the other one must
1681      be an invariant.  */
1682   if (!iv0 || !iv1)
1683     goto end;
1684 
1685   if (integer_zerop (iv0->step))
1686     {
1687       /* Control variable may be on the other side.  */
1688       std::swap (op0, op1);
1689       std::swap (iv0, iv1);
1690     }
1691   ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);
1692 
1693 end:
1694   if (control_var)
1695     *control_var = op0;
1696   if (iv_var)
1697     *iv_var = iv0;
1698   if (bound)
1699     *bound = op1;
1700   if (iv_bound)
1701     *iv_bound = iv1;
1702 
1703   return ret;
1704 }
1705 
1706 /* Checks whether the condition in STMT is interesting and if so,
1707    records it.  */
1708 
1709 static void
1710 find_interesting_uses_cond (struct ivopts_data *data, gimple *stmt)
1711 {
1712   tree *var_p, *bound_p;
1713   struct iv *var_iv;
1714 
1715   if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
1716     {
1717       find_interesting_uses_op (data, *var_p);
1718       find_interesting_uses_op (data, *bound_p);
1719       return;
1720     }
1721 
1722   record_group_use (data, NULL, var_iv, stmt, USE_COMPARE);
1723 }
1724 
1725 /* Returns the outermost loop EXPR is obviously invariant in
1726    relative to the loop LOOP, i.e. if all its operands are defined
1727    outside of the returned loop.  Returns NULL if EXPR is not
1728    even obviously invariant in LOOP.  */
1729 
1730 struct loop *
1731 outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
1732 {
1733   basic_block def_bb;
1734   unsigned i, len;
1735 
1736   if (is_gimple_min_invariant (expr))
1737     return current_loops->tree_root;
1738 
1739   if (TREE_CODE (expr) == SSA_NAME)
1740     {
1741       def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1742       if (def_bb)
1743 	{
1744 	  if (flow_bb_inside_loop_p (loop, def_bb))
1745 	    return NULL;
1746 	  return superloop_at_depth (loop,
1747 				     loop_depth (def_bb->loop_father) + 1);
1748 	}
1749 
1750       return current_loops->tree_root;
1751     }
1752 
1753   if (!EXPR_P (expr))
1754     return NULL;
1755 
1756   unsigned maxdepth = 0;
1757   len = TREE_OPERAND_LENGTH (expr);
1758   for (i = 0; i < len; i++)
1759     {
1760       struct loop *ivloop;
1761       if (!TREE_OPERAND (expr, i))
1762 	continue;
1763 
1764       ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
1765       if (!ivloop)
1766 	return NULL;
1767       maxdepth = MAX (maxdepth, loop_depth (ivloop));
1768     }
1769 
1770   return superloop_at_depth (loop, maxdepth);
1771 }
1772 
1773 /* Returns true if expression EXPR is obviously invariant in LOOP,
1774    i.e. if all its operands are defined outside of the LOOP.  LOOP
1775    should not be the function body.  */
1776 
1777 bool
1778 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1779 {
1780   basic_block def_bb;
1781   unsigned i, len;
1782 
1783   gcc_assert (loop_depth (loop) > 0);
1784 
1785   if (is_gimple_min_invariant (expr))
1786     return true;
1787 
1788   if (TREE_CODE (expr) == SSA_NAME)
1789     {
1790       def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1791       if (def_bb
1792 	  && flow_bb_inside_loop_p (loop, def_bb))
1793 	return false;
1794 
1795       return true;
1796     }
1797 
1798   if (!EXPR_P (expr))
1799     return false;
1800 
1801   len = TREE_OPERAND_LENGTH (expr);
1802   for (i = 0; i < len; i++)
1803     if (TREE_OPERAND (expr, i)
1804 	&& !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1805       return false;
1806 
1807   return true;
1808 }
1809 
1810 /* Given expression EXPR which computes inductive values with respect
1811    to loop recorded in DATA, this function returns biv from which EXPR
1812    is derived by tracing definition chains of ssa variables in EXPR.  */
1813 
1814 static struct iv*
1815 find_deriving_biv_for_expr (struct ivopts_data *data, tree expr)
1816 {
1817   struct iv *iv;
1818   unsigned i, n;
1819   tree e2, e1;
1820   enum tree_code code;
1821   gimple *stmt;
1822 
1823   if (expr == NULL_TREE)
1824     return NULL;
1825 
1826   if (is_gimple_min_invariant (expr))
1827     return NULL;
1828 
1829   code = TREE_CODE (expr);
1830   if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1831     {
1832       n = TREE_OPERAND_LENGTH (expr);
1833       for (i = 0; i < n; i++)
1834 	{
1835 	  iv = find_deriving_biv_for_expr (data, TREE_OPERAND (expr, i));
1836 	  if (iv)
1837 	    return iv;
1838 	}
1839     }
1840 
1841   /* Stop if it's not ssa name.  */
1842   if (code != SSA_NAME)
1843     return NULL;
1844 
1845   iv = get_iv (data, expr);
1846   if (!iv || integer_zerop (iv->step))
1847     return NULL;
1848   else if (iv->biv_p)
1849     return iv;
1850 
1851   stmt = SSA_NAME_DEF_STMT (expr);
1852   if (gphi *phi = dyn_cast <gphi *> (stmt))
1853     {
1854       ssa_op_iter iter;
1855       use_operand_p use_p;
1856       basic_block phi_bb = gimple_bb (phi);
1857 
1858       /* Skip loop header PHI that doesn't define biv.  */
1859       if (phi_bb->loop_father == data->current_loop)
1860 	return NULL;
1861 
1862       if (virtual_operand_p (gimple_phi_result (phi)))
1863 	return NULL;
1864 
1865       FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
1866 	{
1867 	  tree use = USE_FROM_PTR (use_p);
1868 	  iv = find_deriving_biv_for_expr (data, use);
1869 	  if (iv)
1870 	    return iv;
1871 	}
1872       return NULL;
1873     }
1874   if (gimple_code (stmt) != GIMPLE_ASSIGN)
1875     return NULL;
1876 
1877   e1 = gimple_assign_rhs1 (stmt);
1878   code = gimple_assign_rhs_code (stmt);
1879   if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
1880     return find_deriving_biv_for_expr (data, e1);
1881 
1882   switch (code)
1883     {
1884     case MULT_EXPR:
1885     case PLUS_EXPR:
1886     case MINUS_EXPR:
1887     case POINTER_PLUS_EXPR:
1888       /* Increments, decrements and multiplications by a constant
1889 	 are simple.  */
1890       e2 = gimple_assign_rhs2 (stmt);
1891       iv = find_deriving_biv_for_expr (data, e2);
1892       if (iv)
1893 	return iv;
1894       gcc_fallthrough ();
1895 
1896     CASE_CONVERT:
1897       /* Casts are simple.  */
1898       return find_deriving_biv_for_expr (data, e1);
1899 
1900     default:
1901       break;
1902     }
1903 
1904   return NULL;
1905 }
1906 
1907 /* Record BIV, its predecessor and successor that they are used in
1908    address type uses.  */
1909 
1910 static void
1911 record_biv_for_address_use (struct ivopts_data *data, struct iv *biv)
1912 {
1913   unsigned i;
1914   tree type, base_1, base_2;
1915   bitmap_iterator bi;
1916 
1917   if (!biv || !biv->biv_p || integer_zerop (biv->step)
1918       || biv->have_address_use || !biv->no_overflow)
1919     return;
1920 
1921   type = TREE_TYPE (biv->base);
1922   if (!INTEGRAL_TYPE_P (type))
1923     return;
1924 
1925   biv->have_address_use = true;
1926   data->bivs_not_used_in_addr--;
1927   base_1 = fold_build2 (PLUS_EXPR, type, biv->base, biv->step);
1928   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1929     {
1930       struct iv *iv = ver_info (data, i)->iv;
1931 
1932       if (!iv || !iv->biv_p || integer_zerop (iv->step)
1933 	  || iv->have_address_use || !iv->no_overflow)
1934 	continue;
1935 
1936       if (type != TREE_TYPE (iv->base)
1937 	  || !INTEGRAL_TYPE_P (TREE_TYPE (iv->base)))
1938 	continue;
1939 
1940       if (!operand_equal_p (biv->step, iv->step, 0))
1941 	continue;
1942 
1943       base_2 = fold_build2 (PLUS_EXPR, type, iv->base, iv->step);
1944       if (operand_equal_p (base_1, iv->base, 0)
1945 	  || operand_equal_p (base_2, biv->base, 0))
1946 	{
1947 	  iv->have_address_use = true;
1948 	  data->bivs_not_used_in_addr--;
1949 	}
1950     }
1951 }
1952 
1953 /* Cumulates the steps of indices into DATA and replaces their values with the
1954    initial ones.  Returns false when the value of the index cannot be determined.
1955    Callback for for_each_index.  */
1956 
1957 struct ifs_ivopts_data
1958 {
1959   struct ivopts_data *ivopts_data;
1960   gimple *stmt;
1961   tree step;
1962 };
1963 
1964 static bool
1965 idx_find_step (tree base, tree *idx, void *data)
1966 {
1967   struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
1968   struct iv *iv;
1969   bool use_overflow_semantics = false;
1970   tree step, iv_base, iv_step, lbound, off;
1971   struct loop *loop = dta->ivopts_data->current_loop;
1972 
1973   /* If base is a component ref, require that the offset of the reference
1974      be invariant.  */
1975   if (TREE_CODE (base) == COMPONENT_REF)
1976     {
1977       off = component_ref_field_offset (base);
1978       return expr_invariant_in_loop_p (loop, off);
1979     }
1980 
1981   /* If base is array, first check whether we will be able to move the
1982      reference out of the loop (in order to take its address in strength
1983      reduction).  In order for this to work we need both lower bound
1984      and step to be loop invariants.  */
1985   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1986     {
1987       /* Moreover, for a range, the size needs to be invariant as well.  */
1988       if (TREE_CODE (base) == ARRAY_RANGE_REF
1989 	  && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
1990 	return false;
1991 
1992       step = array_ref_element_size (base);
1993       lbound = array_ref_low_bound (base);
1994 
1995       if (!expr_invariant_in_loop_p (loop, step)
1996 	  || !expr_invariant_in_loop_p (loop, lbound))
1997 	return false;
1998     }
1999 
2000   if (TREE_CODE (*idx) != SSA_NAME)
2001     return true;
2002 
2003   iv = get_iv (dta->ivopts_data, *idx);
2004   if (!iv)
2005     return false;
2006 
2007   /* XXX  We produce for a base of *D42 with iv->base being &x[0]
2008 	  *&x[0], which is not folded and does not trigger the
2009 	  ARRAY_REF path below.  */
2010   *idx = iv->base;
2011 
2012   if (integer_zerop (iv->step))
2013     return true;
2014 
2015   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2016     {
2017       step = array_ref_element_size (base);
2018 
2019       /* We only handle addresses whose step is an integer constant.  */
2020       if (TREE_CODE (step) != INTEGER_CST)
2021 	return false;
2022     }
2023   else
2024     /* The step for pointer arithmetics already is 1 byte.  */
2025     step = size_one_node;
2026 
2027   iv_base = iv->base;
2028   iv_step = iv->step;
2029   if (iv->no_overflow && nowrap_type_p (TREE_TYPE (iv_step)))
2030     use_overflow_semantics = true;
2031 
2032   if (!convert_affine_scev (dta->ivopts_data->current_loop,
2033 			    sizetype, &iv_base, &iv_step, dta->stmt,
2034 			    use_overflow_semantics))
2035     {
2036       /* The index might wrap.  */
2037       return false;
2038     }
2039 
2040   step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
2041   dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
2042 
2043   if (dta->ivopts_data->bivs_not_used_in_addr)
2044     {
2045       if (!iv->biv_p)
2046 	iv = find_deriving_biv_for_expr (dta->ivopts_data, iv->ssa_name);
2047 
2048       record_biv_for_address_use (dta->ivopts_data, iv);
2049     }
2050   return true;
2051 }
2052 
2053 /* Records use in index IDX.  Callback for for_each_index.  Ivopts data
2054    object is passed to it in DATA.  */
2055 
2056 static bool
2057 idx_record_use (tree base, tree *idx,
2058 		void *vdata)
2059 {
2060   struct ivopts_data *data = (struct ivopts_data *) vdata;
2061   find_interesting_uses_op (data, *idx);
2062   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2063     {
2064       find_interesting_uses_op (data, array_ref_element_size (base));
2065       find_interesting_uses_op (data, array_ref_low_bound (base));
2066     }
2067   return true;
2068 }
2069 
2070 /* If we can prove that TOP = cst * BOT for some constant cst,
2071    store cst to MUL and return true.  Otherwise return false.
2072    The returned value is always sign-extended, regardless of the
2073    signedness of TOP and BOT.  */
2074 
2075 static bool
2076 constant_multiple_of (tree top, tree bot, widest_int *mul)
2077 {
2078   tree mby;
2079   enum tree_code code;
2080   unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
2081   widest_int res, p0, p1;
2082 
2083   STRIP_NOPS (top);
2084   STRIP_NOPS (bot);
2085 
2086   if (operand_equal_p (top, bot, 0))
2087     {
2088       *mul = 1;
2089       return true;
2090     }
2091 
2092   code = TREE_CODE (top);
2093   switch (code)
2094     {
2095     case MULT_EXPR:
2096       mby = TREE_OPERAND (top, 1);
2097       if (TREE_CODE (mby) != INTEGER_CST)
2098 	return false;
2099 
2100       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
2101 	return false;
2102 
2103       *mul = wi::sext (res * wi::to_widest (mby), precision);
2104       return true;
2105 
2106     case PLUS_EXPR:
2107     case MINUS_EXPR:
2108       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
2109 	  || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
2110 	return false;
2111 
2112       if (code == MINUS_EXPR)
2113 	p1 = -p1;
2114       *mul = wi::sext (p0 + p1, precision);
2115       return true;
2116 
2117     case INTEGER_CST:
2118       if (TREE_CODE (bot) != INTEGER_CST)
2119 	return false;
2120 
2121       p0 = widest_int::from (top, SIGNED);
2122       p1 = widest_int::from (bot, SIGNED);
2123       if (p1 == 0)
2124 	return false;
2125       *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
2126       return res == 0;
2127 
2128     default:
2129       return false;
2130     }
2131 }
2132 
2133 /* Return true if memory reference REF with step STEP may be unaligned.  */
2134 
2135 static bool
2136 may_be_unaligned_p (tree ref, tree step)
2137 {
2138   /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
2139      thus they are not misaligned.  */
2140   if (TREE_CODE (ref) == TARGET_MEM_REF)
2141     return false;
2142 
2143   unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
2144   if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
2145     align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));
2146 
2147   unsigned HOST_WIDE_INT bitpos;
2148   unsigned int ref_align;
2149   get_object_alignment_1 (ref, &ref_align, &bitpos);
2150   if (ref_align < align
2151       || (bitpos % align) != 0
2152       || (bitpos % BITS_PER_UNIT) != 0)
2153     return true;
2154 
2155   unsigned int trailing_zeros = tree_ctz (step);
2156   if (trailing_zeros < HOST_BITS_PER_INT
2157       && (1U << trailing_zeros) * BITS_PER_UNIT < align)
2158     return true;
2159 
2160   return false;
2161 }
2162 
2163 /* Return true if EXPR may be non-addressable.   */
2164 
2165 bool
2166 may_be_nonaddressable_p (tree expr)
2167 {
2168   switch (TREE_CODE (expr))
2169     {
2170     case TARGET_MEM_REF:
2171       /* TARGET_MEM_REFs are translated directly to valid MEMs on the
2172 	 target, thus they are always addressable.  */
2173       return false;
2174 
2175     case MEM_REF:
2176       /* Likewise for MEM_REFs, modulo the storage order.  */
2177       return REF_REVERSE_STORAGE_ORDER (expr);
2178 
2179     case BIT_FIELD_REF:
2180       if (REF_REVERSE_STORAGE_ORDER (expr))
2181 	return true;
2182       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2183 
2184     case COMPONENT_REF:
2185       if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2186 	return true;
2187       return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
2188 	     || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2189 
2190     case ARRAY_REF:
2191     case ARRAY_RANGE_REF:
2192       if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2193 	return true;
2194       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2195 
2196     case VIEW_CONVERT_EXPR:
2197       /* This kind of view-conversions may wrap non-addressable objects
2198 	 and make them look addressable.  After some processing the
2199 	 non-addressability may be uncovered again, causing ADDR_EXPRs
2200 	 of inappropriate objects to be built.  */
2201       if (is_gimple_reg (TREE_OPERAND (expr, 0))
2202 	  || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
2203 	return true;
2204       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2205 
2206     CASE_CONVERT:
2207       return true;
2208 
2209     default:
2210       break;
2211     }
2212 
2213   return false;
2214 }
2215 
2216 /* Finds addresses in *OP_P inside STMT.  */
2217 
2218 static void
2219 find_interesting_uses_address (struct ivopts_data *data, gimple *stmt,
2220 			       tree *op_p)
2221 {
2222   tree base = *op_p, step = size_zero_node;
2223   struct iv *civ;
2224   struct ifs_ivopts_data ifs_ivopts_data;
2225 
2226   /* Do not play with volatile memory references.  A bit too conservative,
2227      perhaps, but safe.  */
2228   if (gimple_has_volatile_ops (stmt))
2229     goto fail;
2230 
2231   /* Ignore bitfields for now.  Not really something terribly complicated
2232      to handle.  TODO.  */
2233   if (TREE_CODE (base) == BIT_FIELD_REF)
2234     goto fail;
2235 
2236   base = unshare_expr (base);
2237 
2238   if (TREE_CODE (base) == TARGET_MEM_REF)
2239     {
2240       tree type = build_pointer_type (TREE_TYPE (base));
2241       tree astep;
2242 
2243       if (TMR_BASE (base)
2244 	  && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
2245 	{
2246 	  civ = get_iv (data, TMR_BASE (base));
2247 	  if (!civ)
2248 	    goto fail;
2249 
2250 	  TMR_BASE (base) = civ->base;
2251 	  step = civ->step;
2252 	}
2253       if (TMR_INDEX2 (base)
2254 	  && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
2255 	{
2256 	  civ = get_iv (data, TMR_INDEX2 (base));
2257 	  if (!civ)
2258 	    goto fail;
2259 
2260 	  TMR_INDEX2 (base) = civ->base;
2261 	  step = civ->step;
2262 	}
2263       if (TMR_INDEX (base)
2264 	  && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
2265 	{
2266 	  civ = get_iv (data, TMR_INDEX (base));
2267 	  if (!civ)
2268 	    goto fail;
2269 
2270 	  TMR_INDEX (base) = civ->base;
2271 	  astep = civ->step;
2272 
2273 	  if (astep)
2274 	    {
2275 	      if (TMR_STEP (base))
2276 		astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
2277 
2278 	      step = fold_build2 (PLUS_EXPR, type, step, astep);
2279 	    }
2280 	}
2281 
2282       if (integer_zerop (step))
2283 	goto fail;
2284       base = tree_mem_ref_addr (type, base);
2285     }
2286   else
2287     {
2288       ifs_ivopts_data.ivopts_data = data;
2289       ifs_ivopts_data.stmt = stmt;
2290       ifs_ivopts_data.step = size_zero_node;
2291       if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
2292 	  || integer_zerop (ifs_ivopts_data.step))
2293 	goto fail;
2294       step = ifs_ivopts_data.step;
2295 
2296       /* Check that the base expression is addressable.  This needs
2297 	 to be done after substituting bases of IVs into it.  */
2298       if (may_be_nonaddressable_p (base))
2299 	goto fail;
2300 
2301       /* Moreover, on strict alignment platforms, check that it is
2302 	 sufficiently aligned.  */
2303       if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
2304 	goto fail;
2305 
2306       base = build_fold_addr_expr (base);
2307 
2308       /* Substituting bases of IVs into the base expression might
2309 	 have caused folding opportunities.  */
2310       if (TREE_CODE (base) == ADDR_EXPR)
2311 	{
2312 	  tree *ref = &TREE_OPERAND (base, 0);
2313 	  while (handled_component_p (*ref))
2314 	    ref = &TREE_OPERAND (*ref, 0);
2315 	  if (TREE_CODE (*ref) == MEM_REF)
2316 	    {
2317 	      tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
2318 				      TREE_OPERAND (*ref, 0),
2319 				      TREE_OPERAND (*ref, 1));
2320 	      if (tem)
2321 		*ref = tem;
2322 	    }
2323 	}
2324     }
2325 
2326   civ = alloc_iv (data, base, step);
2327   /* Fail if base object of this memory reference is unknown.  */
2328   if (civ->base_object == NULL_TREE)
2329     goto fail;
2330 
2331   record_group_use (data, op_p, civ, stmt, USE_ADDRESS);
2332   return;
2333 
2334 fail:
2335   for_each_index (op_p, idx_record_use, data);
2336 }
2337 
2338 /* Finds and records invariants used in STMT.  */
2339 
2340 static void
2341 find_invariants_stmt (struct ivopts_data *data, gimple *stmt)
2342 {
2343   ssa_op_iter iter;
2344   use_operand_p use_p;
2345   tree op;
2346 
2347   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2348     {
2349       op = USE_FROM_PTR (use_p);
2350       record_invariant (data, op, false);
2351     }
2352 }
2353 
2354 /* Finds interesting uses of induction variables in the statement STMT.  */
2355 
2356 static void
2357 find_interesting_uses_stmt (struct ivopts_data *data, gimple *stmt)
2358 {
2359   struct iv *iv;
2360   tree op, *lhs, *rhs;
2361   ssa_op_iter iter;
2362   use_operand_p use_p;
2363   enum tree_code code;
2364 
2365   find_invariants_stmt (data, stmt);
2366 
2367   if (gimple_code (stmt) == GIMPLE_COND)
2368     {
2369       find_interesting_uses_cond (data, stmt);
2370       return;
2371     }
2372 
2373   if (is_gimple_assign (stmt))
2374     {
2375       lhs = gimple_assign_lhs_ptr (stmt);
2376       rhs = gimple_assign_rhs1_ptr (stmt);
2377 
2378       if (TREE_CODE (*lhs) == SSA_NAME)
2379 	{
2380 	  /* If the statement defines an induction variable, the uses are not
2381 	     interesting by themselves.  */
2382 
2383 	  iv = get_iv (data, *lhs);
2384 
2385 	  if (iv && !integer_zerop (iv->step))
2386 	    return;
2387 	}
2388 
2389       code = gimple_assign_rhs_code (stmt);
2390       if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
2391 	  && (REFERENCE_CLASS_P (*rhs)
2392 	      || is_gimple_val (*rhs)))
2393 	{
2394 	  if (REFERENCE_CLASS_P (*rhs))
2395 	    find_interesting_uses_address (data, stmt, rhs);
2396 	  else
2397 	    find_interesting_uses_op (data, *rhs);
2398 
2399 	  if (REFERENCE_CLASS_P (*lhs))
2400 	    find_interesting_uses_address (data, stmt, lhs);
2401 	  return;
2402 	}
2403       else if (TREE_CODE_CLASS (code) == tcc_comparison)
2404 	{
2405 	  find_interesting_uses_cond (data, stmt);
2406 	  return;
2407 	}
2408 
2409       /* TODO -- we should also handle address uses of type
2410 
2411 	 memory = call (whatever);
2412 
2413 	 and
2414 
2415 	 call (memory).  */
2416     }
2417 
2418   if (gimple_code (stmt) == GIMPLE_PHI
2419       && gimple_bb (stmt) == data->current_loop->header)
2420     {
2421       iv = get_iv (data, PHI_RESULT (stmt));
2422 
2423       if (iv && !integer_zerop (iv->step))
2424 	return;
2425     }
2426 
2427   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2428     {
2429       op = USE_FROM_PTR (use_p);
2430 
2431       if (TREE_CODE (op) != SSA_NAME)
2432 	continue;
2433 
2434       iv = get_iv (data, op);
2435       if (!iv)
2436 	continue;
2437 
2438       find_interesting_uses_op (data, op);
2439     }
2440 }
2441 
2442 /* Finds interesting uses of induction variables outside of loops
2443    on loop exit edge EXIT.  */
2444 
2445 static void
2446 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
2447 {
2448   gphi *phi;
2449   gphi_iterator psi;
2450   tree def;
2451 
2452   for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
2453     {
2454       phi = psi.phi ();
2455       def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2456       if (!virtual_operand_p (def))
2457 	find_interesting_uses_op (data, def);
2458     }
2459 }
2460 
2461 /* Compute maximum offset of [base + offset] addressing mode
2462    for memory reference represented by USE.  */
2463 
2464 static HOST_WIDE_INT
2465 compute_max_addr_offset (struct iv_use *use)
2466 {
2467   int width;
2468   rtx reg, addr;
2469   HOST_WIDE_INT i, off;
2470   unsigned list_index, num;
2471   addr_space_t as;
2472   machine_mode mem_mode, addr_mode;
2473   static vec<HOST_WIDE_INT> max_offset_list;
2474 
2475   as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
2476   mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2477 
2478   num = max_offset_list.length ();
2479   list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
2480   if (list_index >= num)
2481     {
2482       max_offset_list.safe_grow (list_index + MAX_MACHINE_MODE);
2483       for (; num < max_offset_list.length (); num++)
2484 	max_offset_list[num] = -1;
2485     }
2486 
2487   off = max_offset_list[list_index];
2488   if (off != -1)
2489     return off;
2490 
2491   addr_mode = targetm.addr_space.address_mode (as);
2492   reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
2493   addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
2494 
2495   width = GET_MODE_BITSIZE (addr_mode) - 1;
2496   if (width > (HOST_BITS_PER_WIDE_INT - 1))
2497     width = HOST_BITS_PER_WIDE_INT - 1;
2498 
2499   for (i = width; i > 0; i--)
2500     {
2501       off = (HOST_WIDE_INT_1U << i) - 1;
2502       XEXP (addr, 1) = gen_int_mode (off, addr_mode);
2503       if (memory_address_addr_space_p (mem_mode, addr, as))
2504 	break;
2505 
2506       /* For some strict-alignment targets, the offset must be naturally
2507 	 aligned.  Try an aligned offset if mem_mode is not QImode.  */
2508       off = (HOST_WIDE_INT_1U << i);
2509       if (off > GET_MODE_SIZE (mem_mode) && mem_mode != QImode)
2510 	{
2511 	  off -= GET_MODE_SIZE (mem_mode);
2512 	  XEXP (addr, 1) = gen_int_mode (off, addr_mode);
2513 	  if (memory_address_addr_space_p (mem_mode, addr, as))
2514 	    break;
2515 	}
2516     }
2517   if (i == 0)
2518     off = 0;
2519 
2520   max_offset_list[list_index] = off;
2521   return off;
2522 }
2523 
2524 /* Comparison function to sort group in ascending order of addr_offset.  */
2525 
2526 static int
2527 group_compare_offset (const void *a, const void *b)
2528 {
2529   const struct iv_use *const *u1 = (const struct iv_use *const *) a;
2530   const struct iv_use *const *u2 = (const struct iv_use *const *) b;
2531 
2532   if ((*u1)->addr_offset != (*u2)->addr_offset)
2533     return (*u1)->addr_offset < (*u2)->addr_offset ? -1 : 1;
2534   else
2535     return 0;
2536 }
2537 
2538 /* Check if small groups should be split.  Return true if no group
2539    contains more than two uses with distinct addr_offsets.  Return
2540    false otherwise.  We want to split such groups because:
2541 
2542      1) Small groups don't have much benefit and may interfer with
2543 	general candidate selection.
2544      2) Size for problem with only small groups is usually small and
2545 	general algorithm can handle it well.
2546 
2547    TODO -- Above claim may not hold when we want to merge memory
2548    accesses with conseuctive addresses.  */
2549 
2550 static bool
2551 split_small_address_groups_p (struct ivopts_data *data)
2552 {
2553   unsigned int i, j, distinct = 1;
2554   struct iv_use *pre;
2555   struct iv_group *group;
2556 
2557   for (i = 0; i < data->vgroups.length (); i++)
2558     {
2559       group = data->vgroups[i];
2560       if (group->vuses.length () == 1)
2561 	continue;
2562 
2563       gcc_assert (group->type == USE_ADDRESS);
2564       if (group->vuses.length () == 2)
2565 	{
2566 	  if (group->vuses[0]->addr_offset > group->vuses[1]->addr_offset)
2567 	    std::swap (group->vuses[0], group->vuses[1]);
2568 	}
2569       else
2570 	group->vuses.qsort (group_compare_offset);
2571 
2572       if (distinct > 2)
2573 	continue;
2574 
2575       distinct = 1;
2576       for (pre = group->vuses[0], j = 1; j < group->vuses.length (); j++)
2577 	{
2578 	  if (group->vuses[j]->addr_offset != pre->addr_offset)
2579 	    {
2580 	      pre = group->vuses[j];
2581 	      distinct++;
2582 	    }
2583 
2584 	  if (distinct > 2)
2585 	    break;
2586 	}
2587     }
2588 
2589   return (distinct <= 2);
2590 }
2591 
2592 /* For each group of address type uses, this function further groups
2593    these uses according to the maximum offset supported by target's
2594    [base + offset] addressing mode.  */
2595 
2596 static void
2597 split_address_groups (struct ivopts_data *data)
2598 {
2599   unsigned int i, j;
2600   HOST_WIDE_INT max_offset = -1;
2601 
2602   /* Reset max offset to split all small groups.  */
2603   if (split_small_address_groups_p (data))
2604     max_offset = 0;
2605 
2606   for (i = 0; i < data->vgroups.length (); i++)
2607     {
2608       struct iv_group *group = data->vgroups[i];
2609       struct iv_use *use = group->vuses[0];
2610 
2611       use->id = 0;
2612       use->group_id = group->id;
2613       if (group->vuses.length () == 1)
2614 	continue;
2615 
2616       if (max_offset != 0)
2617 	max_offset = compute_max_addr_offset (use);
2618 
2619       for (j = 1; j < group->vuses.length (); j++)
2620 	{
2621 	  struct iv_use *next = group->vuses[j];
2622 
2623 	  /* Only uses with offset that can fit in offset part against
2624 	     the first use can be grouped together.  */
2625 	  if (next->addr_offset - use->addr_offset
2626 	      > (unsigned HOST_WIDE_INT) max_offset)
2627 	    break;
2628 
2629 	  next->id = j;
2630 	  next->group_id = group->id;
2631 	}
2632       /* Split group.  */
2633       if (j < group->vuses.length ())
2634 	{
2635 	  struct iv_group *new_group = record_group (data, group->type);
2636 	  new_group->vuses.safe_splice (group->vuses);
2637 	  new_group->vuses.block_remove (0, j);
2638 	  group->vuses.truncate (j);
2639 	}
2640     }
2641 }
2642 
2643 /* Finds uses of the induction variables that are interesting.  */
2644 
2645 static void
2646 find_interesting_uses (struct ivopts_data *data)
2647 {
2648   basic_block bb;
2649   gimple_stmt_iterator bsi;
2650   basic_block *body = get_loop_body (data->current_loop);
2651   unsigned i;
2652   edge e;
2653 
2654   for (i = 0; i < data->current_loop->num_nodes; i++)
2655     {
2656       edge_iterator ei;
2657       bb = body[i];
2658 
2659       FOR_EACH_EDGE (e, ei, bb->succs)
2660 	if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2661 	    && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2662 	  find_interesting_uses_outside (data, e);
2663 
2664       for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2665 	find_interesting_uses_stmt (data, gsi_stmt (bsi));
2666       for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2667 	if (!is_gimple_debug (gsi_stmt (bsi)))
2668 	  find_interesting_uses_stmt (data, gsi_stmt (bsi));
2669     }
2670 
2671   split_address_groups (data);
2672 
2673   if (dump_file && (dump_flags & TDF_DETAILS))
2674     {
2675       bitmap_iterator bi;
2676 
2677       fprintf (dump_file, "\n<Invariant Vars>:\n");
2678       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2679 	{
2680 	  struct version_info *info = ver_info (data, i);
2681 	  if (info->inv_id)
2682 	    {
2683 	      fprintf (dump_file, "Inv %d:\t", info->inv_id);
2684 	      print_generic_expr (dump_file, info->name, TDF_SLIM);
2685 	      fprintf (dump_file, "%s\n",
2686 		       info->has_nonlin_use ? "" : "\t(eliminable)");
2687 	    }
2688 	}
2689 
2690       fprintf (dump_file, "\n<IV Groups>:\n");
2691       dump_groups (dump_file, data);
2692       fprintf (dump_file, "\n");
2693     }
2694 
2695   free (body);
2696 }
2697 
2698 /* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
2699    is true, assume we are inside an address.  If TOP_COMPREF is true, assume
2700    we are at the top-level of the processed address.  */
2701 
2702 static tree
2703 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2704 		HOST_WIDE_INT *offset)
2705 {
2706   tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2707   enum tree_code code;
2708   tree type, orig_type = TREE_TYPE (expr);
2709   HOST_WIDE_INT off0, off1, st;
2710   tree orig_expr = expr;
2711 
2712   STRIP_NOPS (expr);
2713 
2714   type = TREE_TYPE (expr);
2715   code = TREE_CODE (expr);
2716   *offset = 0;
2717 
2718   switch (code)
2719     {
2720     case INTEGER_CST:
2721       if (!cst_and_fits_in_hwi (expr)
2722 	  || integer_zerop (expr))
2723 	return orig_expr;
2724 
2725       *offset = int_cst_value (expr);
2726       return build_int_cst (orig_type, 0);
2727 
2728     case POINTER_PLUS_EXPR:
2729     case PLUS_EXPR:
2730     case MINUS_EXPR:
2731       op0 = TREE_OPERAND (expr, 0);
2732       op1 = TREE_OPERAND (expr, 1);
2733 
2734       op0 = strip_offset_1 (op0, false, false, &off0);
2735       op1 = strip_offset_1 (op1, false, false, &off1);
2736 
2737       *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2738       if (op0 == TREE_OPERAND (expr, 0)
2739 	  && op1 == TREE_OPERAND (expr, 1))
2740 	return orig_expr;
2741 
2742       if (integer_zerop (op1))
2743 	expr = op0;
2744       else if (integer_zerop (op0))
2745 	{
2746 	  if (code == MINUS_EXPR)
2747 	    expr = fold_build1 (NEGATE_EXPR, type, op1);
2748 	  else
2749 	    expr = op1;
2750 	}
2751       else
2752 	expr = fold_build2 (code, type, op0, op1);
2753 
2754       return fold_convert (orig_type, expr);
2755 
2756     case MULT_EXPR:
2757       op1 = TREE_OPERAND (expr, 1);
2758       if (!cst_and_fits_in_hwi (op1))
2759 	return orig_expr;
2760 
2761       op0 = TREE_OPERAND (expr, 0);
2762       op0 = strip_offset_1 (op0, false, false, &off0);
2763       if (op0 == TREE_OPERAND (expr, 0))
2764 	return orig_expr;
2765 
2766       *offset = off0 * int_cst_value (op1);
2767       if (integer_zerop (op0))
2768 	expr = op0;
2769       else
2770 	expr = fold_build2 (MULT_EXPR, type, op0, op1);
2771 
2772       return fold_convert (orig_type, expr);
2773 
2774     case ARRAY_REF:
2775     case ARRAY_RANGE_REF:
2776       if (!inside_addr)
2777 	return orig_expr;
2778 
2779       step = array_ref_element_size (expr);
2780       if (!cst_and_fits_in_hwi (step))
2781 	break;
2782 
2783       st = int_cst_value (step);
2784       op1 = TREE_OPERAND (expr, 1);
2785       op1 = strip_offset_1 (op1, false, false, &off1);
2786       *offset = off1 * st;
2787 
2788       if (top_compref
2789 	  && integer_zerop (op1))
2790 	{
2791 	  /* Strip the component reference completely.  */
2792 	  op0 = TREE_OPERAND (expr, 0);
2793 	  op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2794 	  *offset += off0;
2795 	  return op0;
2796 	}
2797       break;
2798 
2799     case COMPONENT_REF:
2800       {
2801 	tree field;
2802 
2803 	if (!inside_addr)
2804 	  return orig_expr;
2805 
2806 	tmp = component_ref_field_offset (expr);
2807 	field = TREE_OPERAND (expr, 1);
2808 	if (top_compref
2809 	    && cst_and_fits_in_hwi (tmp)
2810 	    && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2811 	  {
2812 	    HOST_WIDE_INT boffset, abs_off;
2813 
2814 	    /* Strip the component reference completely.  */
2815 	    op0 = TREE_OPERAND (expr, 0);
2816 	    op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2817 	    boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2818 	    abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2819 	    if (boffset < 0)
2820 	      abs_off = -abs_off;
2821 
2822 	    *offset = off0 + int_cst_value (tmp) + abs_off;
2823 	    return op0;
2824 	  }
2825       }
2826       break;
2827 
2828     case ADDR_EXPR:
2829       op0 = TREE_OPERAND (expr, 0);
2830       op0 = strip_offset_1 (op0, true, true, &off0);
2831       *offset += off0;
2832 
2833       if (op0 == TREE_OPERAND (expr, 0))
2834 	return orig_expr;
2835 
2836       expr = build_fold_addr_expr (op0);
2837       return fold_convert (orig_type, expr);
2838 
2839     case MEM_REF:
2840       /* ???  Offset operand?  */
2841       inside_addr = false;
2842       break;
2843 
2844     default:
2845       return orig_expr;
2846     }
2847 
2848   /* Default handling of expressions for that we want to recurse into
2849      the first operand.  */
2850   op0 = TREE_OPERAND (expr, 0);
2851   op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2852   *offset += off0;
2853 
2854   if (op0 == TREE_OPERAND (expr, 0)
2855       && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2856     return orig_expr;
2857 
2858   expr = copy_node (expr);
2859   TREE_OPERAND (expr, 0) = op0;
2860   if (op1)
2861     TREE_OPERAND (expr, 1) = op1;
2862 
2863   /* Inside address, we might strip the top level component references,
2864      thus changing type of the expression.  Handling of ADDR_EXPR
2865      will fix that.  */
2866   expr = fold_convert (orig_type, expr);
2867 
2868   return expr;
2869 }
2870 
2871 /* Strips constant offsets from EXPR and stores them to OFFSET.  */
2872 
2873 static tree
2874 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2875 {
2876   HOST_WIDE_INT off;
2877   tree core = strip_offset_1 (expr, false, false, &off);
2878   *offset = off;
2879   return core;
2880 }
2881 
2882 /* Returns variant of TYPE that can be used as base for different uses.
2883    We return unsigned type with the same precision, which avoids problems
2884    with overflows.  */
2885 
2886 static tree
2887 generic_type_for (tree type)
2888 {
2889   if (POINTER_TYPE_P (type))
2890     return unsigned_type_for (type);
2891 
2892   if (TYPE_UNSIGNED (type))
2893     return type;
2894 
2895   return unsigned_type_for (type);
2896 }
2897 
2898 /* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
2899    the bitmap to that we should store it.  */
2900 
2901 static struct ivopts_data *fd_ivopts_data;
2902 static tree
2903 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2904 {
2905   bitmap *depends_on = (bitmap *) data;
2906   struct version_info *info;
2907 
2908   if (TREE_CODE (*expr_p) != SSA_NAME)
2909     return NULL_TREE;
2910   info = name_info (fd_ivopts_data, *expr_p);
2911 
2912   if (!info->inv_id || info->has_nonlin_use)
2913     return NULL_TREE;
2914 
2915   if (!*depends_on)
2916     *depends_on = BITMAP_ALLOC (NULL);
2917   bitmap_set_bit (*depends_on, info->inv_id);
2918 
2919   return NULL_TREE;
2920 }
2921 
2922 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2923    position to POS.  If USE is not NULL, the candidate is set as related to
2924    it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
2925    replacement of the final value of the iv by a direct computation.  */
2926 
2927 static struct iv_cand *
2928 add_candidate_1 (struct ivopts_data *data,
2929 		 tree base, tree step, bool important, enum iv_position pos,
2930 		 struct iv_use *use, gimple *incremented_at,
2931 		 struct iv *orig_iv = NULL)
2932 {
2933   unsigned i;
2934   struct iv_cand *cand = NULL;
2935   tree type, orig_type;
2936 
2937   gcc_assert (base && step);
2938 
2939   /* -fkeep-gc-roots-live means that we have to keep a real pointer
2940      live, but the ivopts code may replace a real pointer with one
2941      pointing before or after the memory block that is then adjusted
2942      into the memory block during the loop.  FIXME: It would likely be
2943      better to actually force the pointer live and still use ivopts;
2944      for example, it would be enough to write the pointer into memory
2945      and keep it there until after the loop.  */
2946   if (flag_keep_gc_roots_live && POINTER_TYPE_P (TREE_TYPE (base)))
2947     return NULL;
2948 
2949   /* For non-original variables, make sure their values are computed in a type
2950      that does not invoke undefined behavior on overflows (since in general,
2951      we cannot prove that these induction variables are non-wrapping).  */
2952   if (pos != IP_ORIGINAL)
2953     {
2954       orig_type = TREE_TYPE (base);
2955       type = generic_type_for (orig_type);
2956       if (type != orig_type)
2957 	{
2958 	  base = fold_convert (type, base);
2959 	  step = fold_convert (type, step);
2960 	}
2961     }
2962 
2963   for (i = 0; i < data->vcands.length (); i++)
2964     {
2965       cand = data->vcands[i];
2966 
2967       if (cand->pos != pos)
2968 	continue;
2969 
2970       if (cand->incremented_at != incremented_at
2971 	  || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2972 	      && cand->ainc_use != use))
2973 	continue;
2974 
2975       if (operand_equal_p (base, cand->iv->base, 0)
2976 	  && operand_equal_p (step, cand->iv->step, 0)
2977 	  && (TYPE_PRECISION (TREE_TYPE (base))
2978 	      == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2979 	break;
2980     }
2981 
2982   if (i == data->vcands.length ())
2983     {
2984       cand = XCNEW (struct iv_cand);
2985       cand->id = i;
2986       cand->iv = alloc_iv (data, base, step);
2987       cand->pos = pos;
2988       if (pos != IP_ORIGINAL)
2989 	{
2990 	  cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2991 	  cand->var_after = cand->var_before;
2992 	}
2993       cand->important = important;
2994       cand->incremented_at = incremented_at;
2995       data->vcands.safe_push (cand);
2996 
2997       if (TREE_CODE (step) != INTEGER_CST)
2998 	{
2999 	  fd_ivopts_data = data;
3000 	  walk_tree (&step, find_depends, &cand->depends_on, NULL);
3001 	}
3002 
3003       if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
3004 	cand->ainc_use = use;
3005       else
3006 	cand->ainc_use = NULL;
3007 
3008       cand->orig_iv = orig_iv;
3009       if (dump_file && (dump_flags & TDF_DETAILS))
3010 	dump_cand (dump_file, cand);
3011     }
3012 
3013   cand->important |= important;
3014 
3015   /* Relate candidate to the group for which it is added.  */
3016   if (use)
3017     bitmap_set_bit (data->vgroups[use->group_id]->related_cands, i);
3018 
3019   return cand;
3020 }
3021 
3022 /* Returns true if incrementing the induction variable at the end of the LOOP
3023    is allowed.
3024 
3025    The purpose is to avoid splitting latch edge with a biv increment, thus
3026    creating a jump, possibly confusing other optimization passes and leaving
3027    less freedom to scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
3028    is not available (so we do not have a better alternative), or if the latch
3029    edge is already nonempty.  */
3030 
3031 static bool
3032 allow_ip_end_pos_p (struct loop *loop)
3033 {
3034   if (!ip_normal_pos (loop))
3035     return true;
3036 
3037   if (!empty_block_p (ip_end_pos (loop)))
3038     return true;
3039 
3040   return false;
3041 }
3042 
3043 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
3044    Important field is set to IMPORTANT.  */
3045 
3046 static void
3047 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
3048 			bool important, struct iv_use *use)
3049 {
3050   basic_block use_bb = gimple_bb (use->stmt);
3051   machine_mode mem_mode;
3052   unsigned HOST_WIDE_INT cstepi;
3053 
3054   /* If we insert the increment in any position other than the standard
3055      ones, we must ensure that it is incremented once per iteration.
3056      It must not be in an inner nested loop, or one side of an if
3057      statement.  */
3058   if (use_bb->loop_father != data->current_loop
3059       || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
3060       || stmt_could_throw_p (use->stmt)
3061       || !cst_and_fits_in_hwi (step))
3062     return;
3063 
3064   cstepi = int_cst_value (step);
3065 
3066   mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
3067   if (((USE_LOAD_PRE_INCREMENT (mem_mode)
3068 	|| USE_STORE_PRE_INCREMENT (mem_mode))
3069        && GET_MODE_SIZE (mem_mode) == cstepi)
3070       || ((USE_LOAD_PRE_DECREMENT (mem_mode)
3071 	   || USE_STORE_PRE_DECREMENT (mem_mode))
3072 	  && GET_MODE_SIZE (mem_mode) == -cstepi))
3073     {
3074       enum tree_code code = MINUS_EXPR;
3075       tree new_base;
3076       tree new_step = step;
3077 
3078       if (POINTER_TYPE_P (TREE_TYPE (base)))
3079 	{
3080 	  new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
3081 	  code = POINTER_PLUS_EXPR;
3082 	}
3083       else
3084 	new_step = fold_convert (TREE_TYPE (base), new_step);
3085       new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
3086       add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
3087 		       use->stmt);
3088     }
3089   if (((USE_LOAD_POST_INCREMENT (mem_mode)
3090 	|| USE_STORE_POST_INCREMENT (mem_mode))
3091        && GET_MODE_SIZE (mem_mode) == cstepi)
3092       || ((USE_LOAD_POST_DECREMENT (mem_mode)
3093 	   || USE_STORE_POST_DECREMENT (mem_mode))
3094 	  && GET_MODE_SIZE (mem_mode) == -cstepi))
3095     {
3096       add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
3097 		       use->stmt);
3098     }
3099 }
3100 
3101 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
3102    position to POS.  If USE is not NULL, the candidate is set as related to
3103    it.  The candidate computation is scheduled before exit condition and at
3104    the end of loop.  */
3105 
3106 static void
3107 add_candidate (struct ivopts_data *data,
3108 	       tree base, tree step, bool important, struct iv_use *use,
3109 	       struct iv *orig_iv = NULL)
3110 {
3111   if (ip_normal_pos (data->current_loop))
3112     add_candidate_1 (data, base, step, important,
3113 		     IP_NORMAL, use, NULL, orig_iv);
3114   if (ip_end_pos (data->current_loop)
3115       && allow_ip_end_pos_p (data->current_loop))
3116     add_candidate_1 (data, base, step, important, IP_END, use, NULL, orig_iv);
3117 }
3118 
3119 /* Adds standard iv candidates.  */
3120 
3121 static void
3122 add_standard_iv_candidates (struct ivopts_data *data)
3123 {
3124   add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
3125 
3126   /* The same for a double-integer type if it is still fast enough.  */
3127   if (TYPE_PRECISION
3128 	(long_integer_type_node) > TYPE_PRECISION (integer_type_node)
3129       && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
3130     add_candidate (data, build_int_cst (long_integer_type_node, 0),
3131 		   build_int_cst (long_integer_type_node, 1), true, NULL);
3132 
3133   /* The same for a double-integer type if it is still fast enough.  */
3134   if (TYPE_PRECISION
3135 	(long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
3136       && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
3137     add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
3138 		   build_int_cst (long_long_integer_type_node, 1), true, NULL);
3139 }
3140 
3141 
3142 /* Adds candidates bases on the old induction variable IV.  */
3143 
3144 static void
3145 add_iv_candidate_for_biv (struct ivopts_data *data, struct iv *iv)
3146 {
3147   gimple *phi;
3148   tree def;
3149   struct iv_cand *cand;
3150 
3151   /* Check if this biv is used in address type use.  */
3152   if (iv->no_overflow  && iv->have_address_use
3153       && INTEGRAL_TYPE_P (TREE_TYPE (iv->base))
3154       && TYPE_PRECISION (TREE_TYPE (iv->base)) < TYPE_PRECISION (sizetype))
3155     {
3156       tree base = fold_convert (sizetype, iv->base);
3157       tree step = fold_convert (sizetype, iv->step);
3158 
3159       /* Add iv cand of same precision as index part in TARGET_MEM_REF.  */
3160       add_candidate (data, base, step, true, NULL, iv);
3161       /* Add iv cand of the original type only if it has nonlinear use.  */
3162       if (iv->nonlin_use)
3163 	add_candidate (data, iv->base, iv->step, true, NULL);
3164     }
3165   else
3166     add_candidate (data, iv->base, iv->step, true, NULL);
3167 
3168   /* The same, but with initial value zero.  */
3169   if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
3170     add_candidate (data, size_int (0), iv->step, true, NULL);
3171   else
3172     add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
3173 		   iv->step, true, NULL);
3174 
3175   phi = SSA_NAME_DEF_STMT (iv->ssa_name);
3176   if (gimple_code (phi) == GIMPLE_PHI)
3177     {
3178       /* Additionally record the possibility of leaving the original iv
3179 	 untouched.  */
3180       def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
3181       /* Don't add candidate if it's from another PHI node because
3182 	 it's an affine iv appearing in the form of PEELED_CHREC.  */
3183       phi = SSA_NAME_DEF_STMT (def);
3184       if (gimple_code (phi) != GIMPLE_PHI)
3185 	{
3186 	  cand = add_candidate_1 (data,
3187 				  iv->base, iv->step, true, IP_ORIGINAL, NULL,
3188 				  SSA_NAME_DEF_STMT (def));
3189 	  if (cand)
3190 	    {
3191 	      cand->var_before = iv->ssa_name;
3192 	      cand->var_after = def;
3193 	    }
3194 	}
3195       else
3196 	gcc_assert (gimple_bb (phi) == data->current_loop->header);
3197     }
3198 }
3199 
3200 /* Adds candidates based on the old induction variables.  */
3201 
3202 static void
3203 add_iv_candidate_for_bivs (struct ivopts_data *data)
3204 {
3205   unsigned i;
3206   struct iv *iv;
3207   bitmap_iterator bi;
3208 
3209   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
3210     {
3211       iv = ver_info (data, i)->iv;
3212       if (iv && iv->biv_p && !integer_zerop (iv->step))
3213 	add_iv_candidate_for_biv (data, iv);
3214     }
3215 }
3216 
3217 /* Record common candidate {BASE, STEP} derived from USE in hashtable.  */
3218 
3219 static void
3220 record_common_cand (struct ivopts_data *data, tree base,
3221 		    tree step, struct iv_use *use)
3222 {
3223   struct iv_common_cand ent;
3224   struct iv_common_cand **slot;
3225 
3226   ent.base = base;
3227   ent.step = step;
3228   ent.hash = iterative_hash_expr (base, 0);
3229   ent.hash = iterative_hash_expr (step, ent.hash);
3230 
3231   slot = data->iv_common_cand_tab->find_slot (&ent, INSERT);
3232   if (*slot == NULL)
3233     {
3234       *slot = new iv_common_cand ();
3235       (*slot)->base = base;
3236       (*slot)->step = step;
3237       (*slot)->uses.create (8);
3238       (*slot)->hash = ent.hash;
3239       data->iv_common_cands.safe_push ((*slot));
3240     }
3241 
3242   gcc_assert (use != NULL);
3243   (*slot)->uses.safe_push (use);
3244   return;
3245 }
3246 
3247 /* Comparison function used to sort common candidates.  */
3248 
3249 static int
3250 common_cand_cmp (const void *p1, const void *p2)
3251 {
3252   unsigned n1, n2;
3253   const struct iv_common_cand *const *const ccand1
3254     = (const struct iv_common_cand *const *)p1;
3255   const struct iv_common_cand *const *const ccand2
3256     = (const struct iv_common_cand *const *)p2;
3257 
3258   n1 = (*ccand1)->uses.length ();
3259   n2 = (*ccand2)->uses.length ();
3260   return n2 - n1;
3261 }
3262 
3263 /* Adds IV candidates based on common candidated recorded.  */
3264 
3265 static void
3266 add_iv_candidate_derived_from_uses (struct ivopts_data *data)
3267 {
3268   unsigned i, j;
3269   struct iv_cand *cand_1, *cand_2;
3270 
3271   data->iv_common_cands.qsort (common_cand_cmp);
3272   for (i = 0; i < data->iv_common_cands.length (); i++)
3273     {
3274       struct iv_common_cand *ptr = data->iv_common_cands[i];
3275 
3276       /* Only add IV candidate if it's derived from multiple uses.  */
3277       if (ptr->uses.length () <= 1)
3278 	break;
3279 
3280       cand_1 = NULL;
3281       cand_2 = NULL;
3282       if (ip_normal_pos (data->current_loop))
3283 	cand_1 = add_candidate_1 (data, ptr->base, ptr->step,
3284 				  false, IP_NORMAL, NULL, NULL);
3285 
3286       if (ip_end_pos (data->current_loop)
3287 	  && allow_ip_end_pos_p (data->current_loop))
3288 	cand_2 = add_candidate_1 (data, ptr->base, ptr->step,
3289 				  false, IP_END, NULL, NULL);
3290 
3291       /* Bind deriving uses and the new candidates.  */
3292       for (j = 0; j < ptr->uses.length (); j++)
3293 	{
3294 	  struct iv_group *group = data->vgroups[ptr->uses[j]->group_id];
3295 	  if (cand_1)
3296 	    bitmap_set_bit (group->related_cands, cand_1->id);
3297 	  if (cand_2)
3298 	    bitmap_set_bit (group->related_cands, cand_2->id);
3299 	}
3300     }
3301 
3302   /* Release data since it is useless from this point.  */
3303   data->iv_common_cand_tab->empty ();
3304   data->iv_common_cands.truncate (0);
3305 }
3306 
3307 /* Adds candidates based on the value of USE's iv.  */
3308 
3309 static void
3310 add_iv_candidate_for_use (struct ivopts_data *data, struct iv_use *use)
3311 {
3312   unsigned HOST_WIDE_INT offset;
3313   tree base;
3314   tree basetype;
3315   struct iv *iv = use->iv;
3316 
3317   add_candidate (data, iv->base, iv->step, false, use);
3318 
3319   /* Record common candidate for use in case it can be shared by others.  */
3320   record_common_cand (data, iv->base, iv->step, use);
3321 
3322   /* Record common candidate with initial value zero.  */
3323   basetype = TREE_TYPE (iv->base);
3324   if (POINTER_TYPE_P (basetype))
3325     basetype = sizetype;
3326   record_common_cand (data, build_int_cst (basetype, 0), iv->step, use);
3327 
3328   /* Record common candidate with constant offset stripped in base.
3329      Like the use itself, we also add candidate directly for it.  */
3330   base = strip_offset (iv->base, &offset);
3331   if (offset || base != iv->base)
3332     {
3333       record_common_cand (data, base, iv->step, use);
3334       add_candidate (data, base, iv->step, false, use);
3335     }
3336 
3337   /* Record common candidate with base_object removed in base.  */
3338   base = iv->base;
3339   STRIP_NOPS (base);
3340   if (iv->base_object != NULL && TREE_CODE (base) == POINTER_PLUS_EXPR)
3341     {
3342       tree step = iv->step;
3343 
3344       STRIP_NOPS (step);
3345       base = TREE_OPERAND (base, 1);
3346       step = fold_convert (sizetype, step);
3347       record_common_cand (data, base, step, use);
3348       /* Also record common candidate with offset stripped.  */
3349       base = strip_offset (base, &offset);
3350       if (offset)
3351 	record_common_cand (data, base, step, use);
3352     }
3353 
3354   /* At last, add auto-incremental candidates.  Make such variables
3355      important since other iv uses with same base object may be based
3356      on it.  */
3357   if (use != NULL && use->type == USE_ADDRESS)
3358     add_autoinc_candidates (data, iv->base, iv->step, true, use);
3359 }
3360 
3361 /* Adds candidates based on the uses.  */
3362 
3363 static void
3364 add_iv_candidate_for_groups (struct ivopts_data *data)
3365 {
3366   unsigned i;
3367 
3368   /* Only add candidate for the first use in group.  */
3369   for (i = 0; i < data->vgroups.length (); i++)
3370     {
3371       struct iv_group *group = data->vgroups[i];
3372 
3373       gcc_assert (group->vuses[0] != NULL);
3374       add_iv_candidate_for_use (data, group->vuses[0]);
3375     }
3376   add_iv_candidate_derived_from_uses (data);
3377 }
3378 
3379 /* Record important candidates and add them to related_cands bitmaps.  */
3380 
3381 static void
3382 record_important_candidates (struct ivopts_data *data)
3383 {
3384   unsigned i;
3385   struct iv_group *group;
3386 
3387   for (i = 0; i < data->vcands.length (); i++)
3388     {
3389       struct iv_cand *cand = data->vcands[i];
3390 
3391       if (cand->important)
3392 	bitmap_set_bit (data->important_candidates, i);
3393     }
3394 
3395   data->consider_all_candidates = (data->vcands.length ()
3396 				   <= CONSIDER_ALL_CANDIDATES_BOUND);
3397 
3398   /* Add important candidates to groups' related_cands bitmaps.  */
3399   for (i = 0; i < data->vgroups.length (); i++)
3400     {
3401       group = data->vgroups[i];
3402       bitmap_ior_into (group->related_cands, data->important_candidates);
3403     }
3404 }
3405 
3406 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
3407    If consider_all_candidates is true, we use a two-dimensional array, otherwise
3408    we allocate a simple list to every use.  */
3409 
3410 static void
3411 alloc_use_cost_map (struct ivopts_data *data)
3412 {
3413   unsigned i, size, s;
3414 
3415   for (i = 0; i < data->vgroups.length (); i++)
3416     {
3417       struct iv_group *group = data->vgroups[i];
3418 
3419       if (data->consider_all_candidates)
3420 	size = data->vcands.length ();
3421       else
3422 	{
3423 	  s = bitmap_count_bits (group->related_cands);
3424 
3425 	  /* Round up to the power of two, so that moduling by it is fast.  */
3426 	  size = s ? (1 << ceil_log2 (s)) : 1;
3427 	}
3428 
3429       group->n_map_members = size;
3430       group->cost_map = XCNEWVEC (struct cost_pair, size);
3431     }
3432 }
3433 
3434 /* Sets cost of (GROUP, CAND) pair to COST and record that it depends
3435    on invariants DEPENDS_ON and that the value used in expressing it
3436    is VALUE, and in case of iv elimination the comparison operator is COMP.  */
3437 
3438 static void
3439 set_group_iv_cost (struct ivopts_data *data,
3440 		   struct iv_group *group, struct iv_cand *cand,
3441 		   comp_cost cost, bitmap depends_on, tree value,
3442 		   enum tree_code comp, iv_inv_expr_ent *inv_expr)
3443 {
3444   unsigned i, s;
3445 
3446   if (cost.infinite_cost_p ())
3447     {
3448       BITMAP_FREE (depends_on);
3449       return;
3450     }
3451 
3452   if (data->consider_all_candidates)
3453     {
3454       group->cost_map[cand->id].cand = cand;
3455       group->cost_map[cand->id].cost = cost;
3456       group->cost_map[cand->id].depends_on = depends_on;
3457       group->cost_map[cand->id].value = value;
3458       group->cost_map[cand->id].comp = comp;
3459       group->cost_map[cand->id].inv_expr = inv_expr;
3460       return;
3461     }
3462 
3463   /* n_map_members is a power of two, so this computes modulo.  */
3464   s = cand->id & (group->n_map_members - 1);
3465   for (i = s; i < group->n_map_members; i++)
3466     if (!group->cost_map[i].cand)
3467       goto found;
3468   for (i = 0; i < s; i++)
3469     if (!group->cost_map[i].cand)
3470       goto found;
3471 
3472   gcc_unreachable ();
3473 
3474 found:
3475   group->cost_map[i].cand = cand;
3476   group->cost_map[i].cost = cost;
3477   group->cost_map[i].depends_on = depends_on;
3478   group->cost_map[i].value = value;
3479   group->cost_map[i].comp = comp;
3480   group->cost_map[i].inv_expr = inv_expr;
3481 }
3482 
3483 /* Gets cost of (GROUP, CAND) pair.  */
3484 
3485 static struct cost_pair *
3486 get_group_iv_cost (struct ivopts_data *data, struct iv_group *group,
3487 		   struct iv_cand *cand)
3488 {
3489   unsigned i, s;
3490   struct cost_pair *ret;
3491 
3492   if (!cand)
3493     return NULL;
3494 
3495   if (data->consider_all_candidates)
3496     {
3497       ret = group->cost_map + cand->id;
3498       if (!ret->cand)
3499 	return NULL;
3500 
3501       return ret;
3502     }
3503 
3504   /* n_map_members is a power of two, so this computes modulo.  */
3505   s = cand->id & (group->n_map_members - 1);
3506   for (i = s; i < group->n_map_members; i++)
3507     if (group->cost_map[i].cand == cand)
3508       return group->cost_map + i;
3509     else if (group->cost_map[i].cand == NULL)
3510       return NULL;
3511   for (i = 0; i < s; i++)
3512     if (group->cost_map[i].cand == cand)
3513       return group->cost_map + i;
3514     else if (group->cost_map[i].cand == NULL)
3515       return NULL;
3516 
3517   return NULL;
3518 }
3519 
3520 /* Produce DECL_RTL for object obj so it looks like it is stored in memory.  */
3521 static rtx
3522 produce_memory_decl_rtl (tree obj, int *regno)
3523 {
3524   addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
3525   machine_mode address_mode = targetm.addr_space.address_mode (as);
3526   rtx x;
3527 
3528   gcc_assert (obj);
3529   if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
3530     {
3531       const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
3532       x = gen_rtx_SYMBOL_REF (address_mode, name);
3533       SET_SYMBOL_REF_DECL (x, obj);
3534       x = gen_rtx_MEM (DECL_MODE (obj), x);
3535       set_mem_addr_space (x, as);
3536       targetm.encode_section_info (obj, x, true);
3537     }
3538   else
3539     {
3540       x = gen_raw_REG (address_mode, (*regno)++);
3541       x = gen_rtx_MEM (DECL_MODE (obj), x);
3542       set_mem_addr_space (x, as);
3543     }
3544 
3545   return x;
3546 }
3547 
3548 /* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
3549    walk_tree.  DATA contains the actual fake register number.  */
3550 
3551 static tree
3552 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
3553 {
3554   tree obj = NULL_TREE;
3555   rtx x = NULL_RTX;
3556   int *regno = (int *) data;
3557 
3558   switch (TREE_CODE (*expr_p))
3559     {
3560     case ADDR_EXPR:
3561       for (expr_p = &TREE_OPERAND (*expr_p, 0);
3562 	   handled_component_p (*expr_p);
3563 	   expr_p = &TREE_OPERAND (*expr_p, 0))
3564 	continue;
3565       obj = *expr_p;
3566       if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
3567 	x = produce_memory_decl_rtl (obj, regno);
3568       break;
3569 
3570     case SSA_NAME:
3571       *ws = 0;
3572       obj = SSA_NAME_VAR (*expr_p);
3573       /* Defer handling of anonymous SSA_NAMEs to the expander.  */
3574       if (!obj)
3575 	return NULL_TREE;
3576       if (!DECL_RTL_SET_P (obj))
3577 	x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3578       break;
3579 
3580     case VAR_DECL:
3581     case PARM_DECL:
3582     case RESULT_DECL:
3583       *ws = 0;
3584       obj = *expr_p;
3585 
3586       if (DECL_RTL_SET_P (obj))
3587 	break;
3588 
3589       if (DECL_MODE (obj) == BLKmode)
3590 	x = produce_memory_decl_rtl (obj, regno);
3591       else
3592 	x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3593 
3594       break;
3595 
3596     default:
3597       break;
3598     }
3599 
3600   if (x)
3601     {
3602       decl_rtl_to_reset.safe_push (obj);
3603       SET_DECL_RTL (obj, x);
3604     }
3605 
3606   return NULL_TREE;
3607 }
3608 
3609 /* Determines cost of the computation of EXPR.  */
3610 
3611 static unsigned
3612 computation_cost (tree expr, bool speed)
3613 {
3614   rtx_insn *seq;
3615   rtx rslt;
3616   tree type = TREE_TYPE (expr);
3617   unsigned cost;
3618   /* Avoid using hard regs in ways which may be unsupported.  */
3619   int regno = LAST_VIRTUAL_REGISTER + 1;
3620   struct cgraph_node *node = cgraph_node::get (current_function_decl);
3621   enum node_frequency real_frequency = node->frequency;
3622 
3623   node->frequency = NODE_FREQUENCY_NORMAL;
3624   crtl->maybe_hot_insn_p = speed;
3625   walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
3626   start_sequence ();
3627   rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
3628   seq = get_insns ();
3629   end_sequence ();
3630   default_rtl_profile ();
3631   node->frequency = real_frequency;
3632 
3633   cost = seq_cost (seq, speed);
3634   if (MEM_P (rslt))
3635     cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
3636 			  TYPE_ADDR_SPACE (type), speed);
3637   else if (!REG_P (rslt))
3638     cost += set_src_cost (rslt, TYPE_MODE (type), speed);
3639 
3640   return cost;
3641 }
3642 
3643 /* Returns variable containing the value of candidate CAND at statement AT.  */
3644 
3645 static tree
3646 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple *stmt)
3647 {
3648   if (stmt_after_increment (loop, cand, stmt))
3649     return cand->var_after;
3650   else
3651     return cand->var_before;
3652 }
3653 
3654 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3655    same precision that is at least as wide as the precision of TYPE, stores
3656    BA to A and BB to B, and returns the type of BA.  Otherwise, returns the
3657    type of A and B.  */
3658 
3659 static tree
3660 determine_common_wider_type (tree *a, tree *b)
3661 {
3662   tree wider_type = NULL;
3663   tree suba, subb;
3664   tree atype = TREE_TYPE (*a);
3665 
3666   if (CONVERT_EXPR_P (*a))
3667     {
3668       suba = TREE_OPERAND (*a, 0);
3669       wider_type = TREE_TYPE (suba);
3670       if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3671 	return atype;
3672     }
3673   else
3674     return atype;
3675 
3676   if (CONVERT_EXPR_P (*b))
3677     {
3678       subb = TREE_OPERAND (*b, 0);
3679       if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3680 	return atype;
3681     }
3682   else
3683     return atype;
3684 
3685   *a = suba;
3686   *b = subb;
3687   return wider_type;
3688 }
3689 
3690 /* Determines the expression by that USE is expressed from induction variable
3691    CAND at statement AT in LOOP.  The expression is stored in a decomposed
3692    form into AFF.  Returns false if USE cannot be expressed using CAND.  */
3693 
3694 static bool
3695 get_computation_aff (struct loop *loop,
3696 		     struct iv_use *use, struct iv_cand *cand, gimple *at,
3697 		     struct aff_tree *aff)
3698 {
3699   tree ubase = use->iv->base;
3700   tree ustep = use->iv->step;
3701   tree cbase = cand->iv->base;
3702   tree cstep = cand->iv->step, cstep_common;
3703   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3704   tree common_type, var;
3705   tree uutype;
3706   aff_tree cbase_aff, var_aff;
3707   widest_int rat;
3708 
3709   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3710     {
3711       /* We do not have a precision to express the values of use.  */
3712       return false;
3713     }
3714 
3715   var = var_at_stmt (loop, cand, at);
3716   uutype = unsigned_type_for (utype);
3717 
3718   /* If the conversion is not noop, perform it.  */
3719   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3720     {
3721       if (cand->orig_iv != NULL && CONVERT_EXPR_P (cbase)
3722 	  && (CONVERT_EXPR_P (cstep) || TREE_CODE (cstep) == INTEGER_CST))
3723 	{
3724 	  tree inner_base, inner_step, inner_type;
3725 	  inner_base = TREE_OPERAND (cbase, 0);
3726 	  if (CONVERT_EXPR_P (cstep))
3727 	    inner_step = TREE_OPERAND (cstep, 0);
3728 	  else
3729 	    inner_step = cstep;
3730 
3731 	  inner_type = TREE_TYPE (inner_base);
3732 	  /* If candidate is added from a biv whose type is smaller than
3733 	     ctype, we know both candidate and the biv won't overflow.
3734 	     In this case, it's safe to skip the convertion in candidate.
3735 	     As an example, (unsigned short)((unsigned long)A) equals to
3736 	     (unsigned short)A, if A has a type no larger than short.  */
3737 	  if (TYPE_PRECISION (inner_type) <= TYPE_PRECISION (uutype))
3738 	    {
3739 	      cbase = inner_base;
3740 	      cstep = inner_step;
3741 	    }
3742 	}
3743       cstep = fold_convert (uutype, cstep);
3744       cbase = fold_convert (uutype, cbase);
3745       var = fold_convert (uutype, var);
3746     }
3747 
3748   /* Ratio is 1 when computing the value of biv cand by itself.
3749      We can't rely on constant_multiple_of in this case because the
3750      use is created after the original biv is selected.  The call
3751      could fail because of inconsistent fold behavior.  See PR68021
3752      for more information.  */
3753   if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
3754     {
3755       gcc_assert (is_gimple_assign (use->stmt));
3756       gcc_assert (use->iv->ssa_name == cand->var_after);
3757       gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
3758       rat = 1;
3759     }
3760   else if (!constant_multiple_of (ustep, cstep, &rat))
3761     return false;
3762 
3763   /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3764      type, we achieve better folding by computing their difference in this
3765      wider type, and cast the result to UUTYPE.  We do not need to worry about
3766      overflows, as all the arithmetics will in the end be performed in UUTYPE
3767      anyway.  */
3768   common_type = determine_common_wider_type (&ubase, &cbase);
3769 
3770   /* use = ubase - ratio * cbase + ratio * var.  */
3771   tree_to_aff_combination (ubase, common_type, aff);
3772   tree_to_aff_combination (cbase, common_type, &cbase_aff);
3773   tree_to_aff_combination (var, uutype, &var_aff);
3774 
3775   /* We need to shift the value if we are after the increment.  */
3776   if (stmt_after_increment (loop, cand, at))
3777     {
3778       aff_tree cstep_aff;
3779 
3780       if (common_type != uutype)
3781 	cstep_common = fold_convert (common_type, cstep);
3782       else
3783 	cstep_common = cstep;
3784 
3785       tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3786       aff_combination_add (&cbase_aff, &cstep_aff);
3787     }
3788 
3789   aff_combination_scale (&cbase_aff, -rat);
3790   aff_combination_add (aff, &cbase_aff);
3791   if (common_type != uutype)
3792     aff_combination_convert (aff, uutype);
3793 
3794   aff_combination_scale (&var_aff, rat);
3795   aff_combination_add (aff, &var_aff);
3796 
3797   return true;
3798 }
3799 
3800 /* Return the type of USE.  */
3801 
3802 static tree
3803 get_use_type (struct iv_use *use)
3804 {
3805   tree base_type = TREE_TYPE (use->iv->base);
3806   tree type;
3807 
3808   if (use->type == USE_ADDRESS)
3809     {
3810       /* The base_type may be a void pointer.  Create a pointer type based on
3811 	 the mem_ref instead.  */
3812       type = build_pointer_type (TREE_TYPE (*use->op_p));
3813       gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3814 		  == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3815     }
3816   else
3817     type = base_type;
3818 
3819   return type;
3820 }
3821 
3822 /* Determines the expression by that USE is expressed from induction variable
3823    CAND at statement AT in LOOP.  The computation is unshared.  */
3824 
3825 static tree
3826 get_computation_at (struct loop *loop,
3827 		    struct iv_use *use, struct iv_cand *cand, gimple *at)
3828 {
3829   aff_tree aff;
3830   tree type = get_use_type (use);
3831 
3832   if (!get_computation_aff (loop, use, cand, at, &aff))
3833     return NULL_TREE;
3834   unshare_aff_combination (&aff);
3835   return fold_convert (type, aff_combination_to_tree (&aff));
3836 }
3837 
3838 /* Determines the expression by that USE is expressed from induction variable
3839    CAND in LOOP.  The computation is unshared.  */
3840 
3841 static tree
3842 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3843 {
3844   return get_computation_at (loop, use, cand, use->stmt);
3845 }
3846 
3847 /* Adjust the cost COST for being in loop setup rather than loop body.
3848    If we're optimizing for space, the loop setup overhead is constant;
3849    if we're optimizing for speed, amortize it over the per-iteration cost.  */
3850 static unsigned
3851 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3852 {
3853   if (cost == INFTY)
3854     return cost;
3855   else if (optimize_loop_for_speed_p (data->current_loop))
3856     return cost / avg_loop_niter (data->current_loop);
3857   else
3858     return cost;
3859 }
3860 
3861 /* Returns true if multiplying by RATIO is allowed in an address.  Test the
3862    validity for a memory reference accessing memory of mode MODE in
3863    address space AS.  */
3864 
3865 
3866 bool
3867 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, machine_mode mode,
3868 				 addr_space_t as)
3869 {
3870 #define MAX_RATIO 128
3871   unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3872   static vec<sbitmap> valid_mult_list;
3873   sbitmap valid_mult;
3874 
3875   if (data_index >= valid_mult_list.length ())
3876     valid_mult_list.safe_grow_cleared (data_index + 1);
3877 
3878   valid_mult = valid_mult_list[data_index];
3879   if (!valid_mult)
3880     {
3881       machine_mode address_mode = targetm.addr_space.address_mode (as);
3882       rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3883       rtx reg2 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3884       rtx addr, scaled;
3885       HOST_WIDE_INT i;
3886 
3887       valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3888       bitmap_clear (valid_mult);
3889       scaled = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3890       addr = gen_rtx_fmt_ee (PLUS, address_mode, scaled, reg2);
3891       for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3892 	{
3893 	  XEXP (scaled, 1) = gen_int_mode (i, address_mode);
3894 	  if (memory_address_addr_space_p (mode, addr, as)
3895 	      || memory_address_addr_space_p (mode, scaled, as))
3896 	    bitmap_set_bit (valid_mult, i + MAX_RATIO);
3897 	}
3898 
3899       if (dump_file && (dump_flags & TDF_DETAILS))
3900 	{
3901 	  fprintf (dump_file, "  allowed multipliers:");
3902 	  for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3903 	    if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
3904 	      fprintf (dump_file, " %d", (int) i);
3905 	  fprintf (dump_file, "\n");
3906 	  fprintf (dump_file, "\n");
3907 	}
3908 
3909       valid_mult_list[data_index] = valid_mult;
3910     }
3911 
3912   if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3913     return false;
3914 
3915   return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
3916 }
3917 
3918 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3919    If SYMBOL_PRESENT is false, symbol is omitted.  If VAR_PRESENT is false,
3920    variable is omitted.  Compute the cost for a memory reference that accesses
3921    a memory location of mode MEM_MODE in address space AS.
3922 
3923    MAY_AUTOINC is set to true if the autoincrement (increasing index by
3924    size of MEM_MODE / RATIO) is available.  To make this determination, we
3925    look at the size of the increment to be made, which is given in CSTEP.
3926    CSTEP may be zero if the step is unknown.
3927    STMT_AFTER_INC is true iff the statement we're looking at is after the
3928    increment of the original biv.
3929 
3930    TODO -- there must be some better way.  This all is quite crude.  */
3931 
3932 enum ainc_type
3933 {
3934   AINC_PRE_INC,		/* Pre increment.  */
3935   AINC_PRE_DEC,		/* Pre decrement.  */
3936   AINC_POST_INC,	/* Post increment.  */
3937   AINC_POST_DEC,	/* Post decrement.  */
3938   AINC_NONE		/* Also the number of auto increment types.  */
3939 };
3940 
3941 struct address_cost_data
3942 {
3943   HOST_WIDE_INT min_offset, max_offset;
3944   unsigned costs[2][2][2][2];
3945   unsigned ainc_costs[AINC_NONE];
3946 };
3947 
3948 
3949 static comp_cost
3950 get_address_cost (bool symbol_present, bool var_present,
3951 		  unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3952 		  HOST_WIDE_INT cstep, machine_mode mem_mode,
3953 		  addr_space_t as, bool speed,
3954 		  bool stmt_after_inc, bool *may_autoinc)
3955 {
3956   machine_mode address_mode = targetm.addr_space.address_mode (as);
3957   static vec<address_cost_data *> address_cost_data_list;
3958   unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3959   address_cost_data *data;
3960   static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3961   static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3962   unsigned cost, acost, complexity;
3963   enum ainc_type autoinc_type;
3964   bool offset_p, ratio_p, autoinc;
3965   HOST_WIDE_INT s_offset, autoinc_offset, msize;
3966   unsigned HOST_WIDE_INT mask;
3967   unsigned bits;
3968 
3969   if (data_index >= address_cost_data_list.length ())
3970     address_cost_data_list.safe_grow_cleared (data_index + 1);
3971 
3972   data = address_cost_data_list[data_index];
3973   if (!data)
3974     {
3975       HOST_WIDE_INT i;
3976       HOST_WIDE_INT rat, off = 0;
3977       int old_cse_not_expected, width;
3978       unsigned sym_p, var_p, off_p, rat_p, add_c;
3979       rtx_insn *seq;
3980       rtx addr, base;
3981       rtx reg0, reg1;
3982 
3983       data = (address_cost_data *) xcalloc (1, sizeof (*data));
3984 
3985       reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3986 
3987       width = GET_MODE_BITSIZE (address_mode) - 1;
3988       if (width > (HOST_BITS_PER_WIDE_INT - 1))
3989 	width = HOST_BITS_PER_WIDE_INT - 1;
3990       addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3991 
3992       for (i = width; i >= 0; i--)
3993 	{
3994 	  off = -(HOST_WIDE_INT_1U << i);
3995 	  XEXP (addr, 1) = gen_int_mode (off, address_mode);
3996 	  if (memory_address_addr_space_p (mem_mode, addr, as))
3997 	    break;
3998 	}
3999       data->min_offset = (i == -1? 0 : off);
4000 
4001       for (i = width; i >= 0; i--)
4002 	{
4003 	  off = (HOST_WIDE_INT_1U << i) - 1;
4004 	  XEXP (addr, 1) = gen_int_mode (off, address_mode);
4005 	  if (memory_address_addr_space_p (mem_mode, addr, as))
4006 	    break;
4007 	  /* For some strict-alignment targets, the offset must be naturally
4008 	     aligned.  Try an aligned offset if mem_mode is not QImode.  */
4009 	  off = mem_mode != QImode
4010 		? (HOST_WIDE_INT_1U << i)
4011 		    - GET_MODE_SIZE (mem_mode)
4012 		: 0;
4013 	  if (off > 0)
4014 	    {
4015 	      XEXP (addr, 1) = gen_int_mode (off, address_mode);
4016 	      if (memory_address_addr_space_p (mem_mode, addr, as))
4017 		break;
4018 	    }
4019 	}
4020       if (i == -1)
4021 	off = 0;
4022       data->max_offset = off;
4023 
4024       if (dump_file && (dump_flags & TDF_DETAILS))
4025 	{
4026 	  fprintf (dump_file, "get_address_cost:\n");
4027 	  fprintf (dump_file, "  min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
4028 		   GET_MODE_NAME (mem_mode),
4029 		   data->min_offset);
4030 	  fprintf (dump_file, "  max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
4031 		   GET_MODE_NAME (mem_mode),
4032 		   data->max_offset);
4033 	}
4034 
4035       rat = 1;
4036       for (i = 2; i <= MAX_RATIO; i++)
4037 	if (multiplier_allowed_in_address_p (i, mem_mode, as))
4038 	  {
4039 	    rat = i;
4040 	    break;
4041 	  }
4042 
4043       /* Compute the cost of various addressing modes.  */
4044       acost = 0;
4045       reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
4046       reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
4047 
4048       if (USE_LOAD_PRE_DECREMENT (mem_mode)
4049 	  || USE_STORE_PRE_DECREMENT (mem_mode))
4050 	{
4051 	  addr = gen_rtx_PRE_DEC (address_mode, reg0);
4052 	  has_predec[mem_mode]
4053 	    = memory_address_addr_space_p (mem_mode, addr, as);
4054 
4055 	  if (has_predec[mem_mode])
4056 	    data->ainc_costs[AINC_PRE_DEC]
4057 	      = address_cost (addr, mem_mode, as, speed);
4058 	}
4059       if (USE_LOAD_POST_DECREMENT (mem_mode)
4060 	  || USE_STORE_POST_DECREMENT (mem_mode))
4061 	{
4062 	  addr = gen_rtx_POST_DEC (address_mode, reg0);
4063 	  has_postdec[mem_mode]
4064 	    = memory_address_addr_space_p (mem_mode, addr, as);
4065 
4066 	  if (has_postdec[mem_mode])
4067 	    data->ainc_costs[AINC_POST_DEC]
4068 	      = address_cost (addr, mem_mode, as, speed);
4069 	}
4070       if (USE_LOAD_PRE_INCREMENT (mem_mode)
4071 	  || USE_STORE_PRE_DECREMENT (mem_mode))
4072 	{
4073 	  addr = gen_rtx_PRE_INC (address_mode, reg0);
4074 	  has_preinc[mem_mode]
4075 	    = memory_address_addr_space_p (mem_mode, addr, as);
4076 
4077 	  if (has_preinc[mem_mode])
4078 	    data->ainc_costs[AINC_PRE_INC]
4079 	      = address_cost (addr, mem_mode, as, speed);
4080 	}
4081       if (USE_LOAD_POST_INCREMENT (mem_mode)
4082 	  || USE_STORE_POST_INCREMENT (mem_mode))
4083 	{
4084 	  addr = gen_rtx_POST_INC (address_mode, reg0);
4085 	  has_postinc[mem_mode]
4086 	    = memory_address_addr_space_p (mem_mode, addr, as);
4087 
4088 	  if (has_postinc[mem_mode])
4089 	    data->ainc_costs[AINC_POST_INC]
4090 	      = address_cost (addr, mem_mode, as, speed);
4091 	}
4092       for (i = 0; i < 16; i++)
4093 	{
4094 	  sym_p = i & 1;
4095 	  var_p = (i >> 1) & 1;
4096 	  off_p = (i >> 2) & 1;
4097 	  rat_p = (i >> 3) & 1;
4098 
4099 	  addr = reg0;
4100 	  if (rat_p)
4101 	    addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
4102 				   gen_int_mode (rat, address_mode));
4103 
4104 	  if (var_p)
4105 	    addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
4106 
4107 	  if (sym_p)
4108 	    {
4109 	      base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
4110 	      /* ??? We can run into trouble with some backends by presenting
4111 		 it with symbols which haven't been properly passed through
4112 		 targetm.encode_section_info.  By setting the local bit, we
4113 		 enhance the probability of things working.  */
4114 	      SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
4115 
4116 	      if (off_p)
4117 		base = gen_rtx_fmt_e (CONST, address_mode,
4118 				      gen_rtx_fmt_ee
4119 					(PLUS, address_mode, base,
4120 					 gen_int_mode (off, address_mode)));
4121 	    }
4122 	  else if (off_p)
4123 	    base = gen_int_mode (off, address_mode);
4124 	  else
4125 	    base = NULL_RTX;
4126 
4127 	  if (base)
4128 	    addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
4129 
4130 	  start_sequence ();
4131 	  /* To avoid splitting addressing modes, pretend that no cse will
4132 	     follow.  */
4133 	  old_cse_not_expected = cse_not_expected;
4134 	  cse_not_expected = true;
4135 	  addr = memory_address_addr_space (mem_mode, addr, as);
4136 	  cse_not_expected = old_cse_not_expected;
4137 	  seq = get_insns ();
4138 	  end_sequence ();
4139 
4140 	  acost = seq_cost (seq, speed);
4141 	  acost += address_cost (addr, mem_mode, as, speed);
4142 
4143 	  if (!acost)
4144 	    acost = 1;
4145 	  data->costs[sym_p][var_p][off_p][rat_p] = acost;
4146 	}
4147 
4148       /* On some targets, it is quite expensive to load symbol to a register,
4149 	 which makes addresses that contain symbols look much more expensive.
4150 	 However, the symbol will have to be loaded in any case before the
4151 	 loop (and quite likely we have it in register already), so it does not
4152 	 make much sense to penalize them too heavily.  So make some final
4153 	 tweaks for the SYMBOL_PRESENT modes:
4154 
4155 	 If VAR_PRESENT is false, and the mode obtained by changing symbol to
4156 	 var is cheaper, use this mode with small penalty.
4157 	 If VAR_PRESENT is true, try whether the mode with
4158 	 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
4159 	 if this is the case, use it.  */
4160       add_c = add_cost (speed, address_mode);
4161       for (i = 0; i < 8; i++)
4162 	{
4163 	  var_p = i & 1;
4164 	  off_p = (i >> 1) & 1;
4165 	  rat_p = (i >> 2) & 1;
4166 
4167 	  acost = data->costs[0][1][off_p][rat_p] + 1;
4168 	  if (var_p)
4169 	    acost += add_c;
4170 
4171 	  if (acost < data->costs[1][var_p][off_p][rat_p])
4172 	    data->costs[1][var_p][off_p][rat_p] = acost;
4173 	}
4174 
4175       if (dump_file && (dump_flags & TDF_DETAILS))
4176 	{
4177 	  fprintf (dump_file, "<Address Costs>:\n");
4178 
4179 	  for (i = 0; i < 16; i++)
4180 	    {
4181 	      sym_p = i & 1;
4182 	      var_p = (i >> 1) & 1;
4183 	      off_p = (i >> 2) & 1;
4184 	      rat_p = (i >> 3) & 1;
4185 
4186 	      fprintf (dump_file, "  ");
4187 	      if (sym_p)
4188 		fprintf (dump_file, "sym + ");
4189 	      if (var_p)
4190 		fprintf (dump_file, "var + ");
4191 	      if (off_p)
4192 		fprintf (dump_file, "cst + ");
4193 	      if (rat_p)
4194 		fprintf (dump_file, "rat * ");
4195 
4196 	      acost = data->costs[sym_p][var_p][off_p][rat_p];
4197 	      fprintf (dump_file, "index costs %d\n", acost);
4198 	    }
4199 	  if (has_predec[mem_mode] || has_postdec[mem_mode]
4200 	      || has_preinc[mem_mode] || has_postinc[mem_mode])
4201 	    fprintf (dump_file, "  May include autoinc/dec\n");
4202 	  fprintf (dump_file, "\n");
4203 	}
4204 
4205       address_cost_data_list[data_index] = data;
4206     }
4207 
4208   bits = GET_MODE_BITSIZE (address_mode);
4209   mask = ~(HOST_WIDE_INT_M1U << (bits - 1) << 1);
4210   offset &= mask;
4211   if ((offset >> (bits - 1) & 1))
4212     offset |= ~mask;
4213   s_offset = offset;
4214 
4215   autoinc = false;
4216   autoinc_type = AINC_NONE;
4217   msize = GET_MODE_SIZE (mem_mode);
4218   autoinc_offset = offset;
4219   if (stmt_after_inc)
4220     autoinc_offset += ratio * cstep;
4221   if (symbol_present || var_present || ratio != 1)
4222     autoinc = false;
4223   else
4224     {
4225       if (has_postinc[mem_mode] && autoinc_offset == 0
4226 	  && msize == cstep)
4227 	autoinc_type = AINC_POST_INC;
4228       else if (has_postdec[mem_mode] && autoinc_offset == 0
4229 	       && msize == -cstep)
4230 	autoinc_type = AINC_POST_DEC;
4231       else if (has_preinc[mem_mode] && autoinc_offset == msize
4232 	       && msize == cstep)
4233 	autoinc_type = AINC_PRE_INC;
4234       else if (has_predec[mem_mode] && autoinc_offset == -msize
4235 	       && msize == -cstep)
4236 	autoinc_type = AINC_PRE_DEC;
4237 
4238       if (autoinc_type != AINC_NONE)
4239 	autoinc = true;
4240     }
4241 
4242   cost = 0;
4243   offset_p = (s_offset != 0
4244 	      && data->min_offset <= s_offset
4245 	      && s_offset <= data->max_offset);
4246   ratio_p = (ratio != 1
4247 	     && multiplier_allowed_in_address_p (ratio, mem_mode, as));
4248 
4249   if (ratio != 1 && !ratio_p)
4250     cost += mult_by_coeff_cost (ratio, address_mode, speed);
4251 
4252   if (s_offset && !offset_p && !symbol_present)
4253     cost += add_cost (speed, address_mode);
4254 
4255   if (may_autoinc)
4256     *may_autoinc = autoinc;
4257   if (autoinc)
4258     acost = data->ainc_costs[autoinc_type];
4259   else
4260     acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
4261   complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
4262   return comp_cost (cost + acost, complexity);
4263 }
4264 
4265  /* Calculate the SPEED or size cost of shiftadd EXPR in MODE.  MULT is the
4266     EXPR operand holding the shift.  COST0 and COST1 are the costs for
4267     calculating the operands of EXPR.  Returns true if successful, and returns
4268     the cost in COST.  */
4269 
4270 static bool
4271 get_shiftadd_cost (tree expr, machine_mode mode, comp_cost cost0,
4272 		   comp_cost cost1, tree mult, bool speed, comp_cost *cost)
4273 {
4274   comp_cost res;
4275   tree op1 = TREE_OPERAND (expr, 1);
4276   tree cst = TREE_OPERAND (mult, 1);
4277   tree multop = TREE_OPERAND (mult, 0);
4278   int m = exact_log2 (int_cst_value (cst));
4279   int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
4280   int as_cost, sa_cost;
4281   bool mult_in_op1;
4282 
4283   if (!(m >= 0 && m < maxm))
4284     return false;
4285 
4286   STRIP_NOPS (op1);
4287   mult_in_op1 = operand_equal_p (op1, mult, 0);
4288 
4289   as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
4290 
4291   /* If the target has a cheap shift-and-add or shift-and-sub instruction,
4292      use that in preference to a shift insn followed by an add insn.  */
4293   sa_cost = (TREE_CODE (expr) != MINUS_EXPR
4294 	     ? shiftadd_cost (speed, mode, m)
4295 	     : (mult_in_op1
4296 		? shiftsub1_cost (speed, mode, m)
4297 		: shiftsub0_cost (speed, mode, m)));
4298 
4299   res = comp_cost (MIN (as_cost, sa_cost), 0);
4300   res += (mult_in_op1 ? cost0 : cost1);
4301 
4302   STRIP_NOPS (multop);
4303   if (!is_gimple_val (multop))
4304     res += force_expr_to_var_cost (multop, speed);
4305 
4306   *cost = res;
4307   return true;
4308 }
4309 
4310 /* Estimates cost of forcing expression EXPR into a variable.  */
4311 
4312 static comp_cost
4313 force_expr_to_var_cost (tree expr, bool speed)
4314 {
4315   static bool costs_initialized = false;
4316   static unsigned integer_cost [2];
4317   static unsigned symbol_cost [2];
4318   static unsigned address_cost [2];
4319   tree op0, op1;
4320   comp_cost cost0, cost1, cost;
4321   machine_mode mode;
4322 
4323   if (!costs_initialized)
4324     {
4325       tree type = build_pointer_type (integer_type_node);
4326       tree var, addr;
4327       rtx x;
4328       int i;
4329 
4330       var = create_tmp_var_raw (integer_type_node, "test_var");
4331       TREE_STATIC (var) = 1;
4332       x = produce_memory_decl_rtl (var, NULL);
4333       SET_DECL_RTL (var, x);
4334 
4335       addr = build1 (ADDR_EXPR, type, var);
4336 
4337 
4338       for (i = 0; i < 2; i++)
4339 	{
4340 	  integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
4341 							     2000), i);
4342 
4343 	  symbol_cost[i] = computation_cost (addr, i) + 1;
4344 
4345 	  address_cost[i]
4346 	    = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
4347 	  if (dump_file && (dump_flags & TDF_DETAILS))
4348 	    {
4349 	      fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
4350 	      fprintf (dump_file, "  integer %d\n", (int) integer_cost[i]);
4351 	      fprintf (dump_file, "  symbol %d\n", (int) symbol_cost[i]);
4352 	      fprintf (dump_file, "  address %d\n", (int) address_cost[i]);
4353 	      fprintf (dump_file, "  other %d\n", (int) target_spill_cost[i]);
4354 	      fprintf (dump_file, "\n");
4355 	    }
4356 	}
4357 
4358       costs_initialized = true;
4359     }
4360 
4361   STRIP_NOPS (expr);
4362 
4363   if (SSA_VAR_P (expr))
4364     return no_cost;
4365 
4366   if (is_gimple_min_invariant (expr))
4367     {
4368       if (TREE_CODE (expr) == INTEGER_CST)
4369 	return comp_cost (integer_cost [speed], 0);
4370 
4371       if (TREE_CODE (expr) == ADDR_EXPR)
4372 	{
4373 	  tree obj = TREE_OPERAND (expr, 0);
4374 
4375 	  if (VAR_P (obj)
4376 	      || TREE_CODE (obj) == PARM_DECL
4377 	      || TREE_CODE (obj) == RESULT_DECL)
4378 	    return comp_cost (symbol_cost [speed], 0);
4379 	}
4380 
4381       return comp_cost (address_cost [speed], 0);
4382     }
4383 
4384   switch (TREE_CODE (expr))
4385     {
4386     case POINTER_PLUS_EXPR:
4387     case PLUS_EXPR:
4388     case MINUS_EXPR:
4389     case MULT_EXPR:
4390       op0 = TREE_OPERAND (expr, 0);
4391       op1 = TREE_OPERAND (expr, 1);
4392       STRIP_NOPS (op0);
4393       STRIP_NOPS (op1);
4394       break;
4395 
4396     CASE_CONVERT:
4397     case NEGATE_EXPR:
4398       op0 = TREE_OPERAND (expr, 0);
4399       STRIP_NOPS (op0);
4400       op1 = NULL_TREE;
4401       break;
4402 
4403     default:
4404       /* Just an arbitrary value, FIXME.  */
4405       return comp_cost (target_spill_cost[speed], 0);
4406     }
4407 
4408   if (op0 == NULL_TREE
4409       || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
4410     cost0 = no_cost;
4411   else
4412     cost0 = force_expr_to_var_cost (op0, speed);
4413 
4414   if (op1 == NULL_TREE
4415       || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
4416     cost1 = no_cost;
4417   else
4418     cost1 = force_expr_to_var_cost (op1, speed);
4419 
4420   mode = TYPE_MODE (TREE_TYPE (expr));
4421   switch (TREE_CODE (expr))
4422     {
4423     case POINTER_PLUS_EXPR:
4424     case PLUS_EXPR:
4425     case MINUS_EXPR:
4426     case NEGATE_EXPR:
4427       cost = comp_cost (add_cost (speed, mode), 0);
4428       if (TREE_CODE (expr) != NEGATE_EXPR)
4429 	{
4430 	  tree mult = NULL_TREE;
4431 	  comp_cost sa_cost;
4432 	  if (TREE_CODE (op1) == MULT_EXPR)
4433 	    mult = op1;
4434 	  else if (TREE_CODE (op0) == MULT_EXPR)
4435 	    mult = op0;
4436 
4437 	  if (mult != NULL_TREE
4438 	      && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
4439 	      && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
4440 				    speed, &sa_cost))
4441 	    return sa_cost;
4442 	}
4443       break;
4444 
4445     CASE_CONVERT:
4446       {
4447 	tree inner_mode, outer_mode;
4448 	outer_mode = TREE_TYPE (expr);
4449 	inner_mode = TREE_TYPE (op0);
4450 	cost = comp_cost (convert_cost (TYPE_MODE (outer_mode),
4451 				       TYPE_MODE (inner_mode), speed), 0);
4452       }
4453       break;
4454 
4455     case MULT_EXPR:
4456       if (cst_and_fits_in_hwi (op0))
4457 	cost = comp_cost (mult_by_coeff_cost (int_cst_value (op0),
4458 					     mode, speed), 0);
4459       else if (cst_and_fits_in_hwi (op1))
4460 	cost = comp_cost (mult_by_coeff_cost (int_cst_value (op1),
4461 					     mode, speed), 0);
4462       else
4463 	return comp_cost (target_spill_cost [speed], 0);
4464       break;
4465 
4466     default:
4467       gcc_unreachable ();
4468     }
4469 
4470   cost += cost0;
4471   cost += cost1;
4472 
4473   /* Bound the cost by target_spill_cost.  The parts of complicated
4474      computations often are either loop invariant or at least can
4475      be shared between several iv uses, so letting this grow without
4476      limits would not give reasonable results.  */
4477   if (cost.cost > (int) target_spill_cost [speed])
4478     cost.cost = target_spill_cost [speed];
4479 
4480   return cost;
4481 }
4482 
4483 /* Estimates cost of forcing EXPR into a variable.  DEPENDS_ON is a set of the
4484    invariants the computation depends on.  */
4485 
4486 static comp_cost
4487 force_var_cost (struct ivopts_data *data,
4488 		tree expr, bitmap *depends_on)
4489 {
4490   if (depends_on)
4491     {
4492       fd_ivopts_data = data;
4493       walk_tree (&expr, find_depends, depends_on, NULL);
4494     }
4495 
4496   return force_expr_to_var_cost (expr, data->speed);
4497 }
4498 
4499 /* Estimates cost of expressing address ADDR  as var + symbol + offset.  The
4500    value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
4501    to false if the corresponding part is missing.  DEPENDS_ON is a set of the
4502    invariants the computation depends on.  */
4503 
4504 static comp_cost
4505 split_address_cost (struct ivopts_data *data,
4506 		    tree addr, bool *symbol_present, bool *var_present,
4507 		    unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4508 {
4509   tree core;
4510   HOST_WIDE_INT bitsize;
4511   HOST_WIDE_INT bitpos;
4512   tree toffset;
4513   machine_mode mode;
4514   int unsignedp, reversep, volatilep;
4515 
4516   core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
4517 			      &unsignedp, &reversep, &volatilep);
4518 
4519   if (toffset != 0
4520       || bitpos % BITS_PER_UNIT != 0
4521       || reversep
4522       || !VAR_P (core))
4523     {
4524       *symbol_present = false;
4525       *var_present = true;
4526       fd_ivopts_data = data;
4527       if (depends_on)
4528 	walk_tree (&addr, find_depends, depends_on, NULL);
4529 
4530       return comp_cost (target_spill_cost[data->speed], 0);
4531     }
4532 
4533   *offset += bitpos / BITS_PER_UNIT;
4534   if (TREE_STATIC (core)
4535       || DECL_EXTERNAL (core))
4536     {
4537       *symbol_present = true;
4538       *var_present = false;
4539       return no_cost;
4540     }
4541 
4542   *symbol_present = false;
4543   *var_present = true;
4544   return no_cost;
4545 }
4546 
4547 /* Estimates cost of expressing difference of addresses E1 - E2 as
4548    var + symbol + offset.  The value of offset is added to OFFSET,
4549    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4550    part is missing.  DEPENDS_ON is a set of the invariants the computation
4551    depends on.  */
4552 
4553 static comp_cost
4554 ptr_difference_cost (struct ivopts_data *data,
4555 		     tree e1, tree e2, bool *symbol_present, bool *var_present,
4556 		     unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4557 {
4558   HOST_WIDE_INT diff = 0;
4559   aff_tree aff_e1, aff_e2;
4560   tree type;
4561 
4562   gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
4563 
4564   if (ptr_difference_const (e1, e2, &diff))
4565     {
4566       *offset += diff;
4567       *symbol_present = false;
4568       *var_present = false;
4569       return no_cost;
4570     }
4571 
4572   if (integer_zerop (e2))
4573     return split_address_cost (data, TREE_OPERAND (e1, 0),
4574 			       symbol_present, var_present, offset, depends_on);
4575 
4576   *symbol_present = false;
4577   *var_present = true;
4578 
4579   type = signed_type_for (TREE_TYPE (e1));
4580   tree_to_aff_combination (e1, type, &aff_e1);
4581   tree_to_aff_combination (e2, type, &aff_e2);
4582   aff_combination_scale (&aff_e2, -1);
4583   aff_combination_add (&aff_e1, &aff_e2);
4584 
4585   return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
4586 }
4587 
4588 /* Estimates cost of expressing difference E1 - E2 as
4589    var + symbol + offset.  The value of offset is added to OFFSET,
4590    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4591    part is missing.  DEPENDS_ON is a set of the invariants the computation
4592    depends on.  */
4593 
4594 static comp_cost
4595 difference_cost (struct ivopts_data *data,
4596 		 tree e1, tree e2, bool *symbol_present, bool *var_present,
4597 		 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4598 {
4599   machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
4600   unsigned HOST_WIDE_INT off1, off2;
4601   aff_tree aff_e1, aff_e2;
4602   tree type;
4603 
4604   e1 = strip_offset (e1, &off1);
4605   e2 = strip_offset (e2, &off2);
4606   *offset += off1 - off2;
4607 
4608   STRIP_NOPS (e1);
4609   STRIP_NOPS (e2);
4610 
4611   if (TREE_CODE (e1) == ADDR_EXPR)
4612     return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
4613 				offset, depends_on);
4614   *symbol_present = false;
4615 
4616   if (operand_equal_p (e1, e2, 0))
4617     {
4618       *var_present = false;
4619       return no_cost;
4620     }
4621 
4622   *var_present = true;
4623 
4624   if (integer_zerop (e2))
4625     return force_var_cost (data, e1, depends_on);
4626 
4627   if (integer_zerop (e1))
4628     {
4629       comp_cost cost = force_var_cost (data, e2, depends_on);
4630       cost += mult_by_coeff_cost (-1, mode, data->speed);
4631       return cost;
4632     }
4633 
4634   type = signed_type_for (TREE_TYPE (e1));
4635   tree_to_aff_combination (e1, type, &aff_e1);
4636   tree_to_aff_combination (e2, type, &aff_e2);
4637   aff_combination_scale (&aff_e2, -1);
4638   aff_combination_add (&aff_e1, &aff_e2);
4639 
4640   return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
4641 }
4642 
4643 /* Returns true if AFF1 and AFF2 are identical.  */
4644 
4645 static bool
4646 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
4647 {
4648   unsigned i;
4649 
4650   if (aff1->n != aff2->n)
4651     return false;
4652 
4653   for (i = 0; i < aff1->n; i++)
4654     {
4655       if (aff1->elts[i].coef != aff2->elts[i].coef)
4656 	return false;
4657 
4658       if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
4659 	return false;
4660     }
4661   return true;
4662 }
4663 
4664 /* Stores EXPR in DATA->inv_expr_tab, return pointer to iv_inv_expr_ent.  */
4665 
4666 static iv_inv_expr_ent *
4667 record_inv_expr (struct ivopts_data *data, tree expr)
4668 {
4669   struct iv_inv_expr_ent ent;
4670   struct iv_inv_expr_ent **slot;
4671 
4672   ent.expr = expr;
4673   ent.hash = iterative_hash_expr (expr, 0);
4674   slot = data->inv_expr_tab->find_slot (&ent, INSERT);
4675 
4676   if (!*slot)
4677     {
4678       *slot = XNEW (struct iv_inv_expr_ent);
4679       (*slot)->expr = expr;
4680       (*slot)->hash = ent.hash;
4681       (*slot)->id = data->max_inv_expr_id++;
4682     }
4683 
4684   return *slot;
4685 }
4686 
4687 /* Returns the invariant expression if expression UBASE - RATIO * CBASE
4688    requires a new compiler generated temporary.  Returns -1 otherwise.
4689    ADDRESS_P is a flag indicating if the expression is for address
4690    computation.  */
4691 
4692 static iv_inv_expr_ent *
4693 get_loop_invariant_expr (struct ivopts_data *data, tree ubase,
4694 			 tree cbase, HOST_WIDE_INT ratio,
4695 			 bool address_p)
4696 {
4697   aff_tree ubase_aff, cbase_aff;
4698   tree expr, ub, cb;
4699 
4700   STRIP_NOPS (ubase);
4701   STRIP_NOPS (cbase);
4702   ub = ubase;
4703   cb = cbase;
4704 
4705   if ((TREE_CODE (ubase) == INTEGER_CST)
4706       && (TREE_CODE (cbase) == INTEGER_CST))
4707     return NULL;
4708 
4709   /* Strips the constant part. */
4710   if (TREE_CODE (ubase) == PLUS_EXPR
4711       || TREE_CODE (ubase) == MINUS_EXPR
4712       || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
4713     {
4714       if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
4715 	ubase = TREE_OPERAND (ubase, 0);
4716     }
4717 
4718   /* Strips the constant part. */
4719   if (TREE_CODE (cbase) == PLUS_EXPR
4720       || TREE_CODE (cbase) == MINUS_EXPR
4721       || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
4722     {
4723       if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
4724 	cbase = TREE_OPERAND (cbase, 0);
4725     }
4726 
4727   if (address_p)
4728     {
4729       if (((TREE_CODE (ubase) == SSA_NAME)
4730 	   || (TREE_CODE (ubase) == ADDR_EXPR
4731 	       && is_gimple_min_invariant (ubase)))
4732 	  && (TREE_CODE (cbase) == INTEGER_CST))
4733 	return NULL;
4734 
4735       if (((TREE_CODE (cbase) == SSA_NAME)
4736 	   || (TREE_CODE (cbase) == ADDR_EXPR
4737 	       && is_gimple_min_invariant (cbase)))
4738 	  && (TREE_CODE (ubase) == INTEGER_CST))
4739 	return NULL;
4740     }
4741 
4742   if (ratio == 1)
4743     {
4744       if (operand_equal_p (ubase, cbase, 0))
4745 	return NULL;
4746 
4747       if (TREE_CODE (ubase) == ADDR_EXPR
4748 	  && TREE_CODE (cbase) == ADDR_EXPR)
4749 	{
4750 	  tree usym, csym;
4751 
4752 	  usym = TREE_OPERAND (ubase, 0);
4753 	  csym = TREE_OPERAND (cbase, 0);
4754 	  if (TREE_CODE (usym) == ARRAY_REF)
4755 	    {
4756 	      tree ind = TREE_OPERAND (usym, 1);
4757 	      if (TREE_CODE (ind) == INTEGER_CST
4758 		  && tree_fits_shwi_p (ind)
4759 		  && tree_to_shwi (ind) == 0)
4760 		usym = TREE_OPERAND (usym, 0);
4761 	    }
4762 	  if (TREE_CODE (csym) == ARRAY_REF)
4763 	    {
4764 	      tree ind = TREE_OPERAND (csym, 1);
4765 	      if (TREE_CODE (ind) == INTEGER_CST
4766 		  && tree_fits_shwi_p (ind)
4767 		  && tree_to_shwi (ind) == 0)
4768 		csym = TREE_OPERAND (csym, 0);
4769 	    }
4770 	  if (operand_equal_p (usym, csym, 0))
4771 	    return NULL;
4772 	}
4773       /* Now do more complex comparison  */
4774       tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
4775       tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
4776       if (compare_aff_trees (&ubase_aff, &cbase_aff))
4777 	return NULL;
4778     }
4779 
4780   tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
4781   tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
4782 
4783   aff_combination_scale (&cbase_aff, -1 * ratio);
4784   aff_combination_add (&ubase_aff, &cbase_aff);
4785   expr = aff_combination_to_tree (&ubase_aff);
4786   return record_inv_expr (data, expr);
4787 }
4788 
4789 /* Scale (multiply) the computed COST (except scratch part that should be
4790    hoisted out a loop) by header->frequency / AT->frequency,
4791    which makes expected cost more accurate.  */
4792 
4793 static comp_cost
4794 get_scaled_computation_cost_at (ivopts_data *data, gimple *at, iv_cand *cand,
4795 				comp_cost cost)
4796 {
4797    int loop_freq = data->current_loop->header->frequency;
4798    int bb_freq = gimple_bb (at)->frequency;
4799    if (loop_freq != 0)
4800      {
4801        gcc_assert (cost.scratch <= cost.cost);
4802        int scaled_cost
4803 	 = cost.scratch + (cost.cost - cost.scratch) * bb_freq / loop_freq;
4804 
4805        if (dump_file && (dump_flags & TDF_DETAILS))
4806 	 fprintf (dump_file, "Scaling iv_use based on cand %d "
4807 		  "by %2.2f: %d (scratch: %d) -> %d (%d/%d)\n",
4808 		  cand->id, 1.0f * bb_freq / loop_freq, cost.cost,
4809 		  cost.scratch, scaled_cost, bb_freq, loop_freq);
4810 
4811        cost.cost = scaled_cost;
4812      }
4813 
4814   return cost;
4815 }
4816 
4817 /* Determines the cost of the computation by that USE is expressed
4818    from induction variable CAND.  If ADDRESS_P is true, we just need
4819    to create an address from it, otherwise we want to get it into
4820    register.  A set of invariants we depend on is stored in
4821    DEPENDS_ON.  AT is the statement at that the value is computed.
4822    If CAN_AUTOINC is nonnull, use it to record whether autoinc
4823    addressing is likely.  */
4824 
4825 static comp_cost
4826 get_computation_cost_at (struct ivopts_data *data,
4827 			 struct iv_use *use, struct iv_cand *cand,
4828 			 bool address_p, bitmap *depends_on, gimple *at,
4829 			 bool *can_autoinc,
4830 			 iv_inv_expr_ent **inv_expr)
4831 {
4832   tree ubase = use->iv->base, ustep = use->iv->step;
4833   tree cbase, cstep;
4834   tree utype = TREE_TYPE (ubase), ctype;
4835   unsigned HOST_WIDE_INT cstepi, offset = 0;
4836   HOST_WIDE_INT ratio, aratio;
4837   bool var_present, symbol_present, stmt_is_after_inc;
4838   comp_cost cost;
4839   widest_int rat;
4840   bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4841   machine_mode mem_mode = (address_p
4842 				? TYPE_MODE (TREE_TYPE (*use->op_p))
4843 				: VOIDmode);
4844 
4845   if (depends_on)
4846     *depends_on = NULL;
4847 
4848   /* Only consider real candidates.  */
4849   if (!cand->iv)
4850     return infinite_cost;
4851 
4852   cbase = cand->iv->base;
4853   cstep = cand->iv->step;
4854   ctype = TREE_TYPE (cbase);
4855 
4856   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4857     {
4858       /* We do not have a precision to express the values of use.  */
4859       return infinite_cost;
4860     }
4861 
4862   if (address_p
4863       || (use->iv->base_object
4864 	  && cand->iv->base_object
4865 	  && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4866 	  && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4867     {
4868       /* Do not try to express address of an object with computation based
4869 	 on address of a different object.  This may cause problems in rtl
4870 	 level alias analysis (that does not expect this to be happening,
4871 	 as this is illegal in C), and would be unlikely to be useful
4872 	 anyway.  */
4873       if (use->iv->base_object
4874 	  && cand->iv->base_object
4875 	  && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4876 	return infinite_cost;
4877     }
4878 
4879   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4880     {
4881       /* TODO -- add direct handling of this case.  */
4882       goto fallback;
4883     }
4884 
4885   /* CSTEPI is removed from the offset in case statement is after the
4886      increment.  If the step is not constant, we use zero instead.
4887      This is a bit imprecise (there is the extra addition), but
4888      redundancy elimination is likely to transform the code so that
4889      it uses value of the variable before increment anyway,
4890      so it is not that much unrealistic.  */
4891   if (cst_and_fits_in_hwi (cstep))
4892     cstepi = int_cst_value (cstep);
4893   else
4894     cstepi = 0;
4895 
4896   if (!constant_multiple_of (ustep, cstep, &rat))
4897     return infinite_cost;
4898 
4899   if (wi::fits_shwi_p (rat))
4900     ratio = rat.to_shwi ();
4901   else
4902     return infinite_cost;
4903 
4904   STRIP_NOPS (cbase);
4905   ctype = TREE_TYPE (cbase);
4906 
4907   stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4908 
4909   /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
4910      or ratio == 1, it is better to handle this like
4911 
4912      ubase - ratio * cbase + ratio * var
4913 
4914      (also holds in the case ratio == -1, TODO.  */
4915 
4916   if (cst_and_fits_in_hwi (cbase))
4917     {
4918       offset = - ratio * (unsigned HOST_WIDE_INT) int_cst_value (cbase);
4919       cost = difference_cost (data,
4920 			      ubase, build_int_cst (utype, 0),
4921 			      &symbol_present, &var_present, &offset,
4922 			      depends_on);
4923       cost /= avg_loop_niter (data->current_loop);
4924     }
4925   else if (ratio == 1)
4926     {
4927       tree real_cbase = cbase;
4928 
4929       /* Check to see if any adjustment is needed.  */
4930       if (cstepi == 0 && stmt_is_after_inc)
4931 	{
4932 	  aff_tree real_cbase_aff;
4933 	  aff_tree cstep_aff;
4934 
4935 	  tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4936 				   &real_cbase_aff);
4937 	  tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4938 
4939 	  aff_combination_add (&real_cbase_aff, &cstep_aff);
4940 	  real_cbase = aff_combination_to_tree (&real_cbase_aff);
4941 	}
4942 
4943       cost = difference_cost (data,
4944 			      ubase, real_cbase,
4945 			      &symbol_present, &var_present, &offset,
4946 			      depends_on);
4947       cost /= avg_loop_niter (data->current_loop);
4948     }
4949   else if (address_p
4950 	   && !POINTER_TYPE_P (ctype)
4951 	   && multiplier_allowed_in_address_p
4952 		(ratio, mem_mode,
4953 			TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4954     {
4955       tree real_cbase = cbase;
4956 
4957       if (cstepi == 0 && stmt_is_after_inc)
4958 	{
4959 	  if (POINTER_TYPE_P (ctype))
4960 	    real_cbase = fold_build2 (POINTER_PLUS_EXPR, ctype, cbase, cstep);
4961 	  else
4962 	    real_cbase = fold_build2 (PLUS_EXPR, ctype, cbase, cstep);
4963 	}
4964       real_cbase = fold_build2 (MULT_EXPR, ctype, real_cbase,
4965 				build_int_cst (ctype, ratio));
4966       cost = difference_cost (data,
4967 			      ubase, real_cbase,
4968 			      &symbol_present, &var_present, &offset,
4969 			      depends_on);
4970       cost /= avg_loop_niter (data->current_loop);
4971     }
4972   else
4973     {
4974       cost = force_var_cost (data, cbase, depends_on);
4975       cost += difference_cost (data, ubase, build_int_cst (utype, 0),
4976 			       &symbol_present, &var_present, &offset,
4977 			       depends_on);
4978       cost /= avg_loop_niter (data->current_loop);
4979       cost += add_cost (data->speed, TYPE_MODE (ctype));
4980     }
4981 
4982   /* Record setup cost in scratch field.  */
4983   cost.scratch = cost.cost;
4984 
4985   if (inv_expr && depends_on && *depends_on)
4986     {
4987       *inv_expr = get_loop_invariant_expr (data, ubase, cbase, ratio,
4988 					   address_p);
4989       /* Clear depends on.  */
4990       if (*inv_expr != NULL)
4991 	bitmap_clear (*depends_on);
4992     }
4993 
4994   /* If we are after the increment, the value of the candidate is higher by
4995      one iteration.  */
4996   if (stmt_is_after_inc)
4997     offset -= ratio * cstepi;
4998 
4999   /* Now the computation is in shape symbol + var1 + const + ratio * var2.
5000      (symbol/var1/const parts may be omitted).  If we are looking for an
5001      address, find the cost of addressing this.  */
5002   if (address_p)
5003     {
5004       cost += get_address_cost (symbol_present, var_present,
5005 				offset, ratio, cstepi,
5006 				mem_mode,
5007 				TYPE_ADDR_SPACE (TREE_TYPE (utype)),
5008 				speed, stmt_is_after_inc, can_autoinc);
5009       return get_scaled_computation_cost_at (data, at, cand, cost);
5010     }
5011 
5012   /* Otherwise estimate the costs for computing the expression.  */
5013   if (!symbol_present && !var_present && !offset)
5014     {
5015       if (ratio != 1)
5016 	cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
5017       return get_scaled_computation_cost_at (data, at, cand, cost);
5018     }
5019 
5020   /* Symbol + offset should be compile-time computable so consider that they
5021       are added once to the variable, if present.  */
5022   if (var_present && (symbol_present || offset))
5023     cost += adjust_setup_cost (data,
5024 				    add_cost (speed, TYPE_MODE (ctype)));
5025 
5026   /* Having offset does not affect runtime cost in case it is added to
5027      symbol, but it increases complexity.  */
5028   if (offset)
5029     cost.complexity++;
5030 
5031   cost += add_cost (speed, TYPE_MODE (ctype));
5032 
5033   aratio = ratio > 0 ? ratio : -ratio;
5034   if (aratio != 1)
5035     cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
5036 
5037   return get_scaled_computation_cost_at (data, at, cand, cost);
5038 
5039 fallback:
5040   if (can_autoinc)
5041     *can_autoinc = false;
5042 
5043   /* Just get the expression, expand it and measure the cost.  */
5044   tree comp = get_computation_at (data->current_loop, use, cand, at);
5045 
5046   if (!comp)
5047     return infinite_cost;
5048 
5049   if (address_p)
5050     comp = build_simple_mem_ref (comp);
5051 
5052   cost = comp_cost (computation_cost (comp, speed), 0);
5053 
5054   return get_scaled_computation_cost_at (data, at, cand, cost);
5055 }
5056 
5057 /* Determines the cost of the computation by that USE is expressed
5058    from induction variable CAND.  If ADDRESS_P is true, we just need
5059    to create an address from it, otherwise we want to get it into
5060    register.  A set of invariants we depend on is stored in
5061    DEPENDS_ON.  If CAN_AUTOINC is nonnull, use it to record whether
5062    autoinc addressing is likely.  */
5063 
5064 static comp_cost
5065 get_computation_cost (struct ivopts_data *data,
5066 		      struct iv_use *use, struct iv_cand *cand,
5067 		      bool address_p, bitmap *depends_on,
5068 		      bool *can_autoinc, iv_inv_expr_ent **inv_expr)
5069 {
5070   return get_computation_cost_at (data,
5071 				  use, cand, address_p, depends_on, use->stmt,
5072 				  can_autoinc, inv_expr);
5073 }
5074 
5075 /* Determines cost of computing the use in GROUP with CAND in a generic
5076    expression.  */
5077 
5078 static bool
5079 determine_group_iv_cost_generic (struct ivopts_data *data,
5080 				 struct iv_group *group, struct iv_cand *cand)
5081 {
5082   comp_cost cost;
5083   iv_inv_expr_ent *inv_expr = NULL;
5084   bitmap depends_on = NULL;
5085   struct iv_use *use = group->vuses[0];
5086 
5087   /* The simple case first -- if we need to express value of the preserved
5088      original biv, the cost is 0.  This also prevents us from counting the
5089      cost of increment twice -- once at this use and once in the cost of
5090      the candidate.  */
5091   if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
5092     cost = no_cost;
5093   else
5094     cost = get_computation_cost (data, use, cand, false,
5095 				 &depends_on, NULL, &inv_expr);
5096 
5097   set_group_iv_cost (data, group, cand, cost, depends_on,
5098 		     NULL_TREE, ERROR_MARK, inv_expr);
5099   return !cost.infinite_cost_p ();
5100 }
5101 
5102 /* Determines cost of computing uses in GROUP with CAND in addresses.  */
5103 
5104 static bool
5105 determine_group_iv_cost_address (struct ivopts_data *data,
5106 				 struct iv_group *group, struct iv_cand *cand)
5107 {
5108   unsigned i;
5109   bitmap depends_on;
5110   bool can_autoinc;
5111   iv_inv_expr_ent *inv_expr = NULL;
5112   struct iv_use *use = group->vuses[0];
5113   comp_cost sum_cost = no_cost, cost;
5114 
5115   cost = get_computation_cost (data, use, cand, true,
5116 			       &depends_on, &can_autoinc, &inv_expr);
5117 
5118   sum_cost = cost;
5119   if (!sum_cost.infinite_cost_p () && cand->ainc_use == use)
5120     {
5121       if (can_autoinc)
5122 	sum_cost -= cand->cost_step;
5123       /* If we generated the candidate solely for exploiting autoincrement
5124 	 opportunities, and it turns out it can't be used, set the cost to
5125 	 infinity to make sure we ignore it.  */
5126       else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
5127 	sum_cost = infinite_cost;
5128     }
5129 
5130   /* Uses in a group can share setup code, so only add setup cost once.  */
5131   cost -= cost.scratch;
5132   /* Compute and add costs for rest uses of this group.  */
5133   for (i = 1; i < group->vuses.length () && !sum_cost.infinite_cost_p (); i++)
5134     {
5135       struct iv_use *next = group->vuses[i];
5136 
5137       /* TODO: We could skip computing cost for sub iv_use when it has the
5138 	 same cost as the first iv_use, but the cost really depends on the
5139 	 offset and where the iv_use is.  */
5140 	cost = get_computation_cost (data, next, cand, true,
5141 				     NULL, &can_autoinc, NULL);
5142       sum_cost += cost;
5143     }
5144   set_group_iv_cost (data, group, cand, sum_cost, depends_on,
5145 		     NULL_TREE, ERROR_MARK, inv_expr);
5146 
5147   return !sum_cost.infinite_cost_p ();
5148 }
5149 
5150 /* Computes value of candidate CAND at position AT in iteration NITER, and
5151    stores it to VAL.  */
5152 
5153 static void
5154 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple *at, tree niter,
5155 	       aff_tree *val)
5156 {
5157   aff_tree step, delta, nit;
5158   struct iv *iv = cand->iv;
5159   tree type = TREE_TYPE (iv->base);
5160   tree steptype;
5161   if (POINTER_TYPE_P (type))
5162     steptype = sizetype;
5163   else
5164     steptype = unsigned_type_for (type);
5165 
5166   tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
5167   aff_combination_convert (&step, steptype);
5168   tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
5169   aff_combination_convert (&nit, steptype);
5170   aff_combination_mult (&nit, &step, &delta);
5171   if (stmt_after_increment (loop, cand, at))
5172     aff_combination_add (&delta, &step);
5173 
5174   tree_to_aff_combination (iv->base, type, val);
5175   if (!POINTER_TYPE_P (type))
5176     aff_combination_convert (val, steptype);
5177   aff_combination_add (val, &delta);
5178 }
5179 
5180 /* Returns period of induction variable iv.  */
5181 
5182 static tree
5183 iv_period (struct iv *iv)
5184 {
5185   tree step = iv->step, period, type;
5186   tree pow2div;
5187 
5188   gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
5189 
5190   type = unsigned_type_for (TREE_TYPE (step));
5191   /* Period of the iv is lcm (step, type_range)/step -1,
5192      i.e., N*type_range/step - 1. Since type range is power
5193      of two, N == (step >> num_of_ending_zeros_binary (step),
5194      so the final result is
5195 
5196        (type_range >> num_of_ending_zeros_binary (step)) - 1
5197 
5198   */
5199   pow2div = num_ending_zeros (step);
5200 
5201   period = build_low_bits_mask (type,
5202 				(TYPE_PRECISION (type)
5203 				 - tree_to_uhwi (pow2div)));
5204 
5205   return period;
5206 }
5207 
5208 /* Returns the comparison operator used when eliminating the iv USE.  */
5209 
5210 static enum tree_code
5211 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
5212 {
5213   struct loop *loop = data->current_loop;
5214   basic_block ex_bb;
5215   edge exit;
5216 
5217   ex_bb = gimple_bb (use->stmt);
5218   exit = EDGE_SUCC (ex_bb, 0);
5219   if (flow_bb_inside_loop_p (loop, exit->dest))
5220     exit = EDGE_SUCC (ex_bb, 1);
5221 
5222   return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
5223 }
5224 
5225 /* Returns true if we can prove that BASE - OFFSET does not overflow.  For now,
5226    we only detect the situation that BASE = SOMETHING + OFFSET, where the
5227    calculation is performed in non-wrapping type.
5228 
5229    TODO: More generally, we could test for the situation that
5230 	 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
5231 	 This would require knowing the sign of OFFSET.  */
5232 
5233 static bool
5234 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
5235 {
5236   enum tree_code code;
5237   tree e1, e2;
5238   aff_tree aff_e1, aff_e2, aff_offset;
5239 
5240   if (!nowrap_type_p (TREE_TYPE (base)))
5241     return false;
5242 
5243   base = expand_simple_operations (base);
5244 
5245   if (TREE_CODE (base) == SSA_NAME)
5246     {
5247       gimple *stmt = SSA_NAME_DEF_STMT (base);
5248 
5249       if (gimple_code (stmt) != GIMPLE_ASSIGN)
5250 	return false;
5251 
5252       code = gimple_assign_rhs_code (stmt);
5253       if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
5254 	return false;
5255 
5256       e1 = gimple_assign_rhs1 (stmt);
5257       e2 = gimple_assign_rhs2 (stmt);
5258     }
5259   else
5260     {
5261       code = TREE_CODE (base);
5262       if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
5263 	return false;
5264       e1 = TREE_OPERAND (base, 0);
5265       e2 = TREE_OPERAND (base, 1);
5266     }
5267 
5268   /* Use affine expansion as deeper inspection to prove the equality.  */
5269   tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
5270 				  &aff_e2, &data->name_expansion_cache);
5271   tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
5272 				  &aff_offset, &data->name_expansion_cache);
5273   aff_combination_scale (&aff_offset, -1);
5274   switch (code)
5275     {
5276     case PLUS_EXPR:
5277       aff_combination_add (&aff_e2, &aff_offset);
5278       if (aff_combination_zero_p (&aff_e2))
5279 	return true;
5280 
5281       tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
5282 				      &aff_e1, &data->name_expansion_cache);
5283       aff_combination_add (&aff_e1, &aff_offset);
5284       return aff_combination_zero_p (&aff_e1);
5285 
5286     case POINTER_PLUS_EXPR:
5287       aff_combination_add (&aff_e2, &aff_offset);
5288       return aff_combination_zero_p (&aff_e2);
5289 
5290     default:
5291       return false;
5292     }
5293 }
5294 
5295 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
5296    comparison with CAND.  NITER describes the number of iterations of
5297    the loops.  If successful, the comparison in COMP_P is altered accordingly.
5298 
5299    We aim to handle the following situation:
5300 
5301    sometype *base, *p;
5302    int a, b, i;
5303 
5304    i = a;
5305    p = p_0 = base + a;
5306 
5307    do
5308      {
5309        bla (*p);
5310        p++;
5311        i++;
5312      }
5313    while (i < b);
5314 
5315    Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
5316    We aim to optimize this to
5317 
5318    p = p_0 = base + a;
5319    do
5320      {
5321        bla (*p);
5322        p++;
5323      }
5324    while (p < p_0 - a + b);
5325 
5326    This preserves the correctness, since the pointer arithmetics does not
5327    overflow.  More precisely:
5328 
5329    1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
5330       overflow in computing it or the values of p.
5331    2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
5332       overflow.  To prove this, we use the fact that p_0 = base + a.  */
5333 
5334 static bool
5335 iv_elimination_compare_lt (struct ivopts_data *data,
5336 			   struct iv_cand *cand, enum tree_code *comp_p,
5337 			   struct tree_niter_desc *niter)
5338 {
5339   tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
5340   struct aff_tree nit, tmpa, tmpb;
5341   enum tree_code comp;
5342   HOST_WIDE_INT step;
5343 
5344   /* We need to know that the candidate induction variable does not overflow.
5345      While more complex analysis may be used to prove this, for now just
5346      check that the variable appears in the original program and that it
5347      is computed in a type that guarantees no overflows.  */
5348   cand_type = TREE_TYPE (cand->iv->base);
5349   if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
5350     return false;
5351 
5352   /* Make sure that the loop iterates till the loop bound is hit, as otherwise
5353      the calculation of the BOUND could overflow, making the comparison
5354      invalid.  */
5355   if (!data->loop_single_exit_p)
5356     return false;
5357 
5358   /* We need to be able to decide whether candidate is increasing or decreasing
5359      in order to choose the right comparison operator.  */
5360   if (!cst_and_fits_in_hwi (cand->iv->step))
5361     return false;
5362   step = int_cst_value (cand->iv->step);
5363 
5364   /* Check that the number of iterations matches the expected pattern:
5365      a + 1 > b ? 0 : b - a - 1.  */
5366   mbz = niter->may_be_zero;
5367   if (TREE_CODE (mbz) == GT_EXPR)
5368     {
5369       /* Handle a + 1 > b.  */
5370       tree op0 = TREE_OPERAND (mbz, 0);
5371       if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
5372 	{
5373 	  a = TREE_OPERAND (op0, 0);
5374 	  b = TREE_OPERAND (mbz, 1);
5375 	}
5376       else
5377 	return false;
5378     }
5379   else if (TREE_CODE (mbz) == LT_EXPR)
5380     {
5381       tree op1 = TREE_OPERAND (mbz, 1);
5382 
5383       /* Handle b < a + 1.  */
5384       if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
5385 	{
5386 	  a = TREE_OPERAND (op1, 0);
5387 	  b = TREE_OPERAND (mbz, 0);
5388 	}
5389       else
5390 	return false;
5391     }
5392   else
5393     return false;
5394 
5395   /* Expected number of iterations is B - A - 1.  Check that it matches
5396      the actual number, i.e., that B - A - NITER = 1.  */
5397   tree_to_aff_combination (niter->niter, nit_type, &nit);
5398   tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
5399   tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
5400   aff_combination_scale (&nit, -1);
5401   aff_combination_scale (&tmpa, -1);
5402   aff_combination_add (&tmpb, &tmpa);
5403   aff_combination_add (&tmpb, &nit);
5404   if (tmpb.n != 0 || tmpb.offset != 1)
5405     return false;
5406 
5407   /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
5408      overflow.  */
5409   offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
5410 			cand->iv->step,
5411 			fold_convert (TREE_TYPE (cand->iv->step), a));
5412   if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
5413     return false;
5414 
5415   /* Determine the new comparison operator.  */
5416   comp = step < 0 ? GT_EXPR : LT_EXPR;
5417   if (*comp_p == NE_EXPR)
5418     *comp_p = comp;
5419   else if (*comp_p == EQ_EXPR)
5420     *comp_p = invert_tree_comparison (comp, false);
5421   else
5422     gcc_unreachable ();
5423 
5424   return true;
5425 }
5426 
5427 /* Check whether it is possible to express the condition in USE by comparison
5428    of candidate CAND.  If so, store the value compared with to BOUND, and the
5429    comparison operator to COMP.  */
5430 
5431 static bool
5432 may_eliminate_iv (struct ivopts_data *data,
5433 		  struct iv_use *use, struct iv_cand *cand, tree *bound,
5434 		  enum tree_code *comp)
5435 {
5436   basic_block ex_bb;
5437   edge exit;
5438   tree period;
5439   struct loop *loop = data->current_loop;
5440   aff_tree bnd;
5441   struct tree_niter_desc *desc = NULL;
5442 
5443   if (TREE_CODE (cand->iv->step) != INTEGER_CST)
5444     return false;
5445 
5446   /* For now works only for exits that dominate the loop latch.
5447      TODO: extend to other conditions inside loop body.  */
5448   ex_bb = gimple_bb (use->stmt);
5449   if (use->stmt != last_stmt (ex_bb)
5450       || gimple_code (use->stmt) != GIMPLE_COND
5451       || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
5452     return false;
5453 
5454   exit = EDGE_SUCC (ex_bb, 0);
5455   if (flow_bb_inside_loop_p (loop, exit->dest))
5456     exit = EDGE_SUCC (ex_bb, 1);
5457   if (flow_bb_inside_loop_p (loop, exit->dest))
5458     return false;
5459 
5460   desc = niter_for_exit (data, exit);
5461   if (!desc)
5462     return false;
5463 
5464   /* Determine whether we can use the variable to test the exit condition.
5465      This is the case iff the period of the induction variable is greater
5466      than the number of iterations for which the exit condition is true.  */
5467   period = iv_period (cand->iv);
5468 
5469   /* If the number of iterations is constant, compare against it directly.  */
5470   if (TREE_CODE (desc->niter) == INTEGER_CST)
5471     {
5472       /* See cand_value_at.  */
5473       if (stmt_after_increment (loop, cand, use->stmt))
5474 	{
5475 	  if (!tree_int_cst_lt (desc->niter, period))
5476 	    return false;
5477 	}
5478       else
5479 	{
5480 	  if (tree_int_cst_lt (period, desc->niter))
5481 	    return false;
5482 	}
5483     }
5484 
5485   /* If not, and if this is the only possible exit of the loop, see whether
5486      we can get a conservative estimate on the number of iterations of the
5487      entire loop and compare against that instead.  */
5488   else
5489     {
5490       widest_int period_value, max_niter;
5491 
5492       max_niter = desc->max;
5493       if (stmt_after_increment (loop, cand, use->stmt))
5494 	max_niter += 1;
5495       period_value = wi::to_widest (period);
5496       if (wi::gtu_p (max_niter, period_value))
5497 	{
5498 	  /* See if we can take advantage of inferred loop bound
5499 	     information.  */
5500 	  if (data->loop_single_exit_p)
5501 	    {
5502 	      if (!max_loop_iterations (loop, &max_niter))
5503 		return false;
5504 	      /* The loop bound is already adjusted by adding 1.  */
5505 	      if (wi::gtu_p (max_niter, period_value))
5506 		return false;
5507 	    }
5508 	  else
5509 	    return false;
5510 	}
5511     }
5512 
5513   cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
5514 
5515   *bound = fold_convert (TREE_TYPE (cand->iv->base),
5516 			 aff_combination_to_tree (&bnd));
5517   *comp = iv_elimination_compare (data, use);
5518 
5519   /* It is unlikely that computing the number of iterations using division
5520      would be more profitable than keeping the original induction variable.  */
5521   if (expression_expensive_p (*bound))
5522     return false;
5523 
5524   /* Sometimes, it is possible to handle the situation that the number of
5525      iterations may be zero unless additional assumptions by using <
5526      instead of != in the exit condition.
5527 
5528      TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
5529 	   base the exit condition on it.  However, that is often too
5530 	   expensive.  */
5531   if (!integer_zerop (desc->may_be_zero))
5532     return iv_elimination_compare_lt (data, cand, comp, desc);
5533 
5534   return true;
5535 }
5536 
5537  /* Calculates the cost of BOUND, if it is a PARM_DECL.  A PARM_DECL must
5538     be copied, if it is used in the loop body and DATA->body_includes_call.  */
5539 
5540 static int
5541 parm_decl_cost (struct ivopts_data *data, tree bound)
5542 {
5543   tree sbound = bound;
5544   STRIP_NOPS (sbound);
5545 
5546   if (TREE_CODE (sbound) == SSA_NAME
5547       && SSA_NAME_IS_DEFAULT_DEF (sbound)
5548       && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
5549       && data->body_includes_call)
5550     return COSTS_N_INSNS (1);
5551 
5552   return 0;
5553 }
5554 
5555 /* Determines cost of computing the use in GROUP with CAND in a condition.  */
5556 
5557 static bool
5558 determine_group_iv_cost_cond (struct ivopts_data *data,
5559 			      struct iv_group *group, struct iv_cand *cand)
5560 {
5561   tree bound = NULL_TREE;
5562   struct iv *cmp_iv;
5563   bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
5564   comp_cost elim_cost, express_cost, cost, bound_cost;
5565   bool ok;
5566   iv_inv_expr_ent *elim_inv_expr = NULL, *express_inv_expr = NULL, *inv_expr;
5567   tree *control_var, *bound_cst;
5568   enum tree_code comp = ERROR_MARK;
5569   struct iv_use *use = group->vuses[0];
5570 
5571   gcc_assert (cand->iv);
5572 
5573   /* Try iv elimination.  */
5574   if (may_eliminate_iv (data, use, cand, &bound, &comp))
5575     {
5576       elim_cost = force_var_cost (data, bound, &depends_on_elim);
5577       if (elim_cost.cost == 0)
5578 	elim_cost.cost = parm_decl_cost (data, bound);
5579       else if (TREE_CODE (bound) == INTEGER_CST)
5580 	elim_cost.cost = 0;
5581       /* If we replace a loop condition 'i < n' with 'p < base + n',
5582 	 depends_on_elim will have 'base' and 'n' set, which implies
5583 	 that both 'base' and 'n' will be live during the loop.	 More likely,
5584 	 'base + n' will be loop invariant, resulting in only one live value
5585 	 during the loop.  So in that case we clear depends_on_elim and set
5586 	elim_inv_expr_id instead.  */
5587       if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
5588 	{
5589 	  elim_inv_expr = record_inv_expr (data, bound);
5590 	  bitmap_clear (depends_on_elim);
5591 	}
5592       /* The bound is a loop invariant, so it will be only computed
5593 	 once.  */
5594       elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
5595     }
5596   else
5597     elim_cost = infinite_cost;
5598 
5599   /* Try expressing the original giv.  If it is compared with an invariant,
5600      note that we cannot get rid of it.  */
5601   ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
5602 			      NULL, &cmp_iv);
5603   gcc_assert (ok);
5604 
5605   /* When the condition is a comparison of the candidate IV against
5606      zero, prefer this IV.
5607 
5608      TODO: The constant that we're subtracting from the cost should
5609      be target-dependent.  This information should be added to the
5610      target costs for each backend.  */
5611   if (!elim_cost.infinite_cost_p () /* Do not try to decrease infinite! */
5612       && integer_zerop (*bound_cst)
5613       && (operand_equal_p (*control_var, cand->var_after, 0)
5614 	  || operand_equal_p (*control_var, cand->var_before, 0)))
5615     elim_cost -= 1;
5616 
5617   express_cost = get_computation_cost (data, use, cand, false,
5618 				       &depends_on_express, NULL,
5619 				       &express_inv_expr);
5620   fd_ivopts_data = data;
5621   walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
5622 
5623   /* Count the cost of the original bound as well.  */
5624   bound_cost = force_var_cost (data, *bound_cst, NULL);
5625   if (bound_cost.cost == 0)
5626     bound_cost.cost = parm_decl_cost (data, *bound_cst);
5627   else if (TREE_CODE (*bound_cst) == INTEGER_CST)
5628     bound_cost.cost = 0;
5629   express_cost += bound_cost;
5630 
5631   /* Choose the better approach, preferring the eliminated IV. */
5632   if (elim_cost <= express_cost)
5633     {
5634       cost = elim_cost;
5635       depends_on = depends_on_elim;
5636       depends_on_elim = NULL;
5637       inv_expr = elim_inv_expr;
5638     }
5639   else
5640     {
5641       cost = express_cost;
5642       depends_on = depends_on_express;
5643       depends_on_express = NULL;
5644       bound = NULL_TREE;
5645       comp = ERROR_MARK;
5646       inv_expr = express_inv_expr;
5647     }
5648 
5649   set_group_iv_cost (data, group, cand, cost,
5650 		     depends_on, bound, comp, inv_expr);
5651 
5652   if (depends_on_elim)
5653     BITMAP_FREE (depends_on_elim);
5654   if (depends_on_express)
5655     BITMAP_FREE (depends_on_express);
5656 
5657   return !cost.infinite_cost_p ();
5658 }
5659 
5660 /* Determines cost of computing uses in GROUP with CAND.  Returns false
5661    if USE cannot be represented with CAND.  */
5662 
5663 static bool
5664 determine_group_iv_cost (struct ivopts_data *data,
5665 			 struct iv_group *group, struct iv_cand *cand)
5666 {
5667   switch (group->type)
5668     {
5669     case USE_NONLINEAR_EXPR:
5670       return determine_group_iv_cost_generic (data, group, cand);
5671 
5672     case USE_ADDRESS:
5673       return determine_group_iv_cost_address (data, group, cand);
5674 
5675     case USE_COMPARE:
5676       return determine_group_iv_cost_cond (data, group, cand);
5677 
5678     default:
5679       gcc_unreachable ();
5680     }
5681 }
5682 
5683 /* Return true if get_computation_cost indicates that autoincrement is
5684    a possibility for the pair of USE and CAND, false otherwise.  */
5685 
5686 static bool
5687 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
5688 			   struct iv_cand *cand)
5689 {
5690   bitmap depends_on;
5691   bool can_autoinc;
5692   comp_cost cost;
5693 
5694   if (use->type != USE_ADDRESS)
5695     return false;
5696 
5697   cost = get_computation_cost (data, use, cand, true, &depends_on,
5698 			       &can_autoinc, NULL);
5699 
5700   BITMAP_FREE (depends_on);
5701 
5702   return !cost.infinite_cost_p () && can_autoinc;
5703 }
5704 
5705 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5706    use that allows autoincrement, and set their AINC_USE if possible.  */
5707 
5708 static void
5709 set_autoinc_for_original_candidates (struct ivopts_data *data)
5710 {
5711   unsigned i, j;
5712 
5713   for (i = 0; i < data->vcands.length (); i++)
5714     {
5715       struct iv_cand *cand = data->vcands[i];
5716       struct iv_use *closest_before = NULL;
5717       struct iv_use *closest_after = NULL;
5718       if (cand->pos != IP_ORIGINAL)
5719 	continue;
5720 
5721       for (j = 0; j < data->vgroups.length (); j++)
5722 	{
5723 	  struct iv_group *group = data->vgroups[j];
5724 	  struct iv_use *use = group->vuses[0];
5725 	  unsigned uid = gimple_uid (use->stmt);
5726 
5727 	  if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
5728 	    continue;
5729 
5730 	  if (uid < gimple_uid (cand->incremented_at)
5731 	      && (closest_before == NULL
5732 		  || uid > gimple_uid (closest_before->stmt)))
5733 	    closest_before = use;
5734 
5735 	  if (uid > gimple_uid (cand->incremented_at)
5736 	      && (closest_after == NULL
5737 		  || uid < gimple_uid (closest_after->stmt)))
5738 	    closest_after = use;
5739 	}
5740 
5741       if (closest_before != NULL
5742 	  && autoinc_possible_for_pair (data, closest_before, cand))
5743 	cand->ainc_use = closest_before;
5744       else if (closest_after != NULL
5745 	       && autoinc_possible_for_pair (data, closest_after, cand))
5746 	cand->ainc_use = closest_after;
5747     }
5748 }
5749 
5750 /* Finds the candidates for the induction variables.  */
5751 
5752 static void
5753 find_iv_candidates (struct ivopts_data *data)
5754 {
5755   /* Add commonly used ivs.  */
5756   add_standard_iv_candidates (data);
5757 
5758   /* Add old induction variables.  */
5759   add_iv_candidate_for_bivs (data);
5760 
5761   /* Add induction variables derived from uses.  */
5762   add_iv_candidate_for_groups (data);
5763 
5764   set_autoinc_for_original_candidates (data);
5765 
5766   /* Record the important candidates.  */
5767   record_important_candidates (data);
5768 
5769   if (dump_file && (dump_flags & TDF_DETAILS))
5770     {
5771       unsigned i;
5772 
5773       fprintf (dump_file, "\n<Important Candidates>:\t");
5774       for (i = 0; i < data->vcands.length (); i++)
5775 	if (data->vcands[i]->important)
5776 	  fprintf (dump_file, " %d,", data->vcands[i]->id);
5777       fprintf (dump_file, "\n");
5778 
5779       fprintf (dump_file, "\n<Group, Cand> Related:\n");
5780       for (i = 0; i < data->vgroups.length (); i++)
5781 	{
5782 	  struct iv_group *group = data->vgroups[i];
5783 
5784 	  if (group->related_cands)
5785 	    {
5786 	      fprintf (dump_file, "  Group %d:\t", group->id);
5787 	      dump_bitmap (dump_file, group->related_cands);
5788 	    }
5789 	}
5790       fprintf (dump_file, "\n");
5791     }
5792 }
5793 
5794 /* Determines costs of computing use of iv with an iv candidate.  */
5795 
5796 static void
5797 determine_group_iv_costs (struct ivopts_data *data)
5798 {
5799   unsigned i, j;
5800   struct iv_cand *cand;
5801   struct iv_group *group;
5802   bitmap to_clear = BITMAP_ALLOC (NULL);
5803 
5804   alloc_use_cost_map (data);
5805 
5806   for (i = 0; i < data->vgroups.length (); i++)
5807     {
5808       group = data->vgroups[i];
5809 
5810       if (data->consider_all_candidates)
5811 	{
5812 	  for (j = 0; j < data->vcands.length (); j++)
5813 	    {
5814 	      cand = data->vcands[j];
5815 	      determine_group_iv_cost (data, group, cand);
5816 	    }
5817 	}
5818       else
5819 	{
5820 	  bitmap_iterator bi;
5821 
5822 	  EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, j, bi)
5823 	    {
5824 	      cand = data->vcands[j];
5825 	      if (!determine_group_iv_cost (data, group, cand))
5826 		bitmap_set_bit (to_clear, j);
5827 	    }
5828 
5829 	  /* Remove the candidates for that the cost is infinite from
5830 	     the list of related candidates.  */
5831 	  bitmap_and_compl_into (group->related_cands, to_clear);
5832 	  bitmap_clear (to_clear);
5833 	}
5834     }
5835 
5836   BITMAP_FREE (to_clear);
5837 
5838   if (dump_file && (dump_flags & TDF_DETAILS))
5839     {
5840       fprintf (dump_file, "\n<Invariant Expressions>:\n");
5841       auto_vec <iv_inv_expr_ent *> list (data->inv_expr_tab->elements ());
5842 
5843       for (hash_table<iv_inv_expr_hasher>::iterator it
5844 	   = data->inv_expr_tab->begin (); it != data->inv_expr_tab->end ();
5845 	   ++it)
5846 	list.safe_push (*it);
5847 
5848       list.qsort (sort_iv_inv_expr_ent);
5849 
5850       for (i = 0; i < list.length (); ++i)
5851 	{
5852 	  fprintf (dump_file, "inv_expr %d: \t", i);
5853 	  print_generic_expr (dump_file, list[i]->expr, TDF_SLIM);
5854 	  fprintf (dump_file, "\n");
5855 	}
5856 
5857       fprintf (dump_file, "\n<Group-candidate Costs>:\n");
5858 
5859       for (i = 0; i < data->vgroups.length (); i++)
5860 	{
5861 	  group = data->vgroups[i];
5862 
5863 	  fprintf (dump_file, "Group %d:\n", i);
5864 	  fprintf (dump_file, "  cand\tcost\tcompl.\tinv.ex.\tdepends on\n");
5865 	  for (j = 0; j < group->n_map_members; j++)
5866 	    {
5867 	      if (!group->cost_map[j].cand
5868 		  || group->cost_map[j].cost.infinite_cost_p ())
5869 		continue;
5870 
5871 	      fprintf (dump_file, "  %d\t%d\t%d\t",
5872 		       group->cost_map[j].cand->id,
5873 		       group->cost_map[j].cost.cost,
5874 		       group->cost_map[j].cost.complexity);
5875 	      if (group->cost_map[j].inv_expr != NULL)
5876 		fprintf (dump_file, "%d\t",
5877 			 group->cost_map[j].inv_expr->id);
5878 	      else
5879 		fprintf (dump_file, "\t");
5880 	      if (group->cost_map[j].depends_on)
5881 		bitmap_print (dump_file,
5882 			      group->cost_map[j].depends_on, "","");
5883 	      fprintf (dump_file, "\n");
5884 	    }
5885 
5886 	  fprintf (dump_file, "\n");
5887 	}
5888       fprintf (dump_file, "\n");
5889     }
5890 }
5891 
5892 /* Determines cost of the candidate CAND.  */
5893 
5894 static void
5895 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5896 {
5897   comp_cost cost_base;
5898   unsigned cost, cost_step;
5899   tree base;
5900 
5901   if (!cand->iv)
5902     {
5903       cand->cost = 0;
5904       return;
5905     }
5906 
5907   /* There are two costs associated with the candidate -- its increment
5908      and its initialization.  The second is almost negligible for any loop
5909      that rolls enough, so we take it just very little into account.  */
5910 
5911   base = cand->iv->base;
5912   cost_base = force_var_cost (data, base, NULL);
5913   /* It will be exceptional that the iv register happens to be initialized with
5914      the proper value at no cost.  In general, there will at least be a regcopy
5915      or a const set.  */
5916   if (cost_base.cost == 0)
5917     cost_base.cost = COSTS_N_INSNS (1);
5918   cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5919 
5920   cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5921 
5922   /* Prefer the original ivs unless we may gain something by replacing it.
5923      The reason is to make debugging simpler; so this is not relevant for
5924      artificial ivs created by other optimization passes.  */
5925   if (cand->pos != IP_ORIGINAL
5926       || !SSA_NAME_VAR (cand->var_before)
5927       || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5928     cost++;
5929 
5930   /* Prefer not to insert statements into latch unless there are some
5931      already (so that we do not create unnecessary jumps).  */
5932   if (cand->pos == IP_END
5933       && empty_block_p (ip_end_pos (data->current_loop)))
5934     cost++;
5935 
5936   cand->cost = cost;
5937   cand->cost_step = cost_step;
5938 }
5939 
5940 /* Determines costs of computation of the candidates.  */
5941 
5942 static void
5943 determine_iv_costs (struct ivopts_data *data)
5944 {
5945   unsigned i;
5946 
5947   if (dump_file && (dump_flags & TDF_DETAILS))
5948     {
5949       fprintf (dump_file, "<Candidate Costs>:\n");
5950       fprintf (dump_file, "  cand\tcost\n");
5951     }
5952 
5953   for (i = 0; i < data->vcands.length (); i++)
5954     {
5955       struct iv_cand *cand = data->vcands[i];
5956 
5957       determine_iv_cost (data, cand);
5958 
5959       if (dump_file && (dump_flags & TDF_DETAILS))
5960 	fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
5961     }
5962 
5963   if (dump_file && (dump_flags & TDF_DETAILS))
5964     fprintf (dump_file, "\n");
5965 }
5966 
5967 /* Calculates cost for having SIZE induction variables.  */
5968 
5969 static unsigned
5970 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5971 {
5972   /* We add size to the cost, so that we prefer eliminating ivs
5973      if possible.  */
5974   return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
5975 					    data->body_includes_call);
5976 }
5977 
5978 /* For each size of the induction variable set determine the penalty.  */
5979 
5980 static void
5981 determine_set_costs (struct ivopts_data *data)
5982 {
5983   unsigned j, n;
5984   gphi *phi;
5985   gphi_iterator psi;
5986   tree op;
5987   struct loop *loop = data->current_loop;
5988   bitmap_iterator bi;
5989 
5990   if (dump_file && (dump_flags & TDF_DETAILS))
5991     {
5992       fprintf (dump_file, "<Global Costs>:\n");
5993       fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
5994       fprintf (dump_file, "  target_clobbered_regs %d\n", target_clobbered_regs);
5995       fprintf (dump_file, "  target_reg_cost %d\n", target_reg_cost[data->speed]);
5996       fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost[data->speed]);
5997     }
5998 
5999   n = 0;
6000   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
6001     {
6002       phi = psi.phi ();
6003       op = PHI_RESULT (phi);
6004 
6005       if (virtual_operand_p (op))
6006 	continue;
6007 
6008       if (get_iv (data, op))
6009 	continue;
6010 
6011       n++;
6012     }
6013 
6014   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
6015     {
6016       struct version_info *info = ver_info (data, j);
6017 
6018       if (info->inv_id && info->has_nonlin_use)
6019 	n++;
6020     }
6021 
6022   data->regs_used = n;
6023   if (dump_file && (dump_flags & TDF_DETAILS))
6024     fprintf (dump_file, "  regs_used %d\n", n);
6025 
6026   if (dump_file && (dump_flags & TDF_DETAILS))
6027     {
6028       fprintf (dump_file, "  cost for size:\n");
6029       fprintf (dump_file, "  ivs\tcost\n");
6030       for (j = 0; j <= 2 * target_avail_regs; j++)
6031 	fprintf (dump_file, "  %d\t%d\n", j,
6032 		 ivopts_global_cost_for_size (data, j));
6033       fprintf (dump_file, "\n");
6034     }
6035 }
6036 
6037 /* Returns true if A is a cheaper cost pair than B.  */
6038 
6039 static bool
6040 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
6041 {
6042   if (!a)
6043     return false;
6044 
6045   if (!b)
6046     return true;
6047 
6048   if (a->cost < b->cost)
6049     return true;
6050 
6051   if (b->cost < a->cost)
6052     return false;
6053 
6054   /* In case the costs are the same, prefer the cheaper candidate.  */
6055   if (a->cand->cost < b->cand->cost)
6056     return true;
6057 
6058   return false;
6059 }
6060 
6061 
6062 /* Returns candidate by that USE is expressed in IVS.  */
6063 
6064 static struct cost_pair *
6065 iv_ca_cand_for_group (struct iv_ca *ivs, struct iv_group *group)
6066 {
6067   return ivs->cand_for_group[group->id];
6068 }
6069 
6070 /* Computes the cost field of IVS structure.  */
6071 
6072 static void
6073 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
6074 {
6075   comp_cost cost = ivs->cand_use_cost;
6076 
6077   cost += ivs->cand_cost;
6078 
6079   cost += ivopts_global_cost_for_size (data,
6080 				       ivs->n_regs
6081 				       + ivs->used_inv_exprs->elements ());
6082 
6083   ivs->cost = cost;
6084 }
6085 
6086 /* Remove invariants in set INVS to set IVS.  */
6087 
6088 static void
6089 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
6090 {
6091   bitmap_iterator bi;
6092   unsigned iid;
6093 
6094   if (!invs)
6095     return;
6096 
6097   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
6098     {
6099       ivs->n_invariant_uses[iid]--;
6100       if (ivs->n_invariant_uses[iid] == 0)
6101 	ivs->n_regs--;
6102     }
6103 }
6104 
6105 /* Set USE not to be expressed by any candidate in IVS.  */
6106 
6107 static void
6108 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
6109 		 struct iv_group *group)
6110 {
6111   unsigned gid = group->id, cid;
6112   struct cost_pair *cp;
6113 
6114   cp = ivs->cand_for_group[gid];
6115   if (!cp)
6116     return;
6117   cid = cp->cand->id;
6118 
6119   ivs->bad_groups++;
6120   ivs->cand_for_group[gid] = NULL;
6121   ivs->n_cand_uses[cid]--;
6122 
6123   if (ivs->n_cand_uses[cid] == 0)
6124     {
6125       bitmap_clear_bit (ivs->cands, cid);
6126       /* Do not count the pseudocandidates.  */
6127       if (cp->cand->iv)
6128 	ivs->n_regs--;
6129       ivs->n_cands--;
6130       ivs->cand_cost -= cp->cand->cost;
6131 
6132       iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
6133     }
6134 
6135   ivs->cand_use_cost -= cp->cost;
6136 
6137   iv_ca_set_remove_invariants (ivs, cp->depends_on);
6138 
6139   if (cp->inv_expr != NULL)
6140     {
6141       unsigned *slot = ivs->used_inv_exprs->get (cp->inv_expr);
6142       --(*slot);
6143       if (*slot == 0)
6144 	ivs->used_inv_exprs->remove (cp->inv_expr);
6145     }
6146   iv_ca_recount_cost (data, ivs);
6147 }
6148 
6149 /* Add invariants in set INVS to set IVS.  */
6150 
6151 static void
6152 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
6153 {
6154   bitmap_iterator bi;
6155   unsigned iid;
6156 
6157   if (!invs)
6158     return;
6159 
6160   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
6161     {
6162       ivs->n_invariant_uses[iid]++;
6163       if (ivs->n_invariant_uses[iid] == 1)
6164 	ivs->n_regs++;
6165     }
6166 }
6167 
6168 /* Set cost pair for GROUP in set IVS to CP.  */
6169 
6170 static void
6171 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
6172 	      struct iv_group *group, struct cost_pair *cp)
6173 {
6174   unsigned gid = group->id, cid;
6175 
6176   if (ivs->cand_for_group[gid] == cp)
6177     return;
6178 
6179   if (ivs->cand_for_group[gid])
6180     iv_ca_set_no_cp (data, ivs, group);
6181 
6182   if (cp)
6183     {
6184       cid = cp->cand->id;
6185 
6186       ivs->bad_groups--;
6187       ivs->cand_for_group[gid] = cp;
6188       ivs->n_cand_uses[cid]++;
6189       if (ivs->n_cand_uses[cid] == 1)
6190 	{
6191 	  bitmap_set_bit (ivs->cands, cid);
6192 	  /* Do not count the pseudocandidates.  */
6193 	  if (cp->cand->iv)
6194 	    ivs->n_regs++;
6195 	  ivs->n_cands++;
6196 	  ivs->cand_cost += cp->cand->cost;
6197 
6198 	  iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
6199 	}
6200 
6201       ivs->cand_use_cost += cp->cost;
6202       iv_ca_set_add_invariants (ivs, cp->depends_on);
6203 
6204       if (cp->inv_expr != NULL)
6205 	{
6206 	  unsigned *slot = &ivs->used_inv_exprs->get_or_insert (cp->inv_expr);
6207 	  ++(*slot);
6208 	}
6209       iv_ca_recount_cost (data, ivs);
6210     }
6211 }
6212 
6213 /* Extend set IVS by expressing USE by some of the candidates in it
6214    if possible.  Consider all important candidates if candidates in
6215    set IVS don't give any result.  */
6216 
6217 static void
6218 iv_ca_add_group (struct ivopts_data *data, struct iv_ca *ivs,
6219 	       struct iv_group *group)
6220 {
6221   struct cost_pair *best_cp = NULL, *cp;
6222   bitmap_iterator bi;
6223   unsigned i;
6224   struct iv_cand *cand;
6225 
6226   gcc_assert (ivs->upto >= group->id);
6227   ivs->upto++;
6228   ivs->bad_groups++;
6229 
6230   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6231     {
6232       cand = data->vcands[i];
6233       cp = get_group_iv_cost (data, group, cand);
6234       if (cheaper_cost_pair (cp, best_cp))
6235 	best_cp = cp;
6236     }
6237 
6238   if (best_cp == NULL)
6239     {
6240       EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
6241 	{
6242 	  cand = data->vcands[i];
6243 	  cp = get_group_iv_cost (data, group, cand);
6244 	  if (cheaper_cost_pair (cp, best_cp))
6245 	    best_cp = cp;
6246 	}
6247     }
6248 
6249   iv_ca_set_cp (data, ivs, group, best_cp);
6250 }
6251 
6252 /* Get cost for assignment IVS.  */
6253 
6254 static comp_cost
6255 iv_ca_cost (struct iv_ca *ivs)
6256 {
6257   /* This was a conditional expression but it triggered a bug in
6258      Sun C 5.5.  */
6259   if (ivs->bad_groups)
6260     return infinite_cost;
6261   else
6262     return ivs->cost;
6263 }
6264 
6265 /* Returns true if all dependences of CP are among invariants in IVS.  */
6266 
6267 static bool
6268 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
6269 {
6270   unsigned i;
6271   bitmap_iterator bi;
6272 
6273   if (!cp->depends_on)
6274     return true;
6275 
6276   EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
6277     {
6278       if (ivs->n_invariant_uses[i] == 0)
6279 	return false;
6280     }
6281 
6282   return true;
6283 }
6284 
6285 /* Creates change of expressing GROUP by NEW_CP instead of OLD_CP and chains
6286    it before NEXT.  */
6287 
6288 static struct iv_ca_delta *
6289 iv_ca_delta_add (struct iv_group *group, struct cost_pair *old_cp,
6290 		 struct cost_pair *new_cp, struct iv_ca_delta *next)
6291 {
6292   struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
6293 
6294   change->group = group;
6295   change->old_cp = old_cp;
6296   change->new_cp = new_cp;
6297   change->next = next;
6298 
6299   return change;
6300 }
6301 
6302 /* Joins two lists of changes L1 and L2.  Destructive -- old lists
6303    are rewritten.  */
6304 
6305 static struct iv_ca_delta *
6306 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
6307 {
6308   struct iv_ca_delta *last;
6309 
6310   if (!l2)
6311     return l1;
6312 
6313   if (!l1)
6314     return l2;
6315 
6316   for (last = l1; last->next; last = last->next)
6317     continue;
6318   last->next = l2;
6319 
6320   return l1;
6321 }
6322 
6323 /* Reverse the list of changes DELTA, forming the inverse to it.  */
6324 
6325 static struct iv_ca_delta *
6326 iv_ca_delta_reverse (struct iv_ca_delta *delta)
6327 {
6328   struct iv_ca_delta *act, *next, *prev = NULL;
6329 
6330   for (act = delta; act; act = next)
6331     {
6332       next = act->next;
6333       act->next = prev;
6334       prev = act;
6335 
6336       std::swap (act->old_cp, act->new_cp);
6337     }
6338 
6339   return prev;
6340 }
6341 
6342 /* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
6343    reverted instead.  */
6344 
6345 static void
6346 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
6347 		    struct iv_ca_delta *delta, bool forward)
6348 {
6349   struct cost_pair *from, *to;
6350   struct iv_ca_delta *act;
6351 
6352   if (!forward)
6353     delta = iv_ca_delta_reverse (delta);
6354 
6355   for (act = delta; act; act = act->next)
6356     {
6357       from = act->old_cp;
6358       to = act->new_cp;
6359       gcc_assert (iv_ca_cand_for_group (ivs, act->group) == from);
6360       iv_ca_set_cp (data, ivs, act->group, to);
6361     }
6362 
6363   if (!forward)
6364     iv_ca_delta_reverse (delta);
6365 }
6366 
6367 /* Returns true if CAND is used in IVS.  */
6368 
6369 static bool
6370 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
6371 {
6372   return ivs->n_cand_uses[cand->id] > 0;
6373 }
6374 
6375 /* Returns number of induction variable candidates in the set IVS.  */
6376 
6377 static unsigned
6378 iv_ca_n_cands (struct iv_ca *ivs)
6379 {
6380   return ivs->n_cands;
6381 }
6382 
6383 /* Free the list of changes DELTA.  */
6384 
6385 static void
6386 iv_ca_delta_free (struct iv_ca_delta **delta)
6387 {
6388   struct iv_ca_delta *act, *next;
6389 
6390   for (act = *delta; act; act = next)
6391     {
6392       next = act->next;
6393       free (act);
6394     }
6395 
6396   *delta = NULL;
6397 }
6398 
6399 /* Allocates new iv candidates assignment.  */
6400 
6401 static struct iv_ca *
6402 iv_ca_new (struct ivopts_data *data)
6403 {
6404   struct iv_ca *nw = XNEW (struct iv_ca);
6405 
6406   nw->upto = 0;
6407   nw->bad_groups = 0;
6408   nw->cand_for_group = XCNEWVEC (struct cost_pair *,
6409 				 data->vgroups.length ());
6410   nw->n_cand_uses = XCNEWVEC (unsigned, data->vcands.length ());
6411   nw->cands = BITMAP_ALLOC (NULL);
6412   nw->n_cands = 0;
6413   nw->n_regs = 0;
6414   nw->cand_use_cost = no_cost;
6415   nw->cand_cost = 0;
6416   nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
6417   nw->used_inv_exprs = new hash_map <iv_inv_expr_ent *, unsigned> (13);
6418   nw->cost = no_cost;
6419 
6420   return nw;
6421 }
6422 
6423 /* Free memory occupied by the set IVS.  */
6424 
6425 static void
6426 iv_ca_free (struct iv_ca **ivs)
6427 {
6428   free ((*ivs)->cand_for_group);
6429   free ((*ivs)->n_cand_uses);
6430   BITMAP_FREE ((*ivs)->cands);
6431   free ((*ivs)->n_invariant_uses);
6432   delete ((*ivs)->used_inv_exprs);
6433   free (*ivs);
6434   *ivs = NULL;
6435 }
6436 
6437 /* Dumps IVS to FILE.  */
6438 
6439 static void
6440 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
6441 {
6442   unsigned i;
6443   comp_cost cost = iv_ca_cost (ivs);
6444 
6445   fprintf (file, "  cost: %d (complexity %d)\n", cost.cost,
6446 	   cost.complexity);
6447   fprintf (file, "  cand_cost: %d\n  cand_group_cost: %d (complexity %d)\n",
6448 	   ivs->cand_cost, ivs->cand_use_cost.cost,
6449 	   ivs->cand_use_cost.complexity);
6450   bitmap_print (file, ivs->cands, "  candidates: ","\n");
6451 
6452   for (i = 0; i < ivs->upto; i++)
6453     {
6454       struct iv_group *group = data->vgroups[i];
6455       struct cost_pair *cp = iv_ca_cand_for_group (ivs, group);
6456       if (cp)
6457         fprintf (file, "   group:%d --> iv_cand:%d, cost=(%d,%d)\n",
6458 		 group->id, cp->cand->id, cp->cost.cost,
6459 		 cp->cost.complexity);
6460       else
6461 	fprintf (file, "   group:%d --> ??\n", group->id);
6462     }
6463 
6464   const char *pref = "";
6465   fprintf (file, "  invariant variables: ");
6466   for (i = 1; i <= data->max_inv_id; i++)
6467     if (ivs->n_invariant_uses[i])
6468       {
6469 	fprintf (file, "%s%d", pref, i);
6470 	pref = ", ";
6471       }
6472 
6473   pref = "";
6474   fprintf (file, "\n  invariant expressions: ");
6475   for (hash_map<iv_inv_expr_ent *, unsigned>::iterator it
6476        = ivs->used_inv_exprs->begin (); it != ivs->used_inv_exprs->end (); ++it)
6477     {
6478 	fprintf (file, "%s%d", pref, (*it).first->id);
6479 	pref = ", ";
6480     }
6481 
6482   fprintf (file, "\n\n");
6483 }
6484 
6485 /* Try changing candidate in IVS to CAND for each use.  Return cost of the
6486    new set, and store differences in DELTA.  Number of induction variables
6487    in the new set is stored to N_IVS. MIN_NCAND is a flag. When it is true
6488    the function will try to find a solution with mimimal iv candidates.  */
6489 
6490 static comp_cost
6491 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
6492 	      struct iv_cand *cand, struct iv_ca_delta **delta,
6493 	      unsigned *n_ivs, bool min_ncand)
6494 {
6495   unsigned i;
6496   comp_cost cost;
6497   struct iv_group *group;
6498   struct cost_pair *old_cp, *new_cp;
6499 
6500   *delta = NULL;
6501   for (i = 0; i < ivs->upto; i++)
6502     {
6503       group = data->vgroups[i];
6504       old_cp = iv_ca_cand_for_group (ivs, group);
6505 
6506       if (old_cp
6507 	  && old_cp->cand == cand)
6508 	continue;
6509 
6510       new_cp = get_group_iv_cost (data, group, cand);
6511       if (!new_cp)
6512 	continue;
6513 
6514       if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
6515 	continue;
6516 
6517       if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
6518 	continue;
6519 
6520       *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
6521     }
6522 
6523   iv_ca_delta_commit (data, ivs, *delta, true);
6524   cost = iv_ca_cost (ivs);
6525   if (n_ivs)
6526     *n_ivs = iv_ca_n_cands (ivs);
6527   iv_ca_delta_commit (data, ivs, *delta, false);
6528 
6529   return cost;
6530 }
6531 
6532 /* Try narrowing set IVS by removing CAND.  Return the cost of
6533    the new set and store the differences in DELTA.  START is
6534    the candidate with which we start narrowing.  */
6535 
6536 static comp_cost
6537 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
6538 	      struct iv_cand *cand, struct iv_cand *start,
6539 	      struct iv_ca_delta **delta)
6540 {
6541   unsigned i, ci;
6542   struct iv_group *group;
6543   struct cost_pair *old_cp, *new_cp, *cp;
6544   bitmap_iterator bi;
6545   struct iv_cand *cnd;
6546   comp_cost cost, best_cost, acost;
6547 
6548   *delta = NULL;
6549   for (i = 0; i < data->vgroups.length (); i++)
6550     {
6551       group = data->vgroups[i];
6552 
6553       old_cp = iv_ca_cand_for_group (ivs, group);
6554       if (old_cp->cand != cand)
6555 	continue;
6556 
6557       best_cost = iv_ca_cost (ivs);
6558       /* Start narrowing with START.  */
6559       new_cp = get_group_iv_cost (data, group, start);
6560 
6561       if (data->consider_all_candidates)
6562 	{
6563 	  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
6564 	    {
6565 	      if (ci == cand->id || (start && ci == start->id))
6566 		continue;
6567 
6568 	      cnd = data->vcands[ci];
6569 
6570 	      cp = get_group_iv_cost (data, group, cnd);
6571 	      if (!cp)
6572 		continue;
6573 
6574 	      iv_ca_set_cp (data, ivs, group, cp);
6575 	      acost = iv_ca_cost (ivs);
6576 
6577 	      if (acost < best_cost)
6578 		{
6579 		  best_cost = acost;
6580 		  new_cp = cp;
6581 		}
6582 	    }
6583 	}
6584       else
6585 	{
6586 	  EXECUTE_IF_AND_IN_BITMAP (group->related_cands, ivs->cands, 0, ci, bi)
6587 	    {
6588 	      if (ci == cand->id || (start && ci == start->id))
6589 		continue;
6590 
6591 	      cnd = data->vcands[ci];
6592 
6593 	      cp = get_group_iv_cost (data, group, cnd);
6594 	      if (!cp)
6595 		continue;
6596 
6597 	      iv_ca_set_cp (data, ivs, group, cp);
6598 	      acost = iv_ca_cost (ivs);
6599 
6600 	      if (acost < best_cost)
6601 		{
6602 		  best_cost = acost;
6603 		  new_cp = cp;
6604 		}
6605 	    }
6606 	}
6607       /* Restore to old cp for use.  */
6608       iv_ca_set_cp (data, ivs, group, old_cp);
6609 
6610       if (!new_cp)
6611 	{
6612 	  iv_ca_delta_free (delta);
6613 	  return infinite_cost;
6614 	}
6615 
6616       *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
6617     }
6618 
6619   iv_ca_delta_commit (data, ivs, *delta, true);
6620   cost = iv_ca_cost (ivs);
6621   iv_ca_delta_commit (data, ivs, *delta, false);
6622 
6623   return cost;
6624 }
6625 
6626 /* Try optimizing the set of candidates IVS by removing candidates different
6627    from to EXCEPT_CAND from it.  Return cost of the new set, and store
6628    differences in DELTA.  */
6629 
6630 static comp_cost
6631 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
6632 	     struct iv_cand *except_cand, struct iv_ca_delta **delta)
6633 {
6634   bitmap_iterator bi;
6635   struct iv_ca_delta *act_delta, *best_delta;
6636   unsigned i;
6637   comp_cost best_cost, acost;
6638   struct iv_cand *cand;
6639 
6640   best_delta = NULL;
6641   best_cost = iv_ca_cost (ivs);
6642 
6643   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6644     {
6645       cand = data->vcands[i];
6646 
6647       if (cand == except_cand)
6648 	continue;
6649 
6650       acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
6651 
6652       if (acost < best_cost)
6653 	{
6654 	  best_cost = acost;
6655 	  iv_ca_delta_free (&best_delta);
6656 	  best_delta = act_delta;
6657 	}
6658       else
6659 	iv_ca_delta_free (&act_delta);
6660     }
6661 
6662   if (!best_delta)
6663     {
6664       *delta = NULL;
6665       return best_cost;
6666     }
6667 
6668   /* Recurse to possibly remove other unnecessary ivs.  */
6669   iv_ca_delta_commit (data, ivs, best_delta, true);
6670   best_cost = iv_ca_prune (data, ivs, except_cand, delta);
6671   iv_ca_delta_commit (data, ivs, best_delta, false);
6672   *delta = iv_ca_delta_join (best_delta, *delta);
6673   return best_cost;
6674 }
6675 
6676 /* Check if CAND_IDX is a candidate other than OLD_CAND and has
6677    cheaper local cost for GROUP than BEST_CP.  Return pointer to
6678    the corresponding cost_pair, otherwise just return BEST_CP.  */
6679 
6680 static struct cost_pair*
6681 cheaper_cost_with_cand (struct ivopts_data *data, struct iv_group *group,
6682 			unsigned int cand_idx, struct iv_cand *old_cand,
6683 			struct cost_pair *best_cp)
6684 {
6685   struct iv_cand *cand;
6686   struct cost_pair *cp;
6687 
6688   gcc_assert (old_cand != NULL && best_cp != NULL);
6689   if (cand_idx == old_cand->id)
6690     return best_cp;
6691 
6692   cand = data->vcands[cand_idx];
6693   cp = get_group_iv_cost (data, group, cand);
6694   if (cp != NULL && cheaper_cost_pair (cp, best_cp))
6695     return cp;
6696 
6697   return best_cp;
6698 }
6699 
6700 /* Try breaking local optimal fixed-point for IVS by replacing candidates
6701    which are used by more than one iv uses.  For each of those candidates,
6702    this function tries to represent iv uses under that candidate using
6703    other ones with lower local cost, then tries to prune the new set.
6704    If the new set has lower cost, It returns the new cost after recording
6705    candidate replacement in list DELTA.  */
6706 
6707 static comp_cost
6708 iv_ca_replace (struct ivopts_data *data, struct iv_ca *ivs,
6709 	       struct iv_ca_delta **delta)
6710 {
6711   bitmap_iterator bi, bj;
6712   unsigned int i, j, k;
6713   struct iv_cand *cand;
6714   comp_cost orig_cost, acost;
6715   struct iv_ca_delta *act_delta, *tmp_delta;
6716   struct cost_pair *old_cp, *best_cp = NULL;
6717 
6718   *delta = NULL;
6719   orig_cost = iv_ca_cost (ivs);
6720 
6721   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6722     {
6723       if (ivs->n_cand_uses[i] == 1
6724 	  || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
6725 	continue;
6726 
6727       cand = data->vcands[i];
6728 
6729       act_delta = NULL;
6730       /*  Represent uses under current candidate using other ones with
6731 	  lower local cost.  */
6732       for (j = 0; j < ivs->upto; j++)
6733 	{
6734 	  struct iv_group *group = data->vgroups[j];
6735 	  old_cp = iv_ca_cand_for_group (ivs, group);
6736 
6737 	  if (old_cp->cand != cand)
6738 	    continue;
6739 
6740 	  best_cp = old_cp;
6741 	  if (data->consider_all_candidates)
6742 	    for (k = 0; k < data->vcands.length (); k++)
6743 	      best_cp = cheaper_cost_with_cand (data, group, k,
6744 						old_cp->cand, best_cp);
6745 	  else
6746 	    EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, k, bj)
6747 	      best_cp = cheaper_cost_with_cand (data, group, k,
6748 						old_cp->cand, best_cp);
6749 
6750 	  if (best_cp == old_cp)
6751 	    continue;
6752 
6753 	  act_delta = iv_ca_delta_add (group, old_cp, best_cp, act_delta);
6754 	}
6755       /* No need for further prune.  */
6756       if (!act_delta)
6757 	continue;
6758 
6759       /* Prune the new candidate set.  */
6760       iv_ca_delta_commit (data, ivs, act_delta, true);
6761       acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
6762       iv_ca_delta_commit (data, ivs, act_delta, false);
6763       act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6764 
6765       if (acost < orig_cost)
6766 	{
6767 	  *delta = act_delta;
6768 	  return acost;
6769 	}
6770       else
6771 	iv_ca_delta_free (&act_delta);
6772     }
6773 
6774   return orig_cost;
6775 }
6776 
6777 /* Tries to extend the sets IVS in the best possible way in order to
6778    express the GROUP.  If ORIGINALP is true, prefer candidates from
6779    the original set of IVs, otherwise favor important candidates not
6780    based on any memory object.  */
6781 
6782 static bool
6783 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
6784 		  struct iv_group *group, bool originalp)
6785 {
6786   comp_cost best_cost, act_cost;
6787   unsigned i;
6788   bitmap_iterator bi;
6789   struct iv_cand *cand;
6790   struct iv_ca_delta *best_delta = NULL, *act_delta;
6791   struct cost_pair *cp;
6792 
6793   iv_ca_add_group (data, ivs, group);
6794   best_cost = iv_ca_cost (ivs);
6795   cp = iv_ca_cand_for_group (ivs, group);
6796   if (cp)
6797     {
6798       best_delta = iv_ca_delta_add (group, NULL, cp, NULL);
6799       iv_ca_set_no_cp (data, ivs, group);
6800     }
6801 
6802   /* If ORIGINALP is true, try to find the original IV for the use.  Otherwise
6803      first try important candidates not based on any memory object.  Only if
6804      this fails, try the specific ones.  Rationale -- in loops with many
6805      variables the best choice often is to use just one generic biv.  If we
6806      added here many ivs specific to the uses, the optimization algorithm later
6807      would be likely to get stuck in a local minimum, thus causing us to create
6808      too many ivs.  The approach from few ivs to more seems more likely to be
6809      successful -- starting from few ivs, replacing an expensive use by a
6810      specific iv should always be a win.  */
6811   EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, i, bi)
6812     {
6813       cand = data->vcands[i];
6814 
6815       if (originalp && cand->pos !=IP_ORIGINAL)
6816 	continue;
6817 
6818       if (!originalp && cand->iv->base_object != NULL_TREE)
6819 	continue;
6820 
6821       if (iv_ca_cand_used_p (ivs, cand))
6822 	continue;
6823 
6824       cp = get_group_iv_cost (data, group, cand);
6825       if (!cp)
6826 	continue;
6827 
6828       iv_ca_set_cp (data, ivs, group, cp);
6829       act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
6830 			       true);
6831       iv_ca_set_no_cp (data, ivs, group);
6832       act_delta = iv_ca_delta_add (group, NULL, cp, act_delta);
6833 
6834       if (act_cost < best_cost)
6835 	{
6836 	  best_cost = act_cost;
6837 
6838 	  iv_ca_delta_free (&best_delta);
6839 	  best_delta = act_delta;
6840 	}
6841       else
6842 	iv_ca_delta_free (&act_delta);
6843     }
6844 
6845   if (best_cost.infinite_cost_p ())
6846     {
6847       for (i = 0; i < group->n_map_members; i++)
6848 	{
6849 	  cp = group->cost_map + i;
6850 	  cand = cp->cand;
6851 	  if (!cand)
6852 	    continue;
6853 
6854 	  /* Already tried this.  */
6855 	  if (cand->important)
6856 	    {
6857 	      if (originalp && cand->pos == IP_ORIGINAL)
6858 		continue;
6859 	      if (!originalp && cand->iv->base_object == NULL_TREE)
6860 		continue;
6861 	    }
6862 
6863 	  if (iv_ca_cand_used_p (ivs, cand))
6864 	    continue;
6865 
6866 	  act_delta = NULL;
6867 	  iv_ca_set_cp (data, ivs, group, cp);
6868 	  act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
6869 	  iv_ca_set_no_cp (data, ivs, group);
6870 	  act_delta = iv_ca_delta_add (group,
6871 				       iv_ca_cand_for_group (ivs, group),
6872 				       cp, act_delta);
6873 
6874 	  if (act_cost < best_cost)
6875 	    {
6876 	      best_cost = act_cost;
6877 
6878 	      if (best_delta)
6879 		iv_ca_delta_free (&best_delta);
6880 	      best_delta = act_delta;
6881 	    }
6882 	  else
6883 	    iv_ca_delta_free (&act_delta);
6884 	}
6885     }
6886 
6887   iv_ca_delta_commit (data, ivs, best_delta, true);
6888   iv_ca_delta_free (&best_delta);
6889 
6890   return !best_cost.infinite_cost_p ();
6891 }
6892 
6893 /* Finds an initial assignment of candidates to uses.  */
6894 
6895 static struct iv_ca *
6896 get_initial_solution (struct ivopts_data *data, bool originalp)
6897 {
6898   unsigned i;
6899   struct iv_ca *ivs = iv_ca_new (data);
6900 
6901   for (i = 0; i < data->vgroups.length (); i++)
6902     if (!try_add_cand_for (data, ivs, data->vgroups[i], originalp))
6903       {
6904 	iv_ca_free (&ivs);
6905 	return NULL;
6906       }
6907 
6908   return ivs;
6909 }
6910 
6911 /* Tries to improve set of induction variables IVS.  TRY_REPLACE_P
6912    points to a bool variable, this function tries to break local
6913    optimal fixed-point by replacing candidates in IVS if it's true.  */
6914 
6915 static bool
6916 try_improve_iv_set (struct ivopts_data *data,
6917 		    struct iv_ca *ivs, bool *try_replace_p)
6918 {
6919   unsigned i, n_ivs;
6920   comp_cost acost, best_cost = iv_ca_cost (ivs);
6921   struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
6922   struct iv_cand *cand;
6923 
6924   /* Try extending the set of induction variables by one.  */
6925   for (i = 0; i < data->vcands.length (); i++)
6926     {
6927       cand = data->vcands[i];
6928 
6929       if (iv_ca_cand_used_p (ivs, cand))
6930 	continue;
6931 
6932       acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6933       if (!act_delta)
6934 	continue;
6935 
6936       /* If we successfully added the candidate and the set is small enough,
6937 	 try optimizing it by removing other candidates.  */
6938       if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6939       	{
6940 	  iv_ca_delta_commit (data, ivs, act_delta, true);
6941 	  acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6942 	  iv_ca_delta_commit (data, ivs, act_delta, false);
6943 	  act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6944 	}
6945 
6946       if (acost < best_cost)
6947 	{
6948 	  best_cost = acost;
6949 	  iv_ca_delta_free (&best_delta);
6950 	  best_delta = act_delta;
6951 	}
6952       else
6953 	iv_ca_delta_free (&act_delta);
6954     }
6955 
6956   if (!best_delta)
6957     {
6958       /* Try removing the candidates from the set instead.  */
6959       best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6960 
6961       if (!best_delta && *try_replace_p)
6962 	{
6963 	  *try_replace_p = false;
6964 	  /* So far candidate selecting algorithm tends to choose fewer IVs
6965 	     so that it can handle cases in which loops have many variables
6966 	     but the best choice is often to use only one general biv.  One
6967 	     weakness is it can't handle opposite cases, in which different
6968 	     candidates should be chosen with respect to each use.  To solve
6969 	     the problem, we replace candidates in a manner described by the
6970 	     comments of iv_ca_replace, thus give general algorithm a chance
6971 	     to break local optimal fixed-point in these cases.  */
6972 	  best_cost = iv_ca_replace (data, ivs, &best_delta);
6973 	}
6974 
6975       if (!best_delta)
6976 	return false;
6977     }
6978 
6979   iv_ca_delta_commit (data, ivs, best_delta, true);
6980   gcc_assert (best_cost == iv_ca_cost (ivs));
6981   iv_ca_delta_free (&best_delta);
6982   return true;
6983 }
6984 
6985 /* Attempts to find the optimal set of induction variables.  We do simple
6986    greedy heuristic -- we try to replace at most one candidate in the selected
6987    solution and remove the unused ivs while this improves the cost.  */
6988 
6989 static struct iv_ca *
6990 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
6991 {
6992   struct iv_ca *set;
6993   bool try_replace_p = true;
6994 
6995   /* Get the initial solution.  */
6996   set = get_initial_solution (data, originalp);
6997   if (!set)
6998     {
6999       if (dump_file && (dump_flags & TDF_DETAILS))
7000 	fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
7001       return NULL;
7002     }
7003 
7004   if (dump_file && (dump_flags & TDF_DETAILS))
7005     {
7006       fprintf (dump_file, "Initial set of candidates:\n");
7007       iv_ca_dump (data, dump_file, set);
7008     }
7009 
7010   while (try_improve_iv_set (data, set, &try_replace_p))
7011     {
7012       if (dump_file && (dump_flags & TDF_DETAILS))
7013 	{
7014 	  fprintf (dump_file, "Improved to:\n");
7015 	  iv_ca_dump (data, dump_file, set);
7016 	}
7017     }
7018 
7019   return set;
7020 }
7021 
7022 static struct iv_ca *
7023 find_optimal_iv_set (struct ivopts_data *data)
7024 {
7025   unsigned i;
7026   comp_cost cost, origcost;
7027   struct iv_ca *set, *origset;
7028 
7029   /* Determine the cost based on a strategy that starts with original IVs,
7030      and try again using a strategy that prefers candidates not based
7031      on any IVs.  */
7032   origset = find_optimal_iv_set_1 (data, true);
7033   set = find_optimal_iv_set_1 (data, false);
7034 
7035   if (!origset && !set)
7036     return NULL;
7037 
7038   origcost = origset ? iv_ca_cost (origset) : infinite_cost;
7039   cost = set ? iv_ca_cost (set) : infinite_cost;
7040 
7041   if (dump_file && (dump_flags & TDF_DETAILS))
7042     {
7043       fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
7044 	       origcost.cost, origcost.complexity);
7045       fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
7046 	       cost.cost, cost.complexity);
7047     }
7048 
7049   /* Choose the one with the best cost.  */
7050   if (origcost <= cost)
7051     {
7052       if (set)
7053 	iv_ca_free (&set);
7054       set = origset;
7055     }
7056   else if (origset)
7057     iv_ca_free (&origset);
7058 
7059   for (i = 0; i < data->vgroups.length (); i++)
7060     {
7061       struct iv_group *group = data->vgroups[i];
7062       group->selected = iv_ca_cand_for_group (set, group)->cand;
7063     }
7064 
7065   return set;
7066 }
7067 
7068 /* Creates a new induction variable corresponding to CAND.  */
7069 
7070 static void
7071 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
7072 {
7073   gimple_stmt_iterator incr_pos;
7074   tree base;
7075   struct iv_use *use;
7076   struct iv_group *group;
7077   bool after = false;
7078 
7079   if (!cand->iv)
7080     return;
7081 
7082   switch (cand->pos)
7083     {
7084     case IP_NORMAL:
7085       incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
7086       break;
7087 
7088     case IP_END:
7089       incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
7090       after = true;
7091       break;
7092 
7093     case IP_AFTER_USE:
7094       after = true;
7095       /* fall through */
7096     case IP_BEFORE_USE:
7097       incr_pos = gsi_for_stmt (cand->incremented_at);
7098       break;
7099 
7100     case IP_ORIGINAL:
7101       /* Mark that the iv is preserved.  */
7102       name_info (data, cand->var_before)->preserve_biv = true;
7103       name_info (data, cand->var_after)->preserve_biv = true;
7104 
7105       /* Rewrite the increment so that it uses var_before directly.  */
7106       use = find_interesting_uses_op (data, cand->var_after);
7107       group = data->vgroups[use->group_id];
7108       group->selected = cand;
7109       return;
7110     }
7111 
7112   gimple_add_tmp_var (cand->var_before);
7113 
7114   base = unshare_expr (cand->iv->base);
7115 
7116   create_iv (base, unshare_expr (cand->iv->step),
7117 	     cand->var_before, data->current_loop,
7118 	     &incr_pos, after, &cand->var_before, &cand->var_after);
7119 }
7120 
7121 /* Creates new induction variables described in SET.  */
7122 
7123 static void
7124 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
7125 {
7126   unsigned i;
7127   struct iv_cand *cand;
7128   bitmap_iterator bi;
7129 
7130   EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
7131     {
7132       cand = data->vcands[i];
7133       create_new_iv (data, cand);
7134     }
7135 
7136   if (dump_file && (dump_flags & TDF_DETAILS))
7137     {
7138       fprintf (dump_file, "Selected IV set for loop %d",
7139 	       data->current_loop->num);
7140       if (data->loop_loc != UNKNOWN_LOCATION)
7141 	fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7142 		 LOCATION_LINE (data->loop_loc));
7143       fprintf (dump_file, ", " HOST_WIDE_INT_PRINT_DEC " avg niters",
7144 	       avg_loop_niter (data->current_loop));
7145       fprintf (dump_file, ", " HOST_WIDE_INT_PRINT_UNSIGNED " expressions",
7146 	       (unsigned HOST_WIDE_INT) set->used_inv_exprs->elements ());
7147       fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
7148       EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
7149 	{
7150 	  cand = data->vcands[i];
7151 	  dump_cand (dump_file, cand);
7152 	}
7153       fprintf (dump_file, "\n");
7154     }
7155 }
7156 
7157 /* Rewrites USE (definition of iv used in a nonlinear expression)
7158    using candidate CAND.  */
7159 
7160 static void
7161 rewrite_use_nonlinear_expr (struct ivopts_data *data,
7162 			    struct iv_use *use, struct iv_cand *cand)
7163 {
7164   tree comp;
7165   tree tgt;
7166   gassign *ass;
7167   gimple_stmt_iterator bsi;
7168 
7169   /* An important special case -- if we are asked to express value of
7170      the original iv by itself, just exit; there is no need to
7171      introduce a new computation (that might also need casting the
7172      variable to unsigned and back).  */
7173   if (cand->pos == IP_ORIGINAL
7174       && cand->incremented_at == use->stmt)
7175     {
7176       tree op = NULL_TREE;
7177       enum tree_code stmt_code;
7178 
7179       gcc_assert (is_gimple_assign (use->stmt));
7180       gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
7181 
7182       /* Check whether we may leave the computation unchanged.
7183 	 This is the case only if it does not rely on other
7184 	 computations in the loop -- otherwise, the computation
7185 	 we rely upon may be removed in remove_unused_ivs,
7186 	 thus leading to ICE.  */
7187       stmt_code = gimple_assign_rhs_code (use->stmt);
7188       if (stmt_code == PLUS_EXPR
7189 	  || stmt_code == MINUS_EXPR
7190 	  || stmt_code == POINTER_PLUS_EXPR)
7191 	{
7192 	  if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
7193 	    op = gimple_assign_rhs2 (use->stmt);
7194 	  else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
7195 	    op = gimple_assign_rhs1 (use->stmt);
7196 	}
7197 
7198       if (op != NULL_TREE)
7199 	{
7200 	  if (expr_invariant_in_loop_p (data->current_loop, op))
7201 	    return;
7202 	  if (TREE_CODE (op) == SSA_NAME)
7203 	    {
7204 	      struct iv *iv = get_iv (data, op);
7205 	      if (iv != NULL && integer_zerop (iv->step))
7206 		return;
7207 	    }
7208 	}
7209     }
7210 
7211   comp = get_computation (data->current_loop, use, cand);
7212   gcc_assert (comp != NULL_TREE);
7213 
7214   switch (gimple_code (use->stmt))
7215     {
7216     case GIMPLE_PHI:
7217       tgt = PHI_RESULT (use->stmt);
7218 
7219       /* If we should keep the biv, do not replace it.  */
7220       if (name_info (data, tgt)->preserve_biv)
7221 	return;
7222 
7223       bsi = gsi_after_labels (gimple_bb (use->stmt));
7224       break;
7225 
7226     case GIMPLE_ASSIGN:
7227       tgt = gimple_assign_lhs (use->stmt);
7228       bsi = gsi_for_stmt (use->stmt);
7229       break;
7230 
7231     default:
7232       gcc_unreachable ();
7233     }
7234 
7235   if (!valid_gimple_rhs_p (comp)
7236       || (gimple_code (use->stmt) != GIMPLE_PHI
7237 	  /* We can't allow re-allocating the stmt as it might be pointed
7238 	     to still.  */
7239 	  && (get_gimple_rhs_num_ops (TREE_CODE (comp))
7240 	      >= gimple_num_ops (gsi_stmt (bsi)))))
7241     {
7242       comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
7243 				       true, GSI_SAME_STMT);
7244       if (POINTER_TYPE_P (TREE_TYPE (tgt)))
7245 	{
7246 	  duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
7247 	  /* As this isn't a plain copy we have to reset alignment
7248 	     information.  */
7249 	  if (SSA_NAME_PTR_INFO (comp))
7250 	    mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
7251 	}
7252     }
7253 
7254   if (gimple_code (use->stmt) == GIMPLE_PHI)
7255     {
7256       ass = gimple_build_assign (tgt, comp);
7257       gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
7258 
7259       bsi = gsi_for_stmt (use->stmt);
7260       remove_phi_node (&bsi, false);
7261     }
7262   else
7263     {
7264       gimple_assign_set_rhs_from_tree (&bsi, comp);
7265       use->stmt = gsi_stmt (bsi);
7266     }
7267 }
7268 
7269 /* Performs a peephole optimization to reorder the iv update statement with
7270    a mem ref to enable instruction combining in later phases. The mem ref uses
7271    the iv value before the update, so the reordering transformation requires
7272    adjustment of the offset. CAND is the selected IV_CAND.
7273 
7274    Example:
7275 
7276    t = MEM_REF (base, iv1, 8, 16);  // base, index, stride, offset
7277    iv2 = iv1 + 1;
7278 
7279    if (t < val)      (1)
7280      goto L;
7281    goto Head;
7282 
7283 
7284    directly propagating t over to (1) will introduce overlapping live range
7285    thus increase register pressure. This peephole transform it into:
7286 
7287 
7288    iv2 = iv1 + 1;
7289    t = MEM_REF (base, iv2, 8, 8);
7290    if (t < val)
7291      goto L;
7292    goto Head;
7293 */
7294 
7295 static void
7296 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
7297 {
7298   tree var_after;
7299   gimple *iv_update, *stmt;
7300   basic_block bb;
7301   gimple_stmt_iterator gsi, gsi_iv;
7302 
7303   if (cand->pos != IP_NORMAL)
7304     return;
7305 
7306   var_after = cand->var_after;
7307   iv_update = SSA_NAME_DEF_STMT (var_after);
7308 
7309   bb = gimple_bb (iv_update);
7310   gsi = gsi_last_nondebug_bb (bb);
7311   stmt = gsi_stmt (gsi);
7312 
7313   /* Only handle conditional statement for now.  */
7314   if (gimple_code (stmt) != GIMPLE_COND)
7315     return;
7316 
7317   gsi_prev_nondebug (&gsi);
7318   stmt = gsi_stmt (gsi);
7319   if (stmt != iv_update)
7320     return;
7321 
7322   gsi_prev_nondebug (&gsi);
7323   if (gsi_end_p (gsi))
7324     return;
7325 
7326   stmt = gsi_stmt (gsi);
7327   if (gimple_code (stmt) != GIMPLE_ASSIGN)
7328     return;
7329 
7330   if (stmt != use->stmt)
7331     return;
7332 
7333   if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
7334     return;
7335 
7336   if (dump_file && (dump_flags & TDF_DETAILS))
7337     {
7338       fprintf (dump_file, "Reordering \n");
7339       print_gimple_stmt (dump_file, iv_update, 0, 0);
7340       print_gimple_stmt (dump_file, use->stmt, 0, 0);
7341       fprintf (dump_file, "\n");
7342     }
7343 
7344   gsi = gsi_for_stmt (use->stmt);
7345   gsi_iv = gsi_for_stmt (iv_update);
7346   gsi_move_before (&gsi_iv, &gsi);
7347 
7348   cand->pos = IP_BEFORE_USE;
7349   cand->incremented_at = use->stmt;
7350 }
7351 
7352 /* Rewrites USE (address that is an iv) using candidate CAND.  */
7353 
7354 static void
7355 rewrite_use_address (struct ivopts_data *data,
7356 		     struct iv_use *use, struct iv_cand *cand)
7357 {
7358   aff_tree aff;
7359   gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7360   tree base_hint = NULL_TREE;
7361   tree ref, iv;
7362   bool ok;
7363 
7364   adjust_iv_update_pos (cand, use);
7365   ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
7366   gcc_assert (ok);
7367   unshare_aff_combination (&aff);
7368 
7369   /* To avoid undefined overflow problems, all IV candidates use unsigned
7370      integer types.  The drawback is that this makes it impossible for
7371      create_mem_ref to distinguish an IV that is based on a memory object
7372      from one that represents simply an offset.
7373 
7374      To work around this problem, we pass a hint to create_mem_ref that
7375      indicates which variable (if any) in aff is an IV based on a memory
7376      object.  Note that we only consider the candidate.  If this is not
7377      based on an object, the base of the reference is in some subexpression
7378      of the use -- but these will use pointer types, so they are recognized
7379      by the create_mem_ref heuristics anyway.  */
7380   if (cand->iv->base_object)
7381     base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
7382 
7383   iv = var_at_stmt (data->current_loop, cand, use->stmt);
7384   tree type = TREE_TYPE (*use->op_p);
7385   unsigned int align = get_object_alignment (*use->op_p);
7386   if (align != TYPE_ALIGN (type))
7387     type = build_aligned_type (type, align);
7388   ref = create_mem_ref (&bsi, type, &aff,
7389 			reference_alias_ptr_type (*use->op_p),
7390 			iv, base_hint, data->speed);
7391   copy_ref_info (ref, *use->op_p);
7392   *use->op_p = ref;
7393 }
7394 
7395 /* Rewrites USE (the condition such that one of the arguments is an iv) using
7396    candidate CAND.  */
7397 
7398 static void
7399 rewrite_use_compare (struct ivopts_data *data,
7400 		     struct iv_use *use, struct iv_cand *cand)
7401 {
7402   tree comp, *var_p, op, bound;
7403   gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7404   enum tree_code compare;
7405   struct iv_group *group = data->vgroups[use->group_id];
7406   struct cost_pair *cp = get_group_iv_cost (data, group, cand);
7407   bool ok;
7408 
7409   bound = cp->value;
7410   if (bound)
7411     {
7412       tree var = var_at_stmt (data->current_loop, cand, use->stmt);
7413       tree var_type = TREE_TYPE (var);
7414       gimple_seq stmts;
7415 
7416       if (dump_file && (dump_flags & TDF_DETAILS))
7417 	{
7418 	  fprintf (dump_file, "Replacing exit test: ");
7419 	  print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
7420 	}
7421       compare = cp->comp;
7422       bound = unshare_expr (fold_convert (var_type, bound));
7423       op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
7424       if (stmts)
7425 	gsi_insert_seq_on_edge_immediate (
7426 		loop_preheader_edge (data->current_loop),
7427 		stmts);
7428 
7429       gcond *cond_stmt = as_a <gcond *> (use->stmt);
7430       gimple_cond_set_lhs (cond_stmt, var);
7431       gimple_cond_set_code (cond_stmt, compare);
7432       gimple_cond_set_rhs (cond_stmt, op);
7433       return;
7434     }
7435 
7436   /* The induction variable elimination failed; just express the original
7437      giv.  */
7438   comp = get_computation (data->current_loop, use, cand);
7439   gcc_assert (comp != NULL_TREE);
7440 
7441   ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
7442   gcc_assert (ok);
7443 
7444   *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
7445 				     true, GSI_SAME_STMT);
7446 }
7447 
7448 /* Rewrite the groups using the selected induction variables.  */
7449 
7450 static void
7451 rewrite_groups (struct ivopts_data *data)
7452 {
7453   unsigned i, j;
7454 
7455   for (i = 0; i < data->vgroups.length (); i++)
7456     {
7457       struct iv_group *group = data->vgroups[i];
7458       struct iv_cand *cand = group->selected;
7459 
7460       gcc_assert (cand);
7461 
7462       if (group->type == USE_NONLINEAR_EXPR)
7463 	{
7464 	  for (j = 0; j < group->vuses.length (); j++)
7465 	    {
7466 	      rewrite_use_nonlinear_expr (data, group->vuses[j], cand);
7467 	      update_stmt (group->vuses[j]->stmt);
7468 	    }
7469 	}
7470       else if (group->type == USE_ADDRESS)
7471 	{
7472 	  for (j = 0; j < group->vuses.length (); j++)
7473 	    {
7474 	      rewrite_use_address (data, group->vuses[j], cand);
7475 	      update_stmt (group->vuses[j]->stmt);
7476 	    }
7477 	}
7478       else
7479 	{
7480 	  gcc_assert (group->type == USE_COMPARE);
7481 
7482 	  for (j = 0; j < group->vuses.length (); j++)
7483 	    {
7484 	      rewrite_use_compare (data, group->vuses[j], cand);
7485 	      update_stmt (group->vuses[j]->stmt);
7486 	    }
7487 	}
7488     }
7489 }
7490 
7491 /* Removes the ivs that are not used after rewriting.  */
7492 
7493 static void
7494 remove_unused_ivs (struct ivopts_data *data)
7495 {
7496   unsigned j;
7497   bitmap_iterator bi;
7498   bitmap toremove = BITMAP_ALLOC (NULL);
7499 
7500   /* Figure out an order in which to release SSA DEFs so that we don't
7501      release something that we'd have to propagate into a debug stmt
7502      afterwards.  */
7503   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
7504     {
7505       struct version_info *info;
7506 
7507       info = ver_info (data, j);
7508       if (info->iv
7509 	  && !integer_zerop (info->iv->step)
7510 	  && !info->inv_id
7511 	  && !info->iv->nonlin_use
7512 	  && !info->preserve_biv)
7513 	{
7514 	  bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
7515 
7516 	  tree def = info->iv->ssa_name;
7517 
7518 	  if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
7519 	    {
7520 	      imm_use_iterator imm_iter;
7521 	      use_operand_p use_p;
7522 	      gimple *stmt;
7523 	      int count = 0;
7524 
7525 	      FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7526 		{
7527 		  if (!gimple_debug_bind_p (stmt))
7528 		    continue;
7529 
7530 		  /* We just want to determine whether to do nothing
7531 		     (count == 0), to substitute the computed
7532 		     expression into a single use of the SSA DEF by
7533 		     itself (count == 1), or to use a debug temp
7534 		     because the SSA DEF is used multiple times or as
7535 		     part of a larger expression (count > 1). */
7536 		  count++;
7537 		  if (gimple_debug_bind_get_value (stmt) != def)
7538 		    count++;
7539 
7540 		  if (count > 1)
7541 		    BREAK_FROM_IMM_USE_STMT (imm_iter);
7542 		}
7543 
7544 	      if (!count)
7545 		continue;
7546 
7547 	      struct iv_use dummy_use;
7548 	      struct iv_cand *best_cand = NULL, *cand;
7549 	      unsigned i, best_pref = 0, cand_pref;
7550 
7551 	      memset (&dummy_use, 0, sizeof (dummy_use));
7552 	      dummy_use.iv = info->iv;
7553 	      for (i = 0; i < data->vgroups.length () && i < 64; i++)
7554 		{
7555 		  cand = data->vgroups[i]->selected;
7556 		  if (cand == best_cand)
7557 		    continue;
7558 		  cand_pref = operand_equal_p (cand->iv->step,
7559 					       info->iv->step, 0)
7560 		    ? 4 : 0;
7561 		  cand_pref
7562 		    += TYPE_MODE (TREE_TYPE (cand->iv->base))
7563 		    == TYPE_MODE (TREE_TYPE (info->iv->base))
7564 		    ? 2 : 0;
7565 		  cand_pref
7566 		    += TREE_CODE (cand->iv->base) == INTEGER_CST
7567 		    ? 1 : 0;
7568 		  if (best_cand == NULL || best_pref < cand_pref)
7569 		    {
7570 		      best_cand = cand;
7571 		      best_pref = cand_pref;
7572 		    }
7573 		}
7574 
7575 	      if (!best_cand)
7576 		continue;
7577 
7578 	      tree comp = get_computation_at (data->current_loop,
7579 					      &dummy_use, best_cand,
7580 					      SSA_NAME_DEF_STMT (def));
7581 	      if (!comp)
7582 		continue;
7583 
7584 	      if (count > 1)
7585 		{
7586 		  tree vexpr = make_node (DEBUG_EXPR_DECL);
7587 		  DECL_ARTIFICIAL (vexpr) = 1;
7588 		  TREE_TYPE (vexpr) = TREE_TYPE (comp);
7589 		  if (SSA_NAME_VAR (def))
7590 		    SET_DECL_MODE (vexpr, DECL_MODE (SSA_NAME_VAR (def)));
7591 		  else
7592 		    SET_DECL_MODE (vexpr, TYPE_MODE (TREE_TYPE (vexpr)));
7593 		  gdebug *def_temp
7594 		    = gimple_build_debug_bind (vexpr, comp, NULL);
7595 		  gimple_stmt_iterator gsi;
7596 
7597 		  if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
7598 		    gsi = gsi_after_labels (gimple_bb
7599 					    (SSA_NAME_DEF_STMT (def)));
7600 		  else
7601 		    gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
7602 
7603 		  gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
7604 		  comp = vexpr;
7605 		}
7606 
7607 	      FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7608 		{
7609 		  if (!gimple_debug_bind_p (stmt))
7610 		    continue;
7611 
7612 		  FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
7613 		    SET_USE (use_p, comp);
7614 
7615 		  update_stmt (stmt);
7616 		}
7617 	    }
7618 	}
7619     }
7620 
7621   release_defs_bitset (toremove);
7622 
7623   BITMAP_FREE (toremove);
7624 }
7625 
7626 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
7627    for hash_map::traverse.  */
7628 
7629 bool
7630 free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
7631 {
7632   free (value);
7633   return true;
7634 }
7635 
7636 /* Frees data allocated by the optimization of a single loop.  */
7637 
7638 static void
7639 free_loop_data (struct ivopts_data *data)
7640 {
7641   unsigned i, j;
7642   bitmap_iterator bi;
7643   tree obj;
7644 
7645   if (data->niters)
7646     {
7647       data->niters->traverse<void *, free_tree_niter_desc> (NULL);
7648       delete data->niters;
7649       data->niters = NULL;
7650     }
7651 
7652   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
7653     {
7654       struct version_info *info;
7655 
7656       info = ver_info (data, i);
7657       info->iv = NULL;
7658       info->has_nonlin_use = false;
7659       info->preserve_biv = false;
7660       info->inv_id = 0;
7661     }
7662   bitmap_clear (data->relevant);
7663   bitmap_clear (data->important_candidates);
7664 
7665   for (i = 0; i < data->vgroups.length (); i++)
7666     {
7667       struct iv_group *group = data->vgroups[i];
7668 
7669       for (j = 0; j < group->vuses.length (); j++)
7670 	free (group->vuses[j]);
7671       group->vuses.release ();
7672 
7673       BITMAP_FREE (group->related_cands);
7674       for (j = 0; j < group->n_map_members; j++)
7675 	if (group->cost_map[j].depends_on)
7676 	  BITMAP_FREE (group->cost_map[j].depends_on);
7677 
7678       free (group->cost_map);
7679       free (group);
7680     }
7681   data->vgroups.truncate (0);
7682 
7683   for (i = 0; i < data->vcands.length (); i++)
7684     {
7685       struct iv_cand *cand = data->vcands[i];
7686 
7687       if (cand->depends_on)
7688 	BITMAP_FREE (cand->depends_on);
7689       free (cand);
7690     }
7691   data->vcands.truncate (0);
7692 
7693   if (data->version_info_size < num_ssa_names)
7694     {
7695       data->version_info_size = 2 * num_ssa_names;
7696       free (data->version_info);
7697       data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
7698     }
7699 
7700   data->max_inv_id = 0;
7701 
7702   FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
7703     SET_DECL_RTL (obj, NULL_RTX);
7704 
7705   decl_rtl_to_reset.truncate (0);
7706 
7707   data->inv_expr_tab->empty ();
7708   data->max_inv_expr_id = 0;
7709 
7710   data->iv_common_cand_tab->empty ();
7711   data->iv_common_cands.truncate (0);
7712 }
7713 
7714 /* Finalizes data structures used by the iv optimization pass.  LOOPS is the
7715    loop tree.  */
7716 
7717 static void
7718 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
7719 {
7720   free_loop_data (data);
7721   free (data->version_info);
7722   BITMAP_FREE (data->relevant);
7723   BITMAP_FREE (data->important_candidates);
7724 
7725   decl_rtl_to_reset.release ();
7726   data->vgroups.release ();
7727   data->vcands.release ();
7728   delete data->inv_expr_tab;
7729   data->inv_expr_tab = NULL;
7730   free_affine_expand_cache (&data->name_expansion_cache);
7731   delete data->iv_common_cand_tab;
7732   data->iv_common_cand_tab = NULL;
7733   data->iv_common_cands.release ();
7734   obstack_free (&data->iv_obstack, NULL);
7735 }
7736 
7737 /* Returns true if the loop body BODY includes any function calls.  */
7738 
7739 static bool
7740 loop_body_includes_call (basic_block *body, unsigned num_nodes)
7741 {
7742   gimple_stmt_iterator gsi;
7743   unsigned i;
7744 
7745   for (i = 0; i < num_nodes; i++)
7746     for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
7747       {
7748 	gimple *stmt = gsi_stmt (gsi);
7749 	if (is_gimple_call (stmt)
7750 	    && !gimple_call_internal_p (stmt)
7751 	    && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
7752 	  return true;
7753       }
7754   return false;
7755 }
7756 
7757 /* Optimizes the LOOP.  Returns true if anything changed.  */
7758 
7759 static bool
7760 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
7761 {
7762   bool changed = false;
7763   struct iv_ca *iv_ca;
7764   edge exit = single_dom_exit (loop);
7765   basic_block *body;
7766 
7767   gcc_assert (!data->niters);
7768   data->current_loop = loop;
7769   data->loop_loc = find_loop_location (loop);
7770   data->speed = optimize_loop_for_speed_p (loop);
7771 
7772   if (dump_file && (dump_flags & TDF_DETAILS))
7773     {
7774       fprintf (dump_file, "Processing loop %d", loop->num);
7775       if (data->loop_loc != UNKNOWN_LOCATION)
7776 	fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7777 		 LOCATION_LINE (data->loop_loc));
7778       fprintf (dump_file, "\n");
7779 
7780       if (exit)
7781 	{
7782 	  fprintf (dump_file, "  single exit %d -> %d, exit condition ",
7783 		   exit->src->index, exit->dest->index);
7784 	  print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
7785 	  fprintf (dump_file, "\n");
7786 	}
7787 
7788       fprintf (dump_file, "\n");
7789     }
7790 
7791   body = get_loop_body (loop);
7792   data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
7793   renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
7794   free (body);
7795 
7796   data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
7797 
7798   /* For each ssa name determines whether it behaves as an induction variable
7799      in some loop.  */
7800   if (!find_induction_variables (data))
7801     goto finish;
7802 
7803   /* Finds interesting uses (item 1).  */
7804   find_interesting_uses (data);
7805   if (data->vgroups.length () > MAX_CONSIDERED_GROUPS)
7806     goto finish;
7807 
7808   /* Finds candidates for the induction variables (item 2).  */
7809   find_iv_candidates (data);
7810 
7811   /* Calculates the costs (item 3, part 1).  */
7812   determine_iv_costs (data);
7813   determine_group_iv_costs (data);
7814   determine_set_costs (data);
7815 
7816   /* Find the optimal set of induction variables (item 3, part 2).  */
7817   iv_ca = find_optimal_iv_set (data);
7818   if (!iv_ca)
7819     goto finish;
7820   changed = true;
7821 
7822   /* Create the new induction variables (item 4, part 1).  */
7823   create_new_ivs (data, iv_ca);
7824   iv_ca_free (&iv_ca);
7825 
7826   /* Rewrite the uses (item 4, part 2).  */
7827   rewrite_groups (data);
7828 
7829   /* Remove the ivs that are unused after rewriting.  */
7830   remove_unused_ivs (data);
7831 
7832   /* We have changed the structure of induction variables; it might happen
7833      that definitions in the scev database refer to some of them that were
7834      eliminated.  */
7835   scev_reset ();
7836 
7837 finish:
7838   free_loop_data (data);
7839 
7840   return changed;
7841 }
7842 
7843 /* Main entry point.  Optimizes induction variables in loops.  */
7844 
7845 void
7846 tree_ssa_iv_optimize (void)
7847 {
7848   struct loop *loop;
7849   struct ivopts_data data;
7850 
7851   tree_ssa_iv_optimize_init (&data);
7852 
7853   /* Optimize the loops starting with the innermost ones.  */
7854   FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
7855     {
7856       if (dump_file && (dump_flags & TDF_DETAILS))
7857 	flow_loop_dump (loop, dump_file, NULL, 1);
7858 
7859       tree_ssa_iv_optimize_loop (&data, loop);
7860     }
7861 
7862   tree_ssa_iv_optimize_finalize (&data);
7863 }
7864