xref: /netbsd-src/external/gpl3/gcc.old/dist/gcc/tree-ssa-loop-ivopts.c (revision b7b7574d3bf8eeb51a1fa3977b59142ec6434a55)
1 /* Induction variable optimizations.
2    Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
3    Free Software Foundation, Inc.
4 
5 This file is part of GCC.
6 
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 3, or (at your option) any
10 later version.
11 
12 GCC is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3.  If not see
19 <http://www.gnu.org/licenses/>.  */
20 
21 /* This pass tries to find the optimal set of induction variables for the loop.
22    It optimizes just the basic linear induction variables (although adding
23    support for other types should not be too hard).  It includes the
24    optimizations commonly known as strength reduction, induction variable
25    coalescing and induction variable elimination.  It does so in the
26    following steps:
27 
28    1) The interesting uses of induction variables are found.  This includes
29 
30       -- uses of induction variables in non-linear expressions
31       -- addresses of arrays
32       -- comparisons of induction variables
33 
34    2) Candidates for the induction variables are found.  This includes
35 
36       -- old induction variables
37       -- the variables defined by expressions derived from the "interesting
38 	 uses" above
39 
40    3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
41       cost function assigns a cost to sets of induction variables and consists
42       of three parts:
43 
44       -- The use costs.  Each of the interesting uses chooses the best induction
45 	 variable in the set and adds its cost to the sum.  The cost reflects
46 	 the time spent on modifying the induction variable's value to be usable
47 	 for the given purpose (adding base and offset for arrays, etc.).
48       -- The variable costs.  Each of the variables has a cost assigned that
49 	 reflects the costs associated with incrementing the value of the
50 	 variable.  The original variables are somewhat preferred.
51       -- The set cost.  Depending on the size of the set, extra cost may be
52 	 added to reflect register pressure.
53 
54       All the costs are defined in a machine-specific way, using the target
55       hooks and machine descriptions to determine them.
56 
57    4) The trees are transformed to use the new variables, the dead code is
58       removed.
59 
60    All of this is done loop by loop.  Doing it globally is theoretically
61    possible, it might give better performance and it might enable us
62    to decide costs more precisely, but getting all the interactions right
63    would be complicated.  */
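
/* As an illustration (an editorial sketch, not part of the pass; names are
   invented), a loop such as

     for (i = 0; i < n; i++)
       a[i] = 0;

   may, depending on the target's costs, end up rewritten to use a single
   pointer induction variable:

     for (p = &a[0]; p != &a[n]; p++)
       *p = 0;

   which strength-reduces the multiplication implied in the address of
   a[i] and allows the counter I to be eliminated when it has no other
   uses.  */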
64 
65 #include "config.h"
66 #include "system.h"
67 #include "coretypes.h"
68 #include "tm.h"
69 #include "tree.h"
70 #include "rtl.h"
71 #include "tm_p.h"
72 #include "hard-reg-set.h"
73 #include "basic-block.h"
74 #include "output.h"
75 #include "diagnostic.h"
76 #include "tree-flow.h"
77 #include "tree-dump.h"
78 #include "timevar.h"
79 #include "cfgloop.h"
80 #include "varray.h"
81 #include "expr.h"
82 #include "tree-pass.h"
83 #include "ggc.h"
84 #include "insn-config.h"
85 #include "recog.h"
86 #include "pointer-set.h"
87 #include "hashtab.h"
88 #include "tree-chrec.h"
89 #include "tree-scalar-evolution.h"
90 #include "cfgloop.h"
91 #include "params.h"
92 #include "langhooks.h"
93 #include "tree-affine.h"
94 #include "target.h"
95 
96 /* The infinite cost.  */
97 #define INFTY 10000000
98 
99 /* The expected number of loop iterations.  TODO -- use profiling instead of
100    this.  */
101 #define AVG_LOOP_NITER(LOOP) 5
102 
103 
104 /* Representation of the induction variable.  */
105 struct iv
106 {
107   tree base;		/* Initial value of the iv.  */
108   tree base_object;	/* The memory object that the induction variable points to.  */
109   tree step;		/* Step of the iv (constant only).  */
110   tree ssa_name;	/* The ssa name with the value.  */
111   bool biv_p;		/* Is it a biv?  */
112   bool have_use_for;	/* Do we already have a use for it?  */
113   unsigned use_id;	/* The identifier of that use, if so.  */
114 };
115 
116 /* Per-ssa version information (induction variable descriptions, etc.).  */
117 struct version_info
118 {
119   tree name;		/* The ssa name.  */
120   struct iv *iv;	/* Induction variable description.  */
121   bool has_nonlin_use;	/* For a loop-level invariant, whether it is used in
122 			   an expression that is not an induction variable.  */
123   unsigned inv_id;	/* Id of an invariant.  */
124   bool preserve_biv;	/* For the original biv, whether to preserve it.  */
125 };
126 
127 /* Types of uses.  */
128 enum use_type
129 {
130   USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
131   USE_ADDRESS,		/* Use in an address.  */
132   USE_COMPARE		/* Use is a compare.  */
133 };
134 
135 /* Cost of a computation.  */
136 typedef struct
137 {
138   int cost;		/* The runtime cost.  */
139   unsigned complexity;	/* The estimate of the complexity of the code for
140 			   the computation (in no concrete units --
141 			   complexity field should be larger for more
142 			   complex expressions and addressing modes).  */
143 } comp_cost;
144 
145 static const comp_cost zero_cost = {0, 0};
146 static const comp_cost infinite_cost = {INFTY, INFTY};
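
/* Illustrative values only: on a target with base+index addressing, a use
   expressed as *(base + iv) might get comp_cost {1, 1}, while expressing
   the same use as *(base + 4*iv + 8) might get, say, {2, 2} if scaled-index
   addressing is unavailable.  The complexity field serves to break ties
   between candidate sets whose runtime costs are equal.  */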
147 
148 /* The candidate - cost pair.  */
149 struct cost_pair
150 {
151   struct iv_cand *cand;	/* The candidate.  */
152   comp_cost cost;	/* The cost.  */
153   bitmap depends_on;	/* The list of invariants that have to be
154 			   preserved.  */
155   tree value;		/* For final value elimination, the expression for
156 			   the final value of the iv.  For iv elimination,
157 			   the new bound to compare with.  */
158 };
159 
160 /* Use.  */
161 struct iv_use
162 {
163   unsigned id;		/* The id of the use.  */
164   enum use_type type;	/* Type of the use.  */
165   struct iv *iv;	/* The induction variable it is based on.  */
166   gimple stmt;		/* Statement in which it occurs.  */
167   tree *op_p;		/* The place where it occurs.  */
168   bitmap related_cands;	/* The set of "related" iv candidates, plus the common
169 			   important ones.  */
170 
171   unsigned n_map_members; /* Number of candidates in the cost_map list.  */
172   struct cost_pair *cost_map;
173 			/* The costs w.r.t. the iv candidates.  */
174 
175   struct iv_cand *selected;
176 			/* The selected candidate.  */
177 };
178 
179 /* The position where the iv is computed.  */
180 enum iv_position
181 {
182   IP_NORMAL,		/* At the end, just before the exit condition.  */
183   IP_END,		/* At the end of the latch block.  */
184   IP_BEFORE_USE,	/* Immediately before a specific use.  */
185   IP_AFTER_USE,		/* Immediately after a specific use.  */
186   IP_ORIGINAL		/* The original biv.  */
187 };
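
/* Illustrative sketch (editorial) of the two common positions, for a loop
   whose body ends with the exit test:

     <bb body>:
       ... uses of the iv ...
       i.1 = i + 1;            <-- IP_NORMAL: just before the exit test
       if (i.1 < n) goto <bb body>; else goto <bb exit>;

   With IP_END the increment is instead placed in the latch block, after
   the exit test has been passed; IP_BEFORE_USE and IP_AFTER_USE place it
   immediately around one particular use.  */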
188 
189 /* The induction variable candidate.  */
190 struct iv_cand
191 {
192   unsigned id;		/* The number of the candidate.  */
193   bool important;	/* Whether this is an "important" candidate, i.e. one
194 			   that should be considered by all uses.  */
195   enum iv_position pos;	/* Where it is computed.  */
196   gimple incremented_at;/* For original biv, the statement where it is
197 			   incremented.  */
198   tree var_before;	/* The variable used for it before increment.  */
199   tree var_after;	/* The variable used for it after increment.  */
200   struct iv *iv;	/* The value of the candidate.  NULL for
201 			   "pseudocandidate" used to indicate the possibility
202 			   to replace the final value of an iv by direct
203 			   computation of the value.  */
204   unsigned cost;	/* Cost of the candidate.  */
205   unsigned cost_step;	/* Cost of the candidate's increment operation.  */
206   struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
207 			      where it is incremented.  */
208   bitmap depends_on;	/* The list of invariants that are used in step of the
209 			   biv.  */
210 };
211 
212 /* The data used by the induction variable optimizations.  */
213 
214 typedef struct iv_use *iv_use_p;
215 DEF_VEC_P(iv_use_p);
216 DEF_VEC_ALLOC_P(iv_use_p,heap);
217 
218 typedef struct iv_cand *iv_cand_p;
219 DEF_VEC_P(iv_cand_p);
220 DEF_VEC_ALLOC_P(iv_cand_p,heap);
221 
222 struct ivopts_data
223 {
224   /* The currently optimized loop.  */
225   struct loop *current_loop;
226 
227   /* Numbers of iterations for all exits of the current loop.  */
228   struct pointer_map_t *niters;
229 
230   /* Number of registers used in it.  */
231   unsigned regs_used;
232 
233   /* The size of version_info array allocated.  */
234   unsigned version_info_size;
235 
236   /* The array of information for the ssa names.  */
237   struct version_info *version_info;
238 
239   /* The bitmap of indices in version_info whose value was changed.  */
240   bitmap relevant;
241 
242   /* The uses of induction variables.  */
243   VEC(iv_use_p,heap) *iv_uses;
244 
245   /* The candidates.  */
246   VEC(iv_cand_p,heap) *iv_candidates;
247 
248   /* A bitmap of important candidates.  */
249   bitmap important_candidates;
250 
251   /* The maximum invariant id.  */
252   unsigned max_inv_id;
253 
254   /* Whether to consider all candidates for each use, as opposed to just the
255      related and important ones.  */
256   bool consider_all_candidates;
257 
258   /* Are we optimizing for speed?  */
259   bool speed;
260 };
261 
262 /* An assignment of iv candidates to uses.  */
263 
264 struct iv_ca
265 {
266   /* The number of uses covered by the assignment.  */
267   unsigned upto;
268 
269   /* Number of uses that cannot be expressed by the candidates in the set.  */
270   unsigned bad_uses;
271 
272   /* Candidate assigned to a use, together with the related costs.  */
273   struct cost_pair **cand_for_use;
274 
275   /* Number of times each candidate is used.  */
276   unsigned *n_cand_uses;
277 
278   /* The candidates used.  */
279   bitmap cands;
280 
281   /* The number of candidates in the set.  */
282   unsigned n_cands;
283 
284   /* Total number of registers needed.  */
285   unsigned n_regs;
286 
287   /* Total cost of expressing uses.  */
288   comp_cost cand_use_cost;
289 
290   /* Total cost of candidates.  */
291   unsigned cand_cost;
292 
293   /* Number of times each invariant is used.  */
294   unsigned *n_invariant_uses;
295 
296   /* Total cost of the assignment.  */
297   comp_cost cost;
298 };
299 
300 /* Difference of two iv candidate assignments.  */
301 
302 struct iv_ca_delta
303 {
304   /* Changed use.  */
305   struct iv_use *use;
306 
307   /* An old assignment (for rollback purposes).  */
308   struct cost_pair *old_cp;
309 
310   /* A new assignment.  */
311   struct cost_pair *new_cp;
312 
313   /* Next change in the list.  */
314   struct iv_ca_delta *next_change;
315 };
316 
317 /* Bound on the number of candidates below which all candidates are considered.  */
318 
319 #define CONSIDER_ALL_CANDIDATES_BOUND \
320   ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
321 
322 /* If there are more iv occurrences than this, we just give up (it is quite
323    unlikely that optimizing such a loop would help, and it would take ages).  */
324 
325 #define MAX_CONSIDERED_USES \
326   ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
327 
328 /* If there are at most this many ivs in the set, always try removing
329    unnecessary ivs from the set.  */
330 
331 #define ALWAYS_PRUNE_CAND_SET_BOUND \
332   ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
333 
334 /* The list of trees for which the decl_rtl field must be reset is stored
335    here.  */
336 
337 static VEC(tree,heap) *decl_rtl_to_reset;
338 
339 /* Number of uses recorded in DATA.  */
340 
341 static inline unsigned
342 n_iv_uses (struct ivopts_data *data)
343 {
344   return VEC_length (iv_use_p, data->iv_uses);
345 }
346 
347 /* Ith use recorded in DATA.  */
348 
349 static inline struct iv_use *
350 iv_use (struct ivopts_data *data, unsigned i)
351 {
352   return VEC_index (iv_use_p, data->iv_uses, i);
353 }
354 
355 /* Number of candidates recorded in DATA.  */
356 
357 static inline unsigned
358 n_iv_cands (struct ivopts_data *data)
359 {
360   return VEC_length (iv_cand_p, data->iv_candidates);
361 }
362 
363 /* Ith candidate recorded in DATA.  */
364 
365 static inline struct iv_cand *
366 iv_cand (struct ivopts_data *data, unsigned i)
367 {
368   return VEC_index (iv_cand_p, data->iv_candidates, i);
369 }
370 
371 /* The single loop exit if it dominates the latch, NULL otherwise.  */
372 
373 edge
374 single_dom_exit (struct loop *loop)
375 {
376   edge exit = single_exit (loop);
377 
378   if (!exit)
379     return NULL;
380 
381   if (!just_once_each_iteration_p (loop, exit->src))
382     return NULL;
383 
384   return exit;
385 }
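
/* For example (illustrative), in

     for (i = 0; ; i++)
       {
	 if (c (i))
	   continue;
	 if (f (i))
	   break;
       }

   the break provides the single exit, but the continue path bypasses its
   test, so the exit block is not executed in every iteration and NULL is
   returned.  */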
386 
387 /* Dumps information about the induction variable IV to FILE.  */
388 
389 extern void dump_iv (FILE *, struct iv *);
390 void
391 dump_iv (FILE *file, struct iv *iv)
392 {
393   if (iv->ssa_name)
394     {
395       fprintf (file, "ssa name ");
396       print_generic_expr (file, iv->ssa_name, TDF_SLIM);
397       fprintf (file, "\n");
398     }
399 
400   fprintf (file, "  type ");
401   print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
402   fprintf (file, "\n");
403 
404   if (iv->step)
405     {
406       fprintf (file, "  base ");
407       print_generic_expr (file, iv->base, TDF_SLIM);
408       fprintf (file, "\n");
409 
410       fprintf (file, "  step ");
411       print_generic_expr (file, iv->step, TDF_SLIM);
412       fprintf (file, "\n");
413     }
414   else
415     {
416       fprintf (file, "  invariant ");
417       print_generic_expr (file, iv->base, TDF_SLIM);
418       fprintf (file, "\n");
419     }
420 
421   if (iv->base_object)
422     {
423       fprintf (file, "  base object ");
424       print_generic_expr (file, iv->base_object, TDF_SLIM);
425       fprintf (file, "\n");
426     }
427 
428   if (iv->biv_p)
429     fprintf (file, "  is a biv\n");
430 }
431 
432 /* Dumps information about the USE to FILE.  */
433 
434 extern void dump_use (FILE *, struct iv_use *);
435 void
436 dump_use (FILE *file, struct iv_use *use)
437 {
438   fprintf (file, "use %d\n", use->id);
439 
440   switch (use->type)
441     {
442     case USE_NONLINEAR_EXPR:
443       fprintf (file, "  generic\n");
444       break;
445 
446     case USE_ADDRESS:
447       fprintf (file, "  address\n");
448       break;
449 
450     case USE_COMPARE:
451       fprintf (file, "  compare\n");
452       break;
453 
454     default:
455       gcc_unreachable ();
456     }
457 
458   fprintf (file, "  in statement ");
459   print_gimple_stmt (file, use->stmt, 0, 0);
460   fprintf (file, "\n");
461 
462   fprintf (file, "  at position ");
463   if (use->op_p)
464     print_generic_expr (file, *use->op_p, TDF_SLIM);
465   fprintf (file, "\n");
466 
467   dump_iv (file, use->iv);
468 
469   if (use->related_cands)
470     {
471       fprintf (file, "  related candidates ");
472       dump_bitmap (file, use->related_cands);
473     }
474 }
475 
476 /* Dumps information about the uses to FILE.  */
477 
478 extern void dump_uses (FILE *, struct ivopts_data *);
479 void
480 dump_uses (FILE *file, struct ivopts_data *data)
481 {
482   unsigned i;
483   struct iv_use *use;
484 
485   for (i = 0; i < n_iv_uses (data); i++)
486     {
487       use = iv_use (data, i);
488 
489       dump_use (file, use);
490       fprintf (file, "\n");
491     }
492 }
493 
494 /* Dumps information about induction variable candidate CAND to FILE.  */
495 
496 extern void dump_cand (FILE *, struct iv_cand *);
497 void
498 dump_cand (FILE *file, struct iv_cand *cand)
499 {
500   struct iv *iv = cand->iv;
501 
502   fprintf (file, "candidate %d%s\n",
503 	   cand->id, cand->important ? " (important)" : "");
504 
505   if (cand->depends_on)
506     {
507       fprintf (file, "  depends on ");
508       dump_bitmap (file, cand->depends_on);
509     }
510 
511   if (!iv)
512     {
513       fprintf (file, "  final value replacement\n");
514       return;
515     }
516 
517   switch (cand->pos)
518     {
519     case IP_NORMAL:
520       fprintf (file, "  incremented before exit test\n");
521       break;
522 
523     case IP_BEFORE_USE:
524       fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
525       break;
526 
527     case IP_AFTER_USE:
528       fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
529       break;
530 
531     case IP_END:
532       fprintf (file, "  incremented at end\n");
533       break;
534 
535     case IP_ORIGINAL:
536       fprintf (file, "  original biv\n");
537       break;
538     }
539 
540   dump_iv (file, iv);
541 }
542 
543 /* Returns the info for ssa version VER.  */
544 
545 static inline struct version_info *
546 ver_info (struct ivopts_data *data, unsigned ver)
547 {
548   return data->version_info + ver;
549 }
550 
551 /* Returns the info for ssa name NAME.  */
552 
553 static inline struct version_info *
554 name_info (struct ivopts_data *data, tree name)
555 {
556   return ver_info (data, SSA_NAME_VERSION (name));
557 }
558 
559 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
560    emitted in LOOP.  */
561 
562 static bool
563 stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
564 {
565   basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
566 
567   gcc_assert (bb);
568 
569   if (sbb == loop->latch)
570     return true;
571 
572   if (sbb != bb)
573     return false;
574 
575   return stmt == last_stmt (bb);
576 }
577 
578 /* Returns true if STMT is after the place where the original induction
579    variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
580    if the positions are identical.  */
581 
582 static bool
583 stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
584 {
585   basic_block cand_bb = gimple_bb (cand->incremented_at);
586   basic_block stmt_bb = gimple_bb (stmt);
587 
588   if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
589     return false;
590 
591   if (stmt_bb != cand_bb)
592     return true;
593 
594   if (true_if_equal
595       && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
596     return true;
597   return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
598 }
599 
600 /* Returns true if STMT is after the place where the induction variable
601    CAND is incremented in LOOP.  */
602 
603 static bool
604 stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
605 {
606   switch (cand->pos)
607     {
608     case IP_END:
609       return false;
610 
611     case IP_NORMAL:
612       return stmt_after_ip_normal_pos (loop, stmt);
613 
614     case IP_ORIGINAL:
615     case IP_AFTER_USE:
616       return stmt_after_inc_pos (cand, stmt, false);
617 
618     case IP_BEFORE_USE:
619       return stmt_after_inc_pos (cand, stmt, true);
620 
621     default:
622       gcc_unreachable ();
623     }
624 }
625 
626 /* Returns true if EXP is an ssa name that occurs in an abnormal phi node.  */
627 
628 static bool
629 abnormal_ssa_name_p (tree exp)
630 {
631   if (!exp)
632     return false;
633 
634   if (TREE_CODE (exp) != SSA_NAME)
635     return false;
636 
637   return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
638 }
639 
640 /* Returns false if BASE or INDEX contains an ssa name that occurs in an
641    abnormal phi node.  Callback for for_each_index.  */
642 
643 static bool
644 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
645 				  void *data ATTRIBUTE_UNUSED)
646 {
647   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
648     {
649       if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
650 	return false;
651       if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
652 	return false;
653     }
654 
655   return !abnormal_ssa_name_p (*index);
656 }
657 
658 /* Returns true if EXPR contains an ssa name that occurs in an
659    abnormal phi node.  */
660 
661 bool
662 contains_abnormal_ssa_name_p (tree expr)
663 {
664   enum tree_code code;
665   enum tree_code_class codeclass;
666 
667   if (!expr)
668     return false;
669 
670   code = TREE_CODE (expr);
671   codeclass = TREE_CODE_CLASS (code);
672 
673   if (code == SSA_NAME)
674     return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
675 
676   if (code == INTEGER_CST
677       || is_gimple_min_invariant (expr))
678     return false;
679 
680   if (code == ADDR_EXPR)
681     return !for_each_index (&TREE_OPERAND (expr, 0),
682 			    idx_contains_abnormal_ssa_name_p,
683 			    NULL);
684 
685   switch (codeclass)
686     {
687     case tcc_binary:
688     case tcc_comparison:
689       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
690 	return true;
691 
692       /* Fallthru.  */
693     case tcc_unary:
694       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
695 	return true;
696 
697       break;
698 
699     default:
700       gcc_unreachable ();
701     }
702 
703   return false;
704 }
705 
706 /* Returns a tree describing the number of iterations determined from
707    EXIT of DATA->current_loop, or NULL if something goes wrong.  */
708 
709 static tree
710 niter_for_exit (struct ivopts_data *data, edge exit)
711 {
712   struct tree_niter_desc desc;
713   tree niter;
714   void **slot;
715 
716   if (!data->niters)
717     {
718       data->niters = pointer_map_create ();
719       slot = NULL;
720     }
721   else
722     slot = pointer_map_contains (data->niters, exit);
723 
724   if (!slot)
725     {
726       /* Try to determine number of iterations.  We must know it
727 	 unconditionally (i.e., without possibility of # of iterations
728 	 being zero).  Also, we cannot safely work with ssa names that
729 	 appear in phi nodes on abnormal edges, so that we do not create
730 	 overlapping life ranges for them (PR 27283).  */
731       if (number_of_iterations_exit (data->current_loop,
732 				     exit, &desc, true)
733 	  && integer_zerop (desc.may_be_zero)
734 	  && !contains_abnormal_ssa_name_p (desc.niter))
735 	niter = desc.niter;
736       else
737 	niter = NULL_TREE;
738 
739       *pointer_map_insert (data->niters, exit) = niter;
740     }
741   else
742     niter = (tree) *slot;
743 
744   return niter;
745 }
746 
747 /* Returns a tree describing the number of iterations determined from
748    single dominating exit of DATA->current_loop, or NULL if something
749    goes wrong.  */
750 
751 static tree
752 niter_for_single_dom_exit (struct ivopts_data *data)
753 {
754   edge exit = single_dom_exit (data->current_loop);
755 
756   if (!exit)
757     return NULL;
758 
759   return niter_for_exit (data, exit);
760 }
761 
762 /* Initializes data structures used by the iv optimization pass, stored
763    in DATA.  */
764 
765 static void
766 tree_ssa_iv_optimize_init (struct ivopts_data *data)
767 {
768   data->version_info_size = 2 * num_ssa_names;
769   data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
770   data->relevant = BITMAP_ALLOC (NULL);
771   data->important_candidates = BITMAP_ALLOC (NULL);
772   data->max_inv_id = 0;
773   data->niters = NULL;
774   data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
775   data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
776   decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
777 }
778 
779 /* Returns the memory object that EXPR points to.  If we are able to
780    determine that it does not point to any such object, NULL is returned.  */
781 
782 static tree
783 determine_base_object (tree expr)
784 {
785   enum tree_code code = TREE_CODE (expr);
786   tree base, obj;
787 
788   /* If this is a pointer casted to any type, we need to determine
789      the base object for the pointer; so handle conversions before
790      throwing away non-pointer expressions.  */
791   if (CONVERT_EXPR_P (expr))
792     return determine_base_object (TREE_OPERAND (expr, 0));
793 
794   if (!POINTER_TYPE_P (TREE_TYPE (expr)))
795     return NULL_TREE;
796 
797   switch (code)
798     {
799     case INTEGER_CST:
800       return NULL_TREE;
801 
802     case ADDR_EXPR:
803       obj = TREE_OPERAND (expr, 0);
804       base = get_base_address (obj);
805 
806       if (!base)
807 	return expr;
808 
809       if (TREE_CODE (base) == INDIRECT_REF)
810 	return determine_base_object (TREE_OPERAND (base, 0));
811 
812       return fold_convert (ptr_type_node,
813 		           build_fold_addr_expr (base));
814 
815     case POINTER_PLUS_EXPR:
816       return determine_base_object (TREE_OPERAND (expr, 0));
817 
818     case PLUS_EXPR:
819     case MINUS_EXPR:
820       /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
821       gcc_unreachable ();
822 
823     default:
824       return fold_convert (ptr_type_node, expr);
825     }
826 }
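
/* For example (illustrative, names invented), for EXPR == &a[i] the base
   object is &a, and for EXPR == (char *) p_1 + i the recursion through the
   conversion and the POINTER_PLUS_EXPR yields p_1 itself.  Distinguishing
   base objects lets the pass avoid expressing an address into one object
   in terms of an iv that walks a different one.  */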
827 
828 /* Allocates an induction variable with the given initial value BASE and
829    step STEP.  */
830 
831 static struct iv *
832 alloc_iv (tree base, tree step)
833 {
834   struct iv *iv = XCNEW (struct iv);
835   gcc_assert (step != NULL_TREE);
836 
837   iv->base = base;
838   iv->base_object = determine_base_object (base);
839   iv->step = step;
840   iv->biv_p = false;
841   iv->have_use_for = false;
842   iv->use_id = 0;
843   iv->ssa_name = NULL_TREE;
844 
845   return iv;
846 }
847 
848 /* Sets STEP and BASE for induction variable IV.  */
849 
850 static void
851 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
852 {
853   struct version_info *info = name_info (data, iv);
854 
855   gcc_assert (!info->iv);
856 
857   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
858   info->iv = alloc_iv (base, step);
859   info->iv->ssa_name = iv;
860 }
861 
862 /* Finds induction variable declaration for VAR.  */
863 
864 static struct iv *
865 get_iv (struct ivopts_data *data, tree var)
866 {
867   basic_block bb;
868   tree type = TREE_TYPE (var);
869 
870   if (!POINTER_TYPE_P (type)
871       && !INTEGRAL_TYPE_P (type))
872     return NULL;
873 
874   if (!name_info (data, var)->iv)
875     {
876       bb = gimple_bb (SSA_NAME_DEF_STMT (var));
877 
878       if (!bb
879 	  || !flow_bb_inside_loop_p (data->current_loop, bb))
880 	set_iv (data, var, var, build_int_cst (type, 0));
881     }
882 
883   return name_info (data, var)->iv;
884 }
885 
886 /* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
887    not define a simple affine biv with nonzero step.  */
888 
889 static tree
890 determine_biv_step (gimple phi)
891 {
892   struct loop *loop = gimple_bb (phi)->loop_father;
893   tree name = PHI_RESULT (phi);
894   affine_iv iv;
895 
896   if (!is_gimple_reg (name))
897     return NULL_TREE;
898 
899   if (!simple_iv (loop, loop, name, &iv, true))
900     return NULL_TREE;
901 
902   return integer_zerop (iv.step) ? NULL_TREE : iv.step;
903 }
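
/* For example (illustrative, names invented), given the header phi

     i_1 = PHI <0 (preheader), i_7 (latch)>

   where the body contains i_7 = i_1 + 4, the returned step is the
   constant 4.  If the latch value does not advance (step 0), NULL_TREE
   is returned, since such a name is loop invariant rather than a biv.  */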
904 
905 /* Finds basic ivs.  */
906 
907 static bool
908 find_bivs (struct ivopts_data *data)
909 {
910   gimple phi;
911   tree step, type, base;
912   bool found = false;
913   struct loop *loop = data->current_loop;
914   gimple_stmt_iterator psi;
915 
916   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
917     {
918       phi = gsi_stmt (psi);
919 
920       if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
921 	continue;
922 
923       step = determine_biv_step (phi);
924       if (!step)
925 	continue;
926 
927       base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
928       base = expand_simple_operations (base);
929       if (contains_abnormal_ssa_name_p (base)
930 	  || contains_abnormal_ssa_name_p (step))
931 	continue;
932 
933       type = TREE_TYPE (PHI_RESULT (phi));
934       base = fold_convert (type, base);
935       if (step)
936 	{
937 	  if (POINTER_TYPE_P (type))
938 	    step = fold_convert (sizetype, step);
939 	  else
940 	    step = fold_convert (type, step);
941 	}
942 
943       set_iv (data, PHI_RESULT (phi), base, step);
944       found = true;
945     }
946 
947   return found;
948 }
949 
950 /* Marks basic ivs.  */
951 
952 static void
953 mark_bivs (struct ivopts_data *data)
954 {
955   gimple phi;
956   tree var;
957   struct iv *iv, *incr_iv;
958   struct loop *loop = data->current_loop;
959   basic_block incr_bb;
960   gimple_stmt_iterator psi;
961 
962   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
963     {
964       phi = gsi_stmt (psi);
965 
966       iv = get_iv (data, PHI_RESULT (phi));
967       if (!iv)
968 	continue;
969 
970       var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
971       incr_iv = get_iv (data, var);
972       if (!incr_iv)
973 	continue;
974 
975       /* If the increment is in the subloop, ignore it.  */
976       incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
977       if (incr_bb->loop_father != data->current_loop
978 	  || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
979 	continue;
980 
981       iv->biv_p = true;
982       incr_iv->biv_p = true;
983     }
984 }
985 
986 /* Checks whether STMT defines a linear induction variable and stores its
987    parameters to IV.  */
988 
989 static bool
990 find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
991 {
992   tree lhs;
993   struct loop *loop = data->current_loop;
994 
995   iv->base = NULL_TREE;
996   iv->step = NULL_TREE;
997 
998   if (gimple_code (stmt) != GIMPLE_ASSIGN)
999     return false;
1000 
1001   lhs = gimple_assign_lhs (stmt);
1002   if (TREE_CODE (lhs) != SSA_NAME)
1003     return false;
1004 
1005   if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1006     return false;
1007   iv->base = expand_simple_operations (iv->base);
1008 
1009   if (contains_abnormal_ssa_name_p (iv->base)
1010       || contains_abnormal_ssa_name_p (iv->step))
1011     return false;
1012 
1013   return true;
1014 }
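
/* For example (illustrative, names invented), if i_1 is a biv with base 0
   and step 1, a statement computing j_5 = i_1 * 4 + 7 (perhaps through a
   temporary for the multiplication) defines a giv: simple_iv describes
   j_5 with base 7 and step 4, and find_givs_in_stmt below records it via
   set_iv.  */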
1015 
1016 /* Finds general ivs in statement STMT.  */
1017 
1018 static void
1019 find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
1020 {
1021   affine_iv iv;
1022 
1023   if (!find_givs_in_stmt_scev (data, stmt, &iv))
1024     return;
1025 
1026   set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
1027 }
1028 
1029 /* Finds general ivs in basic block BB.  */
1030 
1031 static void
1032 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1033 {
1034   gimple_stmt_iterator bsi;
1035 
1036   for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1037     find_givs_in_stmt (data, gsi_stmt (bsi));
1038 }
1039 
1040 /* Finds general ivs.  */
1041 
1042 static void
1043 find_givs (struct ivopts_data *data)
1044 {
1045   struct loop *loop = data->current_loop;
1046   basic_block *body = get_loop_body_in_dom_order (loop);
1047   unsigned i;
1048 
1049   for (i = 0; i < loop->num_nodes; i++)
1050     find_givs_in_bb (data, body[i]);
1051   free (body);
1052 }
1053 
1054 /* For each ssa name defined in LOOP determines whether it is an induction
1055    variable and if so, its initial value and step.  */
1056 
1057 static bool
1058 find_induction_variables (struct ivopts_data *data)
1059 {
1060   unsigned i;
1061   bitmap_iterator bi;
1062 
1063   if (!find_bivs (data))
1064     return false;
1065 
1066   find_givs (data);
1067   mark_bivs (data);
1068 
1069   if (dump_file && (dump_flags & TDF_DETAILS))
1070     {
1071       tree niter = niter_for_single_dom_exit (data);
1072 
1073       if (niter)
1074 	{
1075 	  fprintf (dump_file, "  number of iterations ");
1076 	  print_generic_expr (dump_file, niter, TDF_SLIM);
1077 	  fprintf (dump_file, "\n\n");
1078 	}
1079 
1080       fprintf (dump_file, "Induction variables:\n\n");
1081 
1082       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1083 	{
1084 	  if (ver_info (data, i)->iv)
1085 	    dump_iv (dump_file, ver_info (data, i)->iv);
1086 	}
1087     }
1088 
1089   return true;
1090 }
1091 
1092 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */
1093 
1094 static struct iv_use *
1095 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1096 	    gimple stmt, enum use_type use_type)
1097 {
1098   struct iv_use *use = XCNEW (struct iv_use);
1099 
1100   use->id = n_iv_uses (data);
1101   use->type = use_type;
1102   use->iv = iv;
1103   use->stmt = stmt;
1104   use->op_p = use_p;
1105   use->related_cands = BITMAP_ALLOC (NULL);
1106 
1107   /* Reset the ssa name so that it is not shown in the dumps, if it was
1108      not reset by the caller.  */
1109   iv->ssa_name = NULL_TREE;
1110 
1111   if (dump_file && (dump_flags & TDF_DETAILS))
1112     dump_use (dump_file, use);
1113 
1114   VEC_safe_push (iv_use_p, heap, data->iv_uses, use);
1115 
1116   return use;
1117 }
1118 
1119 /* Checks whether OP is a loop-level invariant and if so, records it.
1120    NONLINEAR_USE is true if the invariant is used in a way we do not
1121    handle specially.  */
1122 
1123 static void
1124 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1125 {
1126   basic_block bb;
1127   struct version_info *info;
1128 
1129   if (TREE_CODE (op) != SSA_NAME
1130       || !is_gimple_reg (op))
1131     return;
1132 
1133   bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1134   if (bb
1135       && flow_bb_inside_loop_p (data->current_loop, bb))
1136     return;
1137 
1138   info = name_info (data, op);
1139   info->name = op;
1140   info->has_nonlin_use |= nonlinear_use;
1141   if (!info->inv_id)
1142     info->inv_id = ++data->max_inv_id;
1143   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1144 }
1145 
1146 /* Checks whether the use OP is interesting and if so, records it.  */
1147 
1148 static struct iv_use *
1149 find_interesting_uses_op (struct ivopts_data *data, tree op)
1150 {
1151   struct iv *iv;
1152   struct iv *civ;
1153   gimple stmt;
1154   struct iv_use *use;
1155 
1156   if (TREE_CODE (op) != SSA_NAME)
1157     return NULL;
1158 
1159   iv = get_iv (data, op);
1160   if (!iv)
1161     return NULL;
1162 
1163   if (iv->have_use_for)
1164     {
1165       use = iv_use (data, iv->use_id);
1166 
1167       gcc_assert (use->type == USE_NONLINEAR_EXPR);
1168       return use;
1169     }
1170 
1171   if (integer_zerop (iv->step))
1172     {
1173       record_invariant (data, op, true);
1174       return NULL;
1175     }
1176   iv->have_use_for = true;
1177 
1178   civ = XNEW (struct iv);
1179   *civ = *iv;
1180 
1181   stmt = SSA_NAME_DEF_STMT (op);
1182   gcc_assert (gimple_code (stmt) == GIMPLE_PHI
1183 	      || is_gimple_assign (stmt));
1184 
1185   use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
1186   iv->use_id = use->id;
1187 
1188   return use;
1189 }
1190 
1191 /* Given a condition in statement STMT, checks whether it is a compare
1192    of an induction variable and an invariant.  If this is the case,
1193    CONTROL_VAR is set to the location of the iv, BOUND to the location of
1194    the invariant, IV_VAR and IV_BOUND are set to the corresponding
1195    induction variable descriptions, and true is returned.  If this is not
1196    the case, CONTROL_VAR and BOUND are set to the arguments of the
1197    condition and false is returned.  */
1198 
1199 static bool
1200 extract_cond_operands (struct ivopts_data *data, gimple stmt,
1201 		       tree **control_var, tree **bound,
1202 		       struct iv **iv_var, struct iv **iv_bound)
1203 {
1204   /* The objects returned when COND has constant operands.  */
1205   static struct iv const_iv;
1206   static tree zero;
1207   tree *op0 = &zero, *op1 = &zero, *tmp_op;
1208   struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
1209   bool ret = false;
1210 
1211   if (gimple_code (stmt) == GIMPLE_COND)
1212     {
1213       op0 = gimple_cond_lhs_ptr (stmt);
1214       op1 = gimple_cond_rhs_ptr (stmt);
1215     }
1216   else
1217     {
1218       op0 = gimple_assign_rhs1_ptr (stmt);
1219       op1 = gimple_assign_rhs2_ptr (stmt);
1220     }
1221 
1222   zero = integer_zero_node;
1223   const_iv.step = integer_zero_node;
1224 
1225   if (TREE_CODE (*op0) == SSA_NAME)
1226     iv0 = get_iv (data, *op0);
1227   if (TREE_CODE (*op1) == SSA_NAME)
1228     iv1 = get_iv (data, *op1);
1229 
1230   /* Exactly one of the compared values must be an iv, and the other one must
1231      be an invariant.  */
1232   if (!iv0 || !iv1)
1233     goto end;
1234 
1235   if (integer_zerop (iv0->step))
1236     {
1237       /* Control variable may be on the other side.  */
1238       tmp_op = op0; op0 = op1; op1 = tmp_op;
1239       tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
1240     }
1241   ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);
1242 
1243 end:
1244   if (control_var)
1245     *control_var = op0;
1246   if (iv_var)
1247     *iv_var = iv0;
1248   if (bound)
1249     *bound = op1;
1250   if (iv_bound)
1251     *iv_bound = iv1;
1252 
1253   return ret;
1254 }
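
/* For example (illustrative, names invented), for the exit test

     if (i_1 < n_2)

   where i_1 is an iv with nonzero step and n_2 is loop invariant,
   *CONTROL_VAR is set to point to i_1, *BOUND to n_2, and true is
   returned; if both operands are invariants (or both are ivs with
   nonzero step), the function returns false.  */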
1255 
1256 /* Checks whether the condition in STMT is interesting and if so,
1257    records it.  */
1258 
1259 static void
1260 find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
1261 {
1262   tree *var_p, *bound_p;
1263   struct iv *var_iv, *civ;
1264 
1265   if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
1266     {
1267       find_interesting_uses_op (data, *var_p);
1268       find_interesting_uses_op (data, *bound_p);
1269       return;
1270     }
1271 
1272   civ = XNEW (struct iv);
1273   *civ = *var_iv;
1274   record_use (data, NULL, civ, stmt, USE_COMPARE);
1275 }
1276 
1277 /* Returns true if expression EXPR is obviously invariant in LOOP,
1278    i.e. if all its operands are defined outside of the LOOP.  LOOP
1279    should not be the function body.  */
1280 
1281 bool
1282 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1283 {
1284   basic_block def_bb;
1285   unsigned i, len;
1286 
1287   gcc_assert (loop_depth (loop) > 0);
1288 
1289   if (is_gimple_min_invariant (expr))
1290     return true;
1291 
1292   if (TREE_CODE (expr) == SSA_NAME)
1293     {
1294       def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1295       if (def_bb
1296 	  && flow_bb_inside_loop_p (loop, def_bb))
1297 	return false;
1298 
1299       return true;
1300     }
1301 
1302   if (!EXPR_P (expr))
1303     return false;
1304 
1305   len = TREE_OPERAND_LENGTH (expr);
1306   for (i = 0; i < len; i++)
1307     if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1308       return false;
1309 
1310   return true;
1311 }
1312 
1313 /* Returns true if statement STMT is obviously invariant in LOOP,
1314    i.e. if all its operands on the RHS are defined outside of the LOOP.
1315    LOOP should not be the function body.  */
1316 
1317 bool
1318 stmt_invariant_in_loop_p (struct loop *loop, gimple stmt)
1319 {
1320   unsigned i;
1321   tree lhs;
1322 
1323   gcc_assert (loop_depth (loop) > 0);
1324 
1325   lhs = gimple_get_lhs (stmt);
1326   for (i = 0; i < gimple_num_ops (stmt); i++)
1327     {
1328       tree op = gimple_op (stmt, i);
1329       if (op != lhs && !expr_invariant_in_loop_p (loop, op))
1330 	return false;
1331     }
1332 
1333   return true;
1334 }
1335 
1336 /* Accumulates the steps of indices into DATA and replaces their values with the
1337    initial ones.  Returns false when the value of the index cannot be determined.
1338    Callback for for_each_index.  */
1339 
1340 struct ifs_ivopts_data
1341 {
1342   struct ivopts_data *ivopts_data;
1343   gimple stmt;
1344   tree step;
1345 };
1346 
1347 static bool
1348 idx_find_step (tree base, tree *idx, void *data)
1349 {
1350   struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
1351   struct iv *iv;
1352   tree step, iv_base, iv_step, lbound, off;
1353   struct loop *loop = dta->ivopts_data->current_loop;
1354 
1355   if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
1356       || TREE_CODE (base) == ALIGN_INDIRECT_REF)
1357     return false;
1358 
1359   /* If base is a component ref, require that the offset of the reference
1360      be invariant.  */
1361   if (TREE_CODE (base) == COMPONENT_REF)
1362     {
1363       off = component_ref_field_offset (base);
1364       return expr_invariant_in_loop_p (loop, off);
1365     }
1366 
1367   /* If base is array, first check whether we will be able to move the
1368      reference out of the loop (in order to take its address in strength
1369      reduction).  In order for this to work we need both lower bound
1370      and step to be loop invariants.  */
1371   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1372     {
1373       /* Moreover, for a range, the size needs to be invariant as well.  */
1374       if (TREE_CODE (base) == ARRAY_RANGE_REF
1375 	  && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
1376 	return false;
1377 
1378       step = array_ref_element_size (base);
1379       lbound = array_ref_low_bound (base);
1380 
1381       if (!expr_invariant_in_loop_p (loop, step)
1382 	  || !expr_invariant_in_loop_p (loop, lbound))
1383 	return false;
1384     }
1385 
1386   if (TREE_CODE (*idx) != SSA_NAME)
1387     return true;
1388 
1389   iv = get_iv (dta->ivopts_data, *idx);
1390   if (!iv)
1391     return false;
1392 
1393   /* XXX  We produce for a base of *D42 with iv->base being &x[0]
1394 	  *&x[0], which is not folded and does not trigger the
1395 	  ARRAY_REF path below.  */
1396   *idx = iv->base;
1397 
1398   if (integer_zerop (iv->step))
1399     return true;
1400 
1401   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1402     {
1403       step = array_ref_element_size (base);
1404 
1405       /* We only handle addresses whose step is an integer constant.  */
1406       if (TREE_CODE (step) != INTEGER_CST)
1407 	return false;
1408     }
1409   else
1410     /* The step for pointer arithmetic is already 1 byte.  */
1411     step = build_int_cst (sizetype, 1);
1412 
1413   iv_base = iv->base;
1414   iv_step = iv->step;
1415   if (!convert_affine_scev (dta->ivopts_data->current_loop,
1416 			    sizetype, &iv_base, &iv_step, dta->stmt,
1417 			    false))
1418     {
1419       /* The index might wrap.  */
1420       return false;
1421     }
1422 
1423   step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1424   dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
1425 
1426   return true;
1427 }
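
/* For example (illustrative, names invented), when walking a[j_3] where
   the elements of A are 4 bytes wide and j_3 is an iv with step 2, the
   index is replaced by its initial value and 4 * 2 = 8 is added to
   DTA->step.  */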
1428 
1429 /* Records use in index IDX.  Callback for for_each_index.  Ivopts data
1430    object is passed to it in DATA.  */
1431 
1432 static bool
1433 idx_record_use (tree base, tree *idx,
1434 		void *vdata)
1435 {
1436   struct ivopts_data *data = (struct ivopts_data *) vdata;
1437   find_interesting_uses_op (data, *idx);
1438   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1439     {
1440       find_interesting_uses_op (data, array_ref_element_size (base));
1441       find_interesting_uses_op (data, array_ref_low_bound (base));
1442     }
1443   return true;
1444 }
1445 
1446 /* If we can prove that TOP = cst * BOT for some constant cst,
1447    store cst to MUL and return true.  Otherwise return false.
1448    The returned value is always sign-extended, regardless of the
1449    signedness of TOP and BOT.  */
1450 
1451 static bool
1452 constant_multiple_of (tree top, tree bot, double_int *mul)
1453 {
1454   tree mby;
1455   enum tree_code code;
1456   double_int res, p0, p1;
1457   unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
1458 
1459   STRIP_NOPS (top);
1460   STRIP_NOPS (bot);
1461 
1462   if (operand_equal_p (top, bot, 0))
1463     {
1464       *mul = double_int_one;
1465       return true;
1466     }
1467 
1468   code = TREE_CODE (top);
1469   switch (code)
1470     {
1471     case MULT_EXPR:
1472       mby = TREE_OPERAND (top, 1);
1473       if (TREE_CODE (mby) != INTEGER_CST)
1474 	return false;
1475 
1476       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
1477 	return false;
1478 
1479       *mul = double_int_sext (double_int_mul (res, tree_to_double_int (mby)),
1480 			      precision);
1481       return true;
1482 
1483     case PLUS_EXPR:
1484     case MINUS_EXPR:
1485       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
1486 	  || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
1487 	return false;
1488 
1489       if (code == MINUS_EXPR)
1490 	p1 = double_int_neg (p1);
1491       *mul = double_int_sext (double_int_add (p0, p1), precision);
1492       return true;
1493 
1494     case INTEGER_CST:
1495       if (TREE_CODE (bot) != INTEGER_CST)
1496 	return false;
1497 
1498       p0 = double_int_sext (tree_to_double_int (top), precision);
1499       p1 = double_int_sext (tree_to_double_int (bot), precision);
1500       if (double_int_zero_p (p1))
1501 	return false;
1502       *mul = double_int_sext (double_int_sdivmod (p0, p1, FLOOR_DIV_EXPR, &res),
1503 			      precision);
1504       return double_int_zero_p (res);
1505 
1506     default:
1507       return false;
1508     }
1509 }
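
/* For example (illustrative, names invented), for TOP = n_1 * 12 and
   BOT = n_1 the function stores 12 to *MUL and returns true; for the
   constants TOP = 12 and BOT = 4 it stores 3; for TOP = 13 and BOT = 4
   the floor division leaves a remainder, so false is returned.  */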
1510 
1511 /* Returns true if memory reference REF with step STEP may be unaligned.  */
1512 
1513 static bool
1514 may_be_unaligned_p (tree ref, tree step)
1515 {
1516   tree base;
1517   tree base_type;
1518   HOST_WIDE_INT bitsize;
1519   HOST_WIDE_INT bitpos;
1520   tree toffset;
1521   enum machine_mode mode;
1522   int unsignedp, volatilep;
1523   unsigned base_align;
1524 
1525   /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1526      thus they are not misaligned.  */
1527   if (TREE_CODE (ref) == TARGET_MEM_REF)
1528     return false;
1529 
1530   /* The test below is basically a copy of what expr.c:normal_inner_ref
1531      does to check whether the object must be loaded by parts when
1532      STRICT_ALIGNMENT is true.  */
1533   base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1534 			      &unsignedp, &volatilep, true);
1535   base_type = TREE_TYPE (base);
1536   base_align = TYPE_ALIGN (base_type);
1537 
1538   if (mode != BLKmode)
1539     {
1540       unsigned mode_align = GET_MODE_ALIGNMENT (mode);
1541 
1542       if (base_align < mode_align
1543 	  || (bitpos % mode_align) != 0
1544 	  || (bitpos % BITS_PER_UNIT) != 0)
1545 	return true;
1546 
1547       if (toffset
1548 	  && (highest_pow2_factor (toffset) * BITS_PER_UNIT) < mode_align)
1549 	return true;
1550 
1551       if ((highest_pow2_factor (step) * BITS_PER_UNIT) < mode_align)
1552 	return true;
1553     }
1554 
1555   return false;
1556 }
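
/* For example (illustrative), on a STRICT_ALIGNMENT target a 32-bit
   access whose bit position within its base is not a multiple of the
   mode alignment (say, an int field at byte offset 2 of a packed
   structure) is reported as possibly unaligned, as is one whose STEP
   guarantees less than the mode alignment.  */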
1557 
1558 /* Return true if EXPR may be non-addressable.  */
1559 
1560 bool
1561 may_be_nonaddressable_p (tree expr)
1562 {
1563   switch (TREE_CODE (expr))
1564     {
1565     case TARGET_MEM_REF:
1566       /* TARGET_MEM_REFs are translated directly to valid MEMs on the
1567 	 target, thus they are always addressable.  */
1568       return false;
1569 
1570     case COMPONENT_REF:
1571       return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
1572 	     || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1573 
1574     case VIEW_CONVERT_EXPR:
1575       /* This kind of view-conversion may wrap non-addressable objects
1576 	 and make them look addressable.  After some processing the
1577 	 non-addressability may be uncovered again, causing ADDR_EXPRs
1578 	 of inappropriate objects to be built.  */
1579       if (is_gimple_reg (TREE_OPERAND (expr, 0))
1580 	  || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
1581 	return true;
1582 
1583       /* ... fall through ... */
1584 
1585     case ARRAY_REF:
1586     case ARRAY_RANGE_REF:
1587       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1588 
1589     CASE_CONVERT:
1590       return true;
1591 
1592     default:
1593       break;
1594     }
1595 
1596   return false;
1597 }
1598 
1599 /* Finds addresses in *OP_P inside STMT.  */
1600 
1601 static void
1602 find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
1603 {
1604   tree base = *op_p, step = build_int_cst (sizetype, 0);
1605   struct iv *civ;
1606   struct ifs_ivopts_data ifs_ivopts_data;
1607 
1608   /* Do not play with volatile memory references.  A bit too conservative,
1609      perhaps, but safe.  */
1610   if (gimple_has_volatile_ops (stmt))
1611     goto fail;
1612 
1613   /* Ignore bitfields for now.  Not really something terribly complicated
1614      to handle.  TODO.  */
1615   if (TREE_CODE (base) == BIT_FIELD_REF)
1616     goto fail;
1617 
1618   base = unshare_expr (base);
1619 
1620   if (TREE_CODE (base) == TARGET_MEM_REF)
1621     {
1622       tree type = build_pointer_type (TREE_TYPE (base));
1623       tree astep;
1624 
1625       if (TMR_BASE (base)
1626 	  && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1627 	{
1628 	  civ = get_iv (data, TMR_BASE (base));
1629 	  if (!civ)
1630 	    goto fail;
1631 
1632 	  TMR_BASE (base) = civ->base;
1633 	  step = civ->step;
1634 	}
1635       if (TMR_INDEX (base)
1636 	  && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1637 	{
1638 	  civ = get_iv (data, TMR_INDEX (base));
1639 	  if (!civ)
1640 	    goto fail;
1641 
1642 	  TMR_INDEX (base) = civ->base;
1643 	  astep = civ->step;
1644 
1645 	  if (astep)
1646 	    {
1647 	      if (TMR_STEP (base))
1648 		astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1649 
1650 	      step = fold_build2 (PLUS_EXPR, type, step, astep);
1651 	    }
1652 	}
1653 
1654       if (integer_zerop (step))
1655 	goto fail;
1656       base = tree_mem_ref_addr (type, base);
1657     }
1658   else
1659     {
1660       ifs_ivopts_data.ivopts_data = data;
1661       ifs_ivopts_data.stmt = stmt;
1662       ifs_ivopts_data.step = build_int_cst (sizetype, 0);
1663       if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1664 	  || integer_zerop (ifs_ivopts_data.step))
1665 	goto fail;
1666       step = ifs_ivopts_data.step;
1667 
1668       gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
1669       gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1670 
1671       /* Check that the base expression is addressable.  This needs
1672 	 to be done after substituting bases of IVs into it.  */
1673       if (may_be_nonaddressable_p (base))
1674 	goto fail;
1675 
1676       /* Moreover, on strict alignment platforms, check that it is
1677 	 sufficiently aligned.  */
1678       if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
1679 	goto fail;
1680 
1681       base = build_fold_addr_expr (base);
1682 
1683       /* Substituting bases of IVs into the base expression might
1684 	 have caused folding opportunities.  */
1685       if (TREE_CODE (base) == ADDR_EXPR)
1686 	{
1687 	  tree *ref = &TREE_OPERAND (base, 0);
1688 	  while (handled_component_p (*ref))
1689 	    ref = &TREE_OPERAND (*ref, 0);
1690 	  if (TREE_CODE (*ref) == INDIRECT_REF)
1691 	    {
1692 	      tree tem = gimple_fold_indirect_ref (TREE_OPERAND (*ref, 0));
1693 	      if (tem)
1694 		*ref = tem;
1695 	    }
1696 	}
1697     }
1698 
1699   civ = alloc_iv (base, step);
1700   record_use (data, op_p, civ, stmt, USE_ADDRESS);
1701   return;
1702 
1703 fail:
1704   for_each_index (op_p, idx_record_use, data);
1705 }
1706 
1707 /* Finds and records invariants used in STMT.  */
1708 
1709 static void
1710 find_invariants_stmt (struct ivopts_data *data, gimple stmt)
1711 {
1712   ssa_op_iter iter;
1713   use_operand_p use_p;
1714   tree op;
1715 
1716   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1717     {
1718       op = USE_FROM_PTR (use_p);
1719       record_invariant (data, op, false);
1720     }
1721 }
1722 
1723 /* Finds interesting uses of induction variables in the statement STMT.  */
1724 
1725 static void
1726 find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
1727 {
1728   struct iv *iv;
1729   tree op, *lhs, *rhs;
1730   ssa_op_iter iter;
1731   use_operand_p use_p;
1732   enum tree_code code;
1733 
1734   find_invariants_stmt (data, stmt);
1735 
1736   if (gimple_code (stmt) == GIMPLE_COND)
1737     {
1738       find_interesting_uses_cond (data, stmt);
1739       return;
1740     }
1741 
1742   if (is_gimple_assign (stmt))
1743     {
1744       lhs = gimple_assign_lhs_ptr (stmt);
1745       rhs = gimple_assign_rhs1_ptr (stmt);
1746 
1747       if (TREE_CODE (*lhs) == SSA_NAME)
1748 	{
1749 	  /* If the statement defines an induction variable, the uses are not
1750 	     interesting by themselves.  */
1751 
1752 	  iv = get_iv (data, *lhs);
1753 
1754 	  if (iv && !integer_zerop (iv->step))
1755 	    return;
1756 	}
1757 
1758       code = gimple_assign_rhs_code (stmt);
1759       if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
1760 	  && (REFERENCE_CLASS_P (*rhs)
1761 	      || is_gimple_val (*rhs)))
1762 	{
1763 	  if (REFERENCE_CLASS_P (*rhs))
1764 	    find_interesting_uses_address (data, stmt, rhs);
1765 	  else
1766 	    find_interesting_uses_op (data, *rhs);
1767 
1768 	  if (REFERENCE_CLASS_P (*lhs))
1769 	    find_interesting_uses_address (data, stmt, lhs);
1770 	  return;
1771 	}
1772       else if (TREE_CODE_CLASS (code) == tcc_comparison)
1773 	{
1774 	  find_interesting_uses_cond (data, stmt);
1775 	  return;
1776 	}
1777 
1778       /* TODO -- we should also handle address uses of type
1779 
1780 	 memory = call (whatever);
1781 
1782 	 and
1783 
1784 	 call (memory).  */
1785     }
1786 
1787   if (gimple_code (stmt) == GIMPLE_PHI
1788       && gimple_bb (stmt) == data->current_loop->header)
1789     {
1790       iv = get_iv (data, PHI_RESULT (stmt));
1791 
1792       if (iv && !integer_zerop (iv->step))
1793 	return;
1794     }
1795 
1796   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1797     {
1798       op = USE_FROM_PTR (use_p);
1799 
1800       if (TREE_CODE (op) != SSA_NAME)
1801 	continue;
1802 
1803       iv = get_iv (data, op);
1804       if (!iv)
1805 	continue;
1806 
1807       find_interesting_uses_op (data, op);
1808     }
1809 }
1810 
1811 /* Finds interesting uses of induction variables outside of loops
1812    on loop exit edge EXIT.  */
1813 
1814 static void
1815 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1816 {
1817   gimple phi;
1818   gimple_stmt_iterator psi;
1819   tree def;
1820 
1821   for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
1822     {
1823       phi = gsi_stmt (psi);
1824       def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1825       if (is_gimple_reg (def))
1826 	find_interesting_uses_op (data, def);
1827     }
1828 }
1829 
1830 /* Finds uses of the induction variables that are interesting.  */
1831 
1832 static void
1833 find_interesting_uses (struct ivopts_data *data)
1834 {
1835   basic_block bb;
1836   gimple_stmt_iterator bsi;
1837   basic_block *body = get_loop_body (data->current_loop);
1838   unsigned i;
1839   struct version_info *info;
1840   edge e;
1841 
1842   if (dump_file && (dump_flags & TDF_DETAILS))
1843     fprintf (dump_file, "Uses:\n\n");
1844 
1845   for (i = 0; i < data->current_loop->num_nodes; i++)
1846     {
1847       edge_iterator ei;
1848       bb = body[i];
1849 
1850       FOR_EACH_EDGE (e, ei, bb->succs)
1851 	if (e->dest != EXIT_BLOCK_PTR
1852 	    && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1853 	  find_interesting_uses_outside (data, e);
1854 
1855       for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1856 	find_interesting_uses_stmt (data, gsi_stmt (bsi));
1857       for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1858 	if (!is_gimple_debug (gsi_stmt (bsi)))
1859 	  find_interesting_uses_stmt (data, gsi_stmt (bsi));
1860     }
1861 
1862   if (dump_file && (dump_flags & TDF_DETAILS))
1863     {
1864       bitmap_iterator bi;
1865 
1866       fprintf (dump_file, "\n");
1867 
1868       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1869 	{
1870 	  info = ver_info (data, i);
1871 	  if (info->inv_id)
1872 	    {
1873 	      fprintf (dump_file, "  ");
1874 	      print_generic_expr (dump_file, info->name, TDF_SLIM);
1875 	      fprintf (dump_file, " is invariant (%d)%s\n",
1876 		       info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1877 	    }
1878 	}
1879 
1880       fprintf (dump_file, "\n");
1881     }
1882 
1883   free (body);
1884 }
1885 
1886 /* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
1887    is true, assume we are inside an address.  If TOP_COMPREF is true, assume
1888    we are at the top-level of the processed address.  */
1889 
1890 static tree
1891 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
1892 		unsigned HOST_WIDE_INT *offset)
1893 {
1894   tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
1895   enum tree_code code;
1896   tree type, orig_type = TREE_TYPE (expr);
1897   unsigned HOST_WIDE_INT off0, off1, st;
1898   tree orig_expr = expr;
1899 
1900   STRIP_NOPS (expr);
1901 
1902   type = TREE_TYPE (expr);
1903   code = TREE_CODE (expr);
1904   *offset = 0;
1905 
1906   switch (code)
1907     {
1908     case INTEGER_CST:
1909       if (!cst_and_fits_in_hwi (expr)
1910 	  || integer_zerop (expr))
1911 	return orig_expr;
1912 
1913       *offset = int_cst_value (expr);
1914       return build_int_cst (orig_type, 0);
1915 
1916     case POINTER_PLUS_EXPR:
1917     case PLUS_EXPR:
1918     case MINUS_EXPR:
1919       op0 = TREE_OPERAND (expr, 0);
1920       op1 = TREE_OPERAND (expr, 1);
1921 
1922       op0 = strip_offset_1 (op0, false, false, &off0);
1923       op1 = strip_offset_1 (op1, false, false, &off1);
1924 
1925       *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
1926       if (op0 == TREE_OPERAND (expr, 0)
1927 	  && op1 == TREE_OPERAND (expr, 1))
1928 	return orig_expr;
1929 
1930       if (integer_zerop (op1))
1931 	expr = op0;
1932       else if (integer_zerop (op0))
1933 	{
1934 	  if (code == MINUS_EXPR)
1935 	    expr = fold_build1 (NEGATE_EXPR, type, op1);
1936 	  else
1937 	    expr = op1;
1938 	}
1939       else
1940 	expr = fold_build2 (code, type, op0, op1);
1941 
1942       return fold_convert (orig_type, expr);
1943 
1944     case MULT_EXPR:
1945       op1 = TREE_OPERAND (expr, 1);
1946       if (!cst_and_fits_in_hwi (op1))
1947 	return orig_expr;
1948 
1949       op0 = TREE_OPERAND (expr, 0);
1950       op0 = strip_offset_1 (op0, false, false, &off0);
1951       if (op0 == TREE_OPERAND (expr, 0))
1952 	return orig_expr;
1953 
1954       *offset = off0 * int_cst_value (op1);
1955       if (integer_zerop (op0))
1956 	expr = op0;
1957       else
1958 	expr = fold_build2 (MULT_EXPR, type, op0, op1);
1959 
1960       return fold_convert (orig_type, expr);
1961 
1962     case ARRAY_REF:
1963     case ARRAY_RANGE_REF:
1964       if (!inside_addr)
1965 	return orig_expr;
1966 
1967       step = array_ref_element_size (expr);
1968       if (!cst_and_fits_in_hwi (step))
1969 	break;
1970 
1971       st = int_cst_value (step);
1972       op1 = TREE_OPERAND (expr, 1);
1973       op1 = strip_offset_1 (op1, false, false, &off1);
1974       *offset = off1 * st;
1975 
1976       if (top_compref
1977 	  && integer_zerop (op1))
1978 	{
1979 	  /* Strip the component reference completely.  */
1980 	  op0 = TREE_OPERAND (expr, 0);
1981 	  op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1982 	  *offset += off0;
1983 	  return op0;
1984 	}
1985       break;
1986 
1987     case COMPONENT_REF:
1988       if (!inside_addr)
1989 	return orig_expr;
1990 
1991       tmp = component_ref_field_offset (expr);
1992       if (top_compref
1993 	  && cst_and_fits_in_hwi (tmp))
1994 	{
1995 	  /* Strip the component reference completely.  */
1996 	  op0 = TREE_OPERAND (expr, 0);
1997 	  op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1998 	  *offset = off0 + int_cst_value (tmp);
1999 	  return op0;
2000 	}
2001       break;
2002 
2003     case ADDR_EXPR:
2004       op0 = TREE_OPERAND (expr, 0);
2005       op0 = strip_offset_1 (op0, true, true, &off0);
2006       *offset += off0;
2007 
2008       if (op0 == TREE_OPERAND (expr, 0))
2009 	return orig_expr;
2010 
2011       expr = build_fold_addr_expr (op0);
2012       return fold_convert (orig_type, expr);
2013 
2014     case INDIRECT_REF:
2015       inside_addr = false;
2016       break;
2017 
2018     default:
2019       return orig_expr;
2020     }
2021 
2022   /* Default handling of expressions for which we want to recurse into
2023      the first operand.  */
2024   op0 = TREE_OPERAND (expr, 0);
2025   op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2026   *offset += off0;
2027 
2028   if (op0 == TREE_OPERAND (expr, 0)
2029       && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2030     return orig_expr;
2031 
2032   expr = copy_node (expr);
2033   TREE_OPERAND (expr, 0) = op0;
2034   if (op1)
2035     TREE_OPERAND (expr, 1) = op1;
2036 
2037   /* Inside an address, we might strip the top-level component references,
2038      thus changing the type of the expression.  The handling of ADDR_EXPR
2039      will fix that.  */
2040   expr = fold_convert (orig_type, expr);
2041 
2042   return expr;
2043 }
2044 
2045 /* Strips constant offsets from EXPR and stores them to OFFSET.  */
2046 
2047 static tree
2048 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2049 {
2050   return strip_offset_1 (expr, false, false, offset);
2051 }
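
/* Illustrative example (not from the original sources): for an array A of
   4-byte integers,

     strip_offset (&A[i + 3], &off)

   returns the stripped address &A[i] and sets OFF to 12, since the constant
   part of the index (3) is scaled by the element size on the way out.  */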
2052 
2053 /* Returns a variant of TYPE that can be used as a base for different uses.
2054    We return an unsigned type with the same precision, which avoids problems
2055    with overflows.  */
2056 
2057 static tree
2058 generic_type_for (tree type)
2059 {
2060   if (POINTER_TYPE_P (type))
2061     return unsigned_type_for (type);
2062 
2063   if (TYPE_UNSIGNED (type))
2064     return type;
2065 
2066   return unsigned_type_for (type);
2067 }
2068 
2069 /* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
2070    the bitmap in which we should store them.  */
2071 
2072 static struct ivopts_data *fd_ivopts_data;
2073 static tree
2074 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2075 {
2076   bitmap *depends_on = (bitmap *) data;
2077   struct version_info *info;
2078 
2079   if (TREE_CODE (*expr_p) != SSA_NAME)
2080     return NULL_TREE;
2081   info = name_info (fd_ivopts_data, *expr_p);
2082 
2083   if (!info->inv_id || info->has_nonlin_use)
2084     return NULL_TREE;
2085 
2086   if (!*depends_on)
2087     *depends_on = BITMAP_ALLOC (NULL);
2088   bitmap_set_bit (*depends_on, info->inv_id);
2089 
2090   return NULL_TREE;
2091 }
2092 
2093 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2094    position to POS.  If USE is not NULL, the candidate is set as related to
2095    it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
2096    replacement of the final value of the iv by a direct computation.  */
2097 
2098 static struct iv_cand *
2099 add_candidate_1 (struct ivopts_data *data,
2100 		 tree base, tree step, bool important, enum iv_position pos,
2101 		 struct iv_use *use, gimple incremented_at)
2102 {
2103   unsigned i;
2104   struct iv_cand *cand = NULL;
2105   tree type, orig_type;
2106 
2107   if (base)
2108     {
2109       orig_type = TREE_TYPE (base);
2110       type = generic_type_for (orig_type);
2111       if (type != orig_type)
2112 	{
2113 	  base = fold_convert (type, base);
2114 	  step = fold_convert (type, step);
2115 	}
2116     }
2117 
2118   for (i = 0; i < n_iv_cands (data); i++)
2119     {
2120       cand = iv_cand (data, i);
2121 
2122       if (cand->pos != pos)
2123 	continue;
2124 
2125       if (cand->incremented_at != incremented_at
2126 	  || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2127 	      && cand->ainc_use != use))
2128 	continue;
2129 
2130       if (!cand->iv)
2131 	{
2132 	  if (!base && !step)
2133 	    break;
2134 
2135 	  continue;
2136 	}
2137 
2138       if (!base && !step)
2139 	continue;
2140 
2141       if (operand_equal_p (base, cand->iv->base, 0)
2142 	  && operand_equal_p (step, cand->iv->step, 0))
2143 	break;
2144     }
2145 
2146   if (i == n_iv_cands (data))
2147     {
2148       cand = XCNEW (struct iv_cand);
2149       cand->id = i;
2150 
2151       if (!base && !step)
2152 	cand->iv = NULL;
2153       else
2154 	cand->iv = alloc_iv (base, step);
2155 
2156       cand->pos = pos;
2157       if (pos != IP_ORIGINAL && cand->iv)
2158 	{
2159 	  cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2160 	  cand->var_after = cand->var_before;
2161 	}
2162       cand->important = important;
2163       cand->incremented_at = incremented_at;
2164       VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);
2165 
2166       if (step
2167 	  && TREE_CODE (step) != INTEGER_CST)
2168 	{
2169 	  fd_ivopts_data = data;
2170 	  walk_tree (&step, find_depends, &cand->depends_on, NULL);
2171 	}
2172 
2173       if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2174 	cand->ainc_use = use;
2175       else
2176 	cand->ainc_use = NULL;
2177 
2178       if (dump_file && (dump_flags & TDF_DETAILS))
2179 	dump_cand (dump_file, cand);
2180     }
2181 
2182   if (important && !cand->important)
2183     {
2184       cand->important = true;
2185       if (dump_file && (dump_flags & TDF_DETAILS))
2186 	fprintf (dump_file, "Candidate %d is important\n", cand->id);
2187     }
2188 
2189   if (use)
2190     {
2191       bitmap_set_bit (use->related_cands, i);
2192       if (dump_file && (dump_flags & TDF_DETAILS))
2193 	fprintf (dump_file, "Candidate %d is related to use %d\n",
2194 		 cand->id, use->id);
2195     }
2196 
2197   return cand;
2198 }
2199 
2200 /* Returns true if incrementing the induction variable at the end of the LOOP
2201    is allowed.
2202 
2203    The purpose is to avoid splitting the latch edge with a biv increment, thus
2204    creating a jump, possibly confusing other optimization passes and leaving
2205    less freedom to the scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
2206    is not available (so we do not have a better alternative), or if the latch
2207    edge is already nonempty.  */
2208 
2209 static bool
2210 allow_ip_end_pos_p (struct loop *loop)
2211 {
2212   if (!ip_normal_pos (loop))
2213     return true;
2214 
2215   if (!empty_block_p (ip_end_pos (loop)))
2216     return true;
2217 
2218   return false;
2219 }
2220 
2221 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2222    Important field is set to IMPORTANT.  */
2223 
2224 static void
2225 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2226 			bool important, struct iv_use *use)
2227 {
2228   basic_block use_bb = gimple_bb (use->stmt);
2229   enum machine_mode mem_mode;
2230   unsigned HOST_WIDE_INT cstepi;
2231 
2232   /* If we insert the increment in any position other than the standard
2233      ones, we must ensure that it is incremented once per iteration.
2234      It must not be in an inner nested loop, or one side of an if
2235      statement.  */
2236   if (use_bb->loop_father != data->current_loop
2237       || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2238       || stmt_could_throw_p (use->stmt)
2239       || !cst_and_fits_in_hwi (step))
2240     return;
2241 
2242   cstepi = int_cst_value (step);
2243 
2244   mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2245   if ((HAVE_PRE_INCREMENT && GET_MODE_SIZE (mem_mode) == cstepi)
2246       || (HAVE_PRE_DECREMENT && GET_MODE_SIZE (mem_mode) == -cstepi))
2247     {
2248       enum tree_code code = MINUS_EXPR;
2249       tree new_base;
2250       tree new_step = step;
2251 
2252       if (POINTER_TYPE_P (TREE_TYPE (base)))
2253 	{
2254 	  new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2255 	  code = POINTER_PLUS_EXPR;
2256 	}
2257       else
2258 	new_step = fold_convert (TREE_TYPE (base), new_step);
2259       new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2260       add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2261 		       use->stmt);
2262     }
2263   if ((HAVE_POST_INCREMENT && GET_MODE_SIZE (mem_mode) == cstepi)
2264       || (HAVE_POST_DECREMENT && GET_MODE_SIZE (mem_mode) == -cstepi))
2265     {
2266       add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2267 		       use->stmt);
2268     }
2269 }
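
/* Illustrative example (not from the original sources): for a use accessing
   a 4-byte SImode location with an iv of step +4, on a target with pre- and
   post-increment addressing this adds

     -- an IP_BEFORE_USE candidate with base BASE - 4, matching *(++p), and
     -- an IP_AFTER_USE candidate with base BASE, matching *(p++).  */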
2270 
2271 /* Adds a candidate BASE + STEP * i.  The important field is set to IMPORTANT.
2272    If USE is not NULL, the candidate is set as related to
2273    it.  The candidate computation is scheduled on all available positions.  */
2274 
2275 static void
2276 add_candidate (struct ivopts_data *data,
2277 	       tree base, tree step, bool important, struct iv_use *use)
2278 {
2279   if (ip_normal_pos (data->current_loop))
2280     add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2281   if (ip_end_pos (data->current_loop)
2282       && allow_ip_end_pos_p (data->current_loop))
2283     add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2284 
2285   if (use != NULL && use->type == USE_ADDRESS)
2286     add_autoinc_candidates (data, base, step, important, use);
2287 }
2288 
2289 /* Add a standard "0 + 1 * iteration" iv candidate for a
2290    type with SIZE bits.  */
2291 
2292 static void
2293 add_standard_iv_candidates_for_size (struct ivopts_data *data,
2294 				     unsigned int size)
2295 {
2296   tree type = lang_hooks.types.type_for_size (size, true);
2297   add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2298 		 true, NULL);
2299 }
2300 
2301 /* Adds standard iv candidates.  */
2302 
2303 static void
2304 add_standard_iv_candidates (struct ivopts_data *data)
2305 {
2306   add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
2307 
2308   /* The same for a double-integer type if it is still fast enough.  */
2309   if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2310     add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
2311 }
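
/* Illustrative example (not from the original sources): with a 32-bit int
   and a 64-bit BITS_PER_WORD, this adds the candidate {0, +, 1} in both the
   32-bit and the 64-bit unsigned type; with a 32-bit word size, only the
   32-bit candidate is added.  */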
2312 
2313 
2314 /* Adds candidates based on the old induction variable IV.  */
2315 
2316 static void
2317 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2318 {
2319   gimple phi;
2320   tree def;
2321   struct iv_cand *cand;
2322 
2323   add_candidate (data, iv->base, iv->step, true, NULL);
2324 
2325   /* The same, but with initial value zero.  */
2326   if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2327     add_candidate (data, size_int (0), iv->step, true, NULL);
2328   else
2329     add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2330 		   iv->step, true, NULL);
2331 
2332   phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2333   if (gimple_code (phi) == GIMPLE_PHI)
2334     {
2335       /* Additionally record the possibility of leaving the original iv
2336 	 untouched.  */
2337       def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2338       cand = add_candidate_1 (data,
2339 			      iv->base, iv->step, true, IP_ORIGINAL, NULL,
2340 			      SSA_NAME_DEF_STMT (def));
2341       cand->var_before = iv->ssa_name;
2342       cand->var_after = def;
2343     }
2344 }
2345 
2346 /* Adds candidates based on the old induction variables.  */
2347 
2348 static void
2349 add_old_ivs_candidates (struct ivopts_data *data)
2350 {
2351   unsigned i;
2352   struct iv *iv;
2353   bitmap_iterator bi;
2354 
2355   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2356     {
2357       iv = ver_info (data, i)->iv;
2358       if (iv && iv->biv_p && !integer_zerop (iv->step))
2359 	add_old_iv_candidates (data, iv);
2360     }
2361 }
2362 
2363 /* Adds candidates based on the value of the induction variable IV and USE.  */
2364 
2365 static void
2366 add_iv_value_candidates (struct ivopts_data *data,
2367 			 struct iv *iv, struct iv_use *use)
2368 {
2369   unsigned HOST_WIDE_INT offset;
2370   tree base;
2371   tree basetype;
2372 
2373   add_candidate (data, iv->base, iv->step, false, use);
2374 
2375   /* The same, but with initial value zero.  Make such a variable important,
2376      since it is generic enough that possibly many uses may be based
2377      on it.  */
2378   basetype = TREE_TYPE (iv->base);
2379   if (POINTER_TYPE_P (basetype))
2380     basetype = sizetype;
2381   add_candidate (data, build_int_cst (basetype, 0),
2382 		 iv->step, true, use);
2383 
2384   /* Third, try removing the constant offset.  Make sure to even
2385      add a candidate for &a[0] vs. (T *)&a.  */
2386   base = strip_offset (iv->base, &offset);
2387   if (offset
2388       || base != iv->base)
2389     add_candidate (data, base, iv->step, false, use);
2390 }
2391 
2392 /* Adds candidates based on the uses.  */
2393 
2394 static void
2395 add_derived_ivs_candidates (struct ivopts_data *data)
2396 {
2397   unsigned i;
2398 
2399   for (i = 0; i < n_iv_uses (data); i++)
2400     {
2401       struct iv_use *use = iv_use (data, i);
2402 
2403       if (!use)
2404 	continue;
2405 
2406       switch (use->type)
2407 	{
2408 	case USE_NONLINEAR_EXPR:
2409 	case USE_COMPARE:
2410 	case USE_ADDRESS:
2411 	  /* Just add the ivs based on the value of the iv used here.  */
2412 	  add_iv_value_candidates (data, use->iv, use);
2413 	  break;
2414 
2415 	default:
2416 	  gcc_unreachable ();
2417 	}
2418     }
2419 }
2420 
2421 /* Record important candidates and add them to related_cands bitmaps
2422    if needed.  */
2423 
2424 static void
2425 record_important_candidates (struct ivopts_data *data)
2426 {
2427   unsigned i;
2428   struct iv_use *use;
2429 
2430   for (i = 0; i < n_iv_cands (data); i++)
2431     {
2432       struct iv_cand *cand = iv_cand (data, i);
2433 
2434       if (cand->important)
2435 	bitmap_set_bit (data->important_candidates, i);
2436     }
2437 
2438   data->consider_all_candidates = (n_iv_cands (data)
2439 				   <= CONSIDER_ALL_CANDIDATES_BOUND);
2440 
2441   if (data->consider_all_candidates)
2442     {
2443       /* We will not need "related_cands" bitmaps in this case,
2444 	 so release them to decrease peak memory consumption.  */
2445       for (i = 0; i < n_iv_uses (data); i++)
2446 	{
2447 	  use = iv_use (data, i);
2448 	  BITMAP_FREE (use->related_cands);
2449 	}
2450     }
2451   else
2452     {
2453       /* Add important candidates to the related_cands bitmaps.  */
2454       for (i = 0; i < n_iv_uses (data); i++)
2455 	bitmap_ior_into (iv_use (data, i)->related_cands,
2456 			 data->important_candidates);
2457     }
2458 }
2459 
2460 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2461    If consider_all_candidates is true, we use a two-dimensional array; otherwise
2462    we allocate a simple list for every use.  */
2463 
2464 static void
2465 alloc_use_cost_map (struct ivopts_data *data)
2466 {
2467   unsigned i, size, s, j;
2468 
2469   for (i = 0; i < n_iv_uses (data); i++)
2470     {
2471       struct iv_use *use = iv_use (data, i);
2472       bitmap_iterator bi;
2473 
2474       if (data->consider_all_candidates)
2475 	size = n_iv_cands (data);
2476       else
2477 	{
2478 	  s = 0;
2479 	  EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2480 	    {
2481 	      s++;
2482 	    }
2483 
2484 	  /* Round up to a power of two, so that taking the modulo by it is fast.  */
2485 	  for (size = 1; size < s; size <<= 1)
2486 	    continue;
2487 	}
2488 
2489       use->n_map_members = size;
2490       use->cost_map = XCNEWVEC (struct cost_pair, size);
2491     }
2492 }
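
/* Illustrative example (not from the original sources): a use with five
   related candidates gets a cost table of eight entries (the next power of
   two), so that set_use_iv_cost can reduce a candidate id to a slot with a
   cheap bitwise AND instead of a division.  */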
2493 
2494 /* Returns a description of the computation cost of an expression whose
2495    runtime cost is RUNTIME and whose complexity corresponds to COMPLEXITY.  */
2496 
2497 static comp_cost
2498 new_cost (unsigned runtime, unsigned complexity)
2499 {
2500   comp_cost cost;
2501 
2502   cost.cost = runtime;
2503   cost.complexity = complexity;
2504 
2505   return cost;
2506 }
2507 
2508 /* Adds costs COST1 and COST2.  */
2509 
2510 static comp_cost
2511 add_costs (comp_cost cost1, comp_cost cost2)
2512 {
2513   cost1.cost += cost2.cost;
2514   cost1.complexity += cost2.complexity;
2515 
2516   return cost1;
2517 }

2518 /* Subtracts COST2 from COST1.  */
2519 
2520 static comp_cost
2521 sub_costs (comp_cost cost1, comp_cost cost2)
2522 {
2523   cost1.cost -= cost2.cost;
2524   cost1.complexity -= cost2.complexity;
2525 
2526   return cost1;
2527 }
2528 
2529 /* Returns a negative number if COST1 < COST2, a positive number if
2530    COST1 > COST2, and 0 if COST1 = COST2.  */
2531 
2532 static int
2533 compare_costs (comp_cost cost1, comp_cost cost2)
2534 {
2535   if (cost1.cost == cost2.cost)
2536     return cost1.complexity - cost2.complexity;
2537 
2538   return cost1.cost - cost2.cost;
2539 }
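
/* Illustrative example (not from the original sources): costs are compared
   first by the runtime component and only then by complexity, so
   {cost = 4, complexity = 1} is considered cheaper than
   {cost = 4, complexity = 2}, while {cost = 3, complexity = 5} beats
   both.  */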
2540 
2541 /* Returns true if COST is infinite.  */
2542 
2543 static bool
2544 infinite_cost_p (comp_cost cost)
2545 {
2546   return cost.cost == INFTY;
2547 }
2548 
2549 /* Sets cost of (USE, CANDIDATE) pair to COST and records that it depends
2550    on invariants DEPENDS_ON and that the value used in expressing it
2551    is VALUE.  */
2552 
2553 static void
2554 set_use_iv_cost (struct ivopts_data *data,
2555 		 struct iv_use *use, struct iv_cand *cand,
2556 		 comp_cost cost, bitmap depends_on, tree value)
2557 {
2558   unsigned i, s;
2559 
2560   if (infinite_cost_p (cost))
2561     {
2562       BITMAP_FREE (depends_on);
2563       return;
2564     }
2565 
2566   if (data->consider_all_candidates)
2567     {
2568       use->cost_map[cand->id].cand = cand;
2569       use->cost_map[cand->id].cost = cost;
2570       use->cost_map[cand->id].depends_on = depends_on;
2571       use->cost_map[cand->id].value = value;
2572       return;
2573     }
2574 
2575   /* n_map_members is a power of two, so this computes modulo.  */
2576   s = cand->id & (use->n_map_members - 1);
2577   for (i = s; i < use->n_map_members; i++)
2578     if (!use->cost_map[i].cand)
2579       goto found;
2580   for (i = 0; i < s; i++)
2581     if (!use->cost_map[i].cand)
2582       goto found;
2583 
2584   gcc_unreachable ();
2585 
2586 found:
2587   use->cost_map[i].cand = cand;
2588   use->cost_map[i].cost = cost;
2589   use->cost_map[i].depends_on = depends_on;
2590   use->cost_map[i].value = value;
2591 }
2592 
2593 /* Gets cost of (USE, CANDIDATE) pair.  */
2594 
2595 static struct cost_pair *
2596 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2597 		 struct iv_cand *cand)
2598 {
2599   unsigned i, s;
2600   struct cost_pair *ret;
2601 
2602   if (!cand)
2603     return NULL;
2604 
2605   if (data->consider_all_candidates)
2606     {
2607       ret = use->cost_map + cand->id;
2608       if (!ret->cand)
2609 	return NULL;
2610 
2611       return ret;
2612     }
2613 
2614   /* n_map_members is a power of two, so this computes modulo.  */
2615   s = cand->id & (use->n_map_members - 1);
2616   for (i = s; i < use->n_map_members; i++)
2617     if (use->cost_map[i].cand == cand)
2618       return use->cost_map + i;
2619 
2620   for (i = 0; i < s; i++)
2621     if (use->cost_map[i].cand == cand)
2622       return use->cost_map + i;
2623 
2624   return NULL;
2625 }
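
/* Illustrative example (not from the original sources): with n_map_members
   equal to 8, a candidate with id 13 hashes to slot 13 & 7 == 5; both
   set_use_iv_cost and get_use_iv_cost then probe linearly from slot 5,
   wrapping around to slot 0, which is the usual open-addressing scheme.  */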
2626 
2627 /* Returns an estimate of the cost of computing SEQ.  */
2628 
2629 static unsigned
2630 seq_cost (rtx seq, bool speed)
2631 {
2632   unsigned cost = 0;
2633   rtx set;
2634 
2635   for (; seq; seq = NEXT_INSN (seq))
2636     {
2637       set = single_set (seq);
2638       if (set)
2639 	cost += rtx_cost (set, SET, speed);
2640       else
2641 	cost++;
2642     }
2643 
2644   return cost;
2645 }
2646 
2647 /* Produce DECL_RTL for object OBJ so it looks like it is stored in memory.  */
2648 static rtx
2649 produce_memory_decl_rtl (tree obj, int *regno)
2650 {
2651   addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
2652   enum machine_mode address_mode = targetm.addr_space.address_mode (as);
2653   rtx x;
2654 
2655   gcc_assert (obj);
2656   if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2657     {
2658       const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2659       x = gen_rtx_SYMBOL_REF (address_mode, name);
2660       SET_SYMBOL_REF_DECL (x, obj);
2661       x = gen_rtx_MEM (DECL_MODE (obj), x);
2662       set_mem_addr_space (x, as);
2663       targetm.encode_section_info (obj, x, true);
2664     }
2665   else
2666     {
2667       x = gen_raw_REG (address_mode, (*regno)++);
2668       x = gen_rtx_MEM (DECL_MODE (obj), x);
2669       set_mem_addr_space (x, as);
2670     }
2671 
2672   return x;
2673 }
2674 
2675 /* Prepares decl_rtl for variables referred to in *EXPR_P.  Callback for
2676    walk_tree.  DATA contains the actual fake register number.  */
2677 
2678 static tree
2679 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2680 {
2681   tree obj = NULL_TREE;
2682   rtx x = NULL_RTX;
2683   int *regno = (int *) data;
2684 
2685   switch (TREE_CODE (*expr_p))
2686     {
2687     case ADDR_EXPR:
2688       for (expr_p = &TREE_OPERAND (*expr_p, 0);
2689 	   handled_component_p (*expr_p);
2690 	   expr_p = &TREE_OPERAND (*expr_p, 0))
2691 	continue;
2692       obj = *expr_p;
2693       if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
2694         x = produce_memory_decl_rtl (obj, regno);
2695       break;
2696 
2697     case SSA_NAME:
2698       *ws = 0;
2699       obj = SSA_NAME_VAR (*expr_p);
2700       if (!DECL_RTL_SET_P (obj))
2701 	x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2702       break;
2703 
2704     case VAR_DECL:
2705     case PARM_DECL:
2706     case RESULT_DECL:
2707       *ws = 0;
2708       obj = *expr_p;
2709 
2710       if (DECL_RTL_SET_P (obj))
2711 	break;
2712 
2713       if (DECL_MODE (obj) == BLKmode)
2714 	x = produce_memory_decl_rtl (obj, regno);
2715       else
2716 	x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2717 
2718       break;
2719 
2720     default:
2721       break;
2722     }
2723 
2724   if (x)
2725     {
2726       VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2727       SET_DECL_RTL (obj, x);
2728     }
2729 
2730   return NULL_TREE;
2731 }
2732 
2733 /* Determines cost of the computation of EXPR.  */
2734 
2735 static unsigned
2736 computation_cost (tree expr, bool speed)
2737 {
2738   rtx seq, rslt;
2739   tree type = TREE_TYPE (expr);
2740   unsigned cost;
2741   /* Avoid using hard regs in ways which may be unsupported.  */
2742   int regno = LAST_VIRTUAL_REGISTER + 1;
2743   enum function_frequency real_frequency = cfun->function_frequency;
2744 
2745   cfun->function_frequency = FUNCTION_FREQUENCY_NORMAL;
2746   crtl->maybe_hot_insn_p = speed;
2747   walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2748   start_sequence ();
2749   rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2750   seq = get_insns ();
2751   end_sequence ();
2752   default_rtl_profile ();
2753   cfun->function_frequency = real_frequency;
2754 
2755   cost = seq_cost (seq, speed);
2756   if (MEM_P (rslt))
2757     cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
2758 			  TYPE_ADDR_SPACE (type), speed);
2759 
2760   return cost;
2761 }
2762 
2763 /* Returns variable containing the value of candidate CAND at statement AT.  */
2764 
2765 static tree
2766 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
2767 {
2768   if (stmt_after_increment (loop, cand, stmt))
2769     return cand->var_after;
2770   else
2771     return cand->var_before;
2772 }
2773 
2774 /* Return the most significant (sign) bit of T.  Similar to tree_int_cst_msb,
2775    but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE.  */
2776 
2777 int
2778 tree_int_cst_sign_bit (const_tree t)
2779 {
2780   unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2781   unsigned HOST_WIDE_INT w;
2782 
2783   if (bitno < HOST_BITS_PER_WIDE_INT)
2784     w = TREE_INT_CST_LOW (t);
2785   else
2786     {
2787       w = TREE_INT_CST_HIGH (t);
2788       bitno -= HOST_BITS_PER_WIDE_INT;
2789     }
2790 
2791   return (w >> bitno) & 1;
2792 }
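
/* Illustrative example (not from the original sources): for a 16-bit signed
   type, bitno is 15, so tree_int_cst_sign_bit returns 1 for the constant -1
   and 0 for the constant 1, regardless of how wide the underlying machine
   mode is.  */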
2793 
2794 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2795    same precision that is at least as wide as the precision of TYPE, stores
2796    BA to A and BB to B, and returns the type of BA.  Otherwise, returns the
2797    type of A and B.  */
2798 
2799 static tree
2800 determine_common_wider_type (tree *a, tree *b)
2801 {
2802   tree wider_type = NULL;
2803   tree suba, subb;
2804   tree atype = TREE_TYPE (*a);
2805 
2806   if (CONVERT_EXPR_P (*a))
2807     {
2808       suba = TREE_OPERAND (*a, 0);
2809       wider_type = TREE_TYPE (suba);
2810       if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
2811 	return atype;
2812     }
2813   else
2814     return atype;
2815 
2816   if (CONVERT_EXPR_P (*b))
2817     {
2818       subb = TREE_OPERAND (*b, 0);
2819       if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2820 	return atype;
2821     }
2822   else
2823     return atype;
2824 
2825   *a = suba;
2826   *b = subb;
2827   return wider_type;
2828 }
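
/* Illustrative example (not from the original sources): if *A is
   (unsigned int) LA and *B is (unsigned int) LB with LA and LB both of a
   64-bit type, the conversions are stripped, LA and LB are stored back
   through A and B, and the 64-bit type is returned; if only one operand is
   such a conversion, the original 32-bit type is returned unchanged.  */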
2829 
2830 /* Determines the expression by which USE is expressed from induction variable
2831    CAND at statement AT in LOOP.  The expression is stored in a decomposed
2832    form into AFF.  Returns false if USE cannot be expressed using CAND.  */
2833 
2834 static bool
2835 get_computation_aff (struct loop *loop,
2836 		     struct iv_use *use, struct iv_cand *cand, gimple at,
2837 		     struct affine_tree_combination *aff)
2838 {
2839   tree ubase = use->iv->base;
2840   tree ustep = use->iv->step;
2841   tree cbase = cand->iv->base;
2842   tree cstep = cand->iv->step, cstep_common;
2843   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2844   tree common_type, var;
2845   tree uutype;
2846   aff_tree cbase_aff, var_aff;
2847   double_int rat;
2848 
2849   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2850     {
2851       /* We do not have a precision to express the values of USE.  */
2852       return false;
2853     }
2854 
2855   var = var_at_stmt (loop, cand, at);
2856   uutype = unsigned_type_for (utype);
2857 
2858   /* If the conversion is not a no-op, perform it.  */
2859   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
2860     {
2861       cstep = fold_convert (uutype, cstep);
2862       cbase = fold_convert (uutype, cbase);
2863       var = fold_convert (uutype, var);
2864     }
2865 
2866   if (!constant_multiple_of (ustep, cstep, &rat))
2867     return false;
2868 
2869   /* In case both UBASE and CBASE are shortened to UUTYPE from some common
2870      type, we achieve better folding by computing their difference in this
2871      wider type, and casting the result to UUTYPE.  We do not need to worry
2872      about overflows, as all the arithmetic will in the end be performed in
2873      UUTYPE anyway.  */
2874   common_type = determine_common_wider_type (&ubase, &cbase);
2875 
2876   /* use = ubase - ratio * cbase + ratio * var.  */
2877   tree_to_aff_combination (ubase, common_type, aff);
2878   tree_to_aff_combination (cbase, common_type, &cbase_aff);
2879   tree_to_aff_combination (var, uutype, &var_aff);
2880 
2881   /* We need to shift the value if we are after the increment.  */
2882   if (stmt_after_increment (loop, cand, at))
2883     {
2884       aff_tree cstep_aff;
2885 
2886       if (common_type != uutype)
2887 	cstep_common = fold_convert (common_type, cstep);
2888       else
2889 	cstep_common = cstep;
2890 
2891       tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
2892       aff_combination_add (&cbase_aff, &cstep_aff);
2893     }
2894 
2895   aff_combination_scale (&cbase_aff, double_int_neg (rat));
2896   aff_combination_add (aff, &cbase_aff);
2897   if (common_type != uutype)
2898     aff_combination_convert (aff, uutype);
2899 
2900   aff_combination_scale (&var_aff, rat);
2901   aff_combination_add (aff, &var_aff);
2902 
2903   return true;
2904 }
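
/* Illustrative example (not from the original sources): for a use with
   iv {base B, step 4} and a candidate with iv {base 0, step 1}, the ratio
   is 4 and the resulting combination is B - 4 * 0 + 4 * var, i.e. the use
   is rewritten as B + 4 * var, where var holds the candidate's value.  */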
2905 
2906 /* Determines the expression by which USE is expressed from induction variable
2907    CAND at statement AT in LOOP.  The computation is unshared.  */
2908 
2909 static tree
2910 get_computation_at (struct loop *loop,
2911 		    struct iv_use *use, struct iv_cand *cand, gimple at)
2912 {
2913   aff_tree aff;
2914   tree type = TREE_TYPE (use->iv->base);
2915 
2916   if (!get_computation_aff (loop, use, cand, at, &aff))
2917     return NULL_TREE;
2918   unshare_aff_combination (&aff);
2919   return fold_convert (type, aff_combination_to_tree (&aff));
2920 }
2921 
2922 /* Determines the expression by which USE is expressed from induction variable
2923    CAND in LOOP.  The computation is unshared.  */
2924 
2925 static tree
2926 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
2927 {
2928   return get_computation_at (loop, use, cand, use->stmt);
2929 }
2930 
2931 /* Returns cost of addition in MODE.  */
2932 
2933 static unsigned
2934 add_cost (enum machine_mode mode, bool speed)
2935 {
2936   static unsigned costs[NUM_MACHINE_MODES];
2937   rtx seq;
2938   unsigned cost;
2939 
2940   if (costs[mode])
2941     return costs[mode];
2942 
2943   start_sequence ();
2944   force_operand (gen_rtx_fmt_ee (PLUS, mode,
2945 				 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
2946 				 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
2947 		 NULL_RTX);
2948   seq = get_insns ();
2949   end_sequence ();
2950 
2951   cost = seq_cost (seq, speed);
2952   if (!cost)
2953     cost = 1;
2954 
2955   costs[mode] = cost;
2956 
2957   if (dump_file && (dump_flags & TDF_DETAILS))
2958     fprintf (dump_file, "Addition in %s costs %d\n",
2959 	     GET_MODE_NAME (mode), cost);
2960   return cost;
2961 }
2962 
2963 /* Entry in a hashtable of already known costs for multiplication.  */
2964 struct mbc_entry
2965 {
2966   HOST_WIDE_INT cst;		/* The constant to multiply by.  */
2967   enum machine_mode mode;	/* In mode.  */
2968   unsigned cost;		/* The cost.  */
2969 };
2970 
2971 /* Computes the hash value for ENTRY.  */
2972 
2973 static hashval_t
2974 mbc_entry_hash (const void *entry)
2975 {
2976   const struct mbc_entry *e = (const struct mbc_entry *) entry;
2977 
2978   return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
2979 }
2980 
2981 /* Compares the hash table entries ENTRY1 and ENTRY2.  */
2982 
2983 static int
2984 mbc_entry_eq (const void *entry1, const void *entry2)
2985 {
2986   const struct mbc_entry *e1 = (const struct mbc_entry *) entry1;
2987   const struct mbc_entry *e2 = (const struct mbc_entry *) entry2;
2988 
2989   return (e1->mode == e2->mode
2990 	  && e1->cst == e2->cst);
2991 }
2992 
2993 /* Returns cost of multiplication by constant CST in MODE.  */
2994 
2995 unsigned
2996 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode, bool speed)
2997 {
2998   static htab_t costs;
2999   struct mbc_entry **cached, act;
3000   rtx seq;
3001   unsigned cost;
3002 
3003   if (!costs)
3004     costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
3005 
3006   act.mode = mode;
3007   act.cst = cst;
3008   cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
3009   if (*cached)
3010     return (*cached)->cost;
3011 
3012   *cached = XNEW (struct mbc_entry);
3013   (*cached)->mode = mode;
3014   (*cached)->cst = cst;
3015 
3016   start_sequence ();
3017   expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3018 	       gen_int_mode (cst, mode), NULL_RTX, 0);
3019   seq = get_insns ();
3020   end_sequence ();
3021 
3022   cost = seq_cost (seq, speed);
3023 
3024   if (dump_file && (dump_flags & TDF_DETAILS))
3025     fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
3026 	     (int) cst, GET_MODE_NAME (mode), cost);
3027 
3028   (*cached)->cost = cost;
3029 
3030   return cost;
3031 }
3032 
3033 /* Returns true if multiplying by RATIO is allowed in an address.  Test the
3034    validity for a memory reference accessing memory of mode MODE in
3035    address space AS.  */
3036 
3037 DEF_VEC_P (sbitmap);
3038 DEF_VEC_ALLOC_P (sbitmap, heap);
3039 
3040 bool
3041 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode,
3042 				 addr_space_t as)
3043 {
3044 #define MAX_RATIO 128
3045   unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3046   static VEC (sbitmap, heap) *valid_mult_list;
3047   sbitmap valid_mult;
3048 
3049   if (data_index >= VEC_length (sbitmap, valid_mult_list))
3050     VEC_safe_grow_cleared (sbitmap, heap, valid_mult_list, data_index + 1);
3051 
3052   valid_mult = VEC_index (sbitmap, valid_mult_list, data_index);
3053   if (!valid_mult)
3054     {
3055       enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3056       rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3057       rtx addr;
3058       HOST_WIDE_INT i;
3059 
3060       valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3061       sbitmap_zero (valid_mult);
3062       addr = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3063       for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3064 	{
3065 	  XEXP (addr, 1) = gen_int_mode (i, address_mode);
3066 	  if (memory_address_addr_space_p (mode, addr, as))
3067 	    SET_BIT (valid_mult, i + MAX_RATIO);
3068 	}
3069 
3070       if (dump_file && (dump_flags & TDF_DETAILS))
3071 	{
3072 	  fprintf (dump_file, "  allowed multipliers:");
3073 	  for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3074 	    if (TEST_BIT (valid_mult, i + MAX_RATIO))
3075 	      fprintf (dump_file, " %d", (int) i);
3076 	  fprintf (dump_file, "\n");
3077 	  fprintf (dump_file, "\n");
3078 	}
3079 
3080       VEC_replace (sbitmap, valid_mult_list, data_index, valid_mult);
3081     }
3082 
3083   if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3084     return false;
3085 
3086   return TEST_BIT (valid_mult, ratio + MAX_RATIO);
3087 }
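
/* Illustrative example (not from the original sources): on an x86-like
   target, where the index of a memory operand may be scaled by 1, 2, 4
   or 8, the cached bitmap would have exactly those positive ratios set,
   and e.g. multiplier_allowed_in_address_p (3, ...) would return false.  */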
3088 
3089 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3090    If SYMBOL_PRESENT is false, symbol is omitted.  If VAR_PRESENT is false,
3091    variable is omitted.  Compute the cost for a memory reference that accesses
3092    a memory location of mode MEM_MODE in address space AS.
3093 
3094    MAY_AUTOINC is set to true if the autoincrement (increasing index by
3095    size of MEM_MODE / RATIO) is available.  To make this determination, we
3096    look at the size of the increment to be made, which is given in CSTEP.
3097    CSTEP may be zero if the step is unknown.
3098    STMT_AFTER_INC is true iff the statement we're looking at is after the
3099    increment of the original biv.
3100 
3101    TODO -- there must be some better way.  This all is quite crude.  */
3102 
3103 typedef struct
3104 {
3105   HOST_WIDE_INT min_offset, max_offset;
3106   unsigned costs[2][2][2][2];
3107 } *address_cost_data;
3108 
3109 DEF_VEC_P (address_cost_data);
3110 DEF_VEC_ALLOC_P (address_cost_data, heap);
3111 
3112 static comp_cost
3113 get_address_cost (bool symbol_present, bool var_present,
3114 		  unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3115 		  HOST_WIDE_INT cstep, enum machine_mode mem_mode,
3116 		  addr_space_t as, bool speed,
3117 		  bool stmt_after_inc, bool *may_autoinc)
3118 {
3119   enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3120   static VEC(address_cost_data, heap) *address_cost_data_list;
3121   unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3122   address_cost_data data;
3123   static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3124   static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3125   unsigned cost, acost, complexity;
3126   bool offset_p, ratio_p, autoinc;
3127   HOST_WIDE_INT s_offset, autoinc_offset, msize;
3128   unsigned HOST_WIDE_INT mask;
3129   unsigned bits;
3130 
3131   if (data_index >= VEC_length (address_cost_data, address_cost_data_list))
3132     VEC_safe_grow_cleared (address_cost_data, heap, address_cost_data_list,
3133 			   data_index + 1);
3134 
3135   data = VEC_index (address_cost_data, address_cost_data_list, data_index);
3136   if (!data)
3137     {
3138       HOST_WIDE_INT i;
3139       HOST_WIDE_INT start = BIGGEST_ALIGNMENT / BITS_PER_UNIT;
3140       HOST_WIDE_INT rat, off;
3141       int old_cse_not_expected;
3142       unsigned sym_p, var_p, off_p, rat_p, add_c;
3143       rtx seq, addr, base;
3144       rtx reg0, reg1;
3145 
3146       data = (address_cost_data) xcalloc (1, sizeof (*data));
3147 
3148       reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3149 
3150       addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3151       for (i = start; i <= 1 << 20; i <<= 1)
3152 	{
3153 	  XEXP (addr, 1) = gen_int_mode (i, address_mode);
3154 	  if (!memory_address_addr_space_p (mem_mode, addr, as))
3155 	    break;
3156 	}
3157       data->max_offset = i == start ? 0 : i >> 1;
3158       off = data->max_offset;
3159 
3160       for (i = start; i <= 1 << 20; i <<= 1)
3161 	{
3162 	  XEXP (addr, 1) = gen_int_mode (-i, address_mode);
3163 	  if (!memory_address_addr_space_p (mem_mode, addr, as))
3164 	    break;
3165 	}
3166       data->min_offset = i == start ? 0 : -(i >> 1);
3167 
3168       if (dump_file && (dump_flags & TDF_DETAILS))
3169 	{
3170 	  fprintf (dump_file, "get_address_cost:\n");
3171 	  fprintf (dump_file, "  min offset %s %d\n",
3172 		   GET_MODE_NAME (mem_mode),
3173 		   (int) data->min_offset);
3174 	  fprintf (dump_file, "  max offset %s %d\n",
3175 		   GET_MODE_NAME (mem_mode),
3176 		   (int) data->max_offset);
3177 	}
3178 
3179       rat = 1;
3180       for (i = 2; i <= MAX_RATIO; i++)
3181 	if (multiplier_allowed_in_address_p (i, mem_mode, as))
3182 	  {
3183 	    rat = i;
3184 	    break;
3185 	  }
3186 
3187       /* Compute the cost of various addressing modes.  */
3188       acost = 0;
3189       reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3190       reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3191 
3192       if (HAVE_PRE_DECREMENT)
3193 	{
3194 	  addr = gen_rtx_PRE_DEC (address_mode, reg0);
3195 	  has_predec[mem_mode]
3196 	    = memory_address_addr_space_p (mem_mode, addr, as);
3197 	}
3198       if (HAVE_POST_DECREMENT)
3199 	{
3200 	  addr = gen_rtx_POST_DEC (address_mode, reg0);
3201 	  has_postdec[mem_mode]
3202 	    = memory_address_addr_space_p (mem_mode, addr, as);
3203 	}
3204       if (HAVE_PRE_INCREMENT)
3205 	{
3206 	  addr = gen_rtx_PRE_INC (address_mode, reg0);
3207 	  has_preinc[mem_mode]
3208 	    = memory_address_addr_space_p (mem_mode, addr, as);
3209 	}
3210       if (HAVE_POST_INCREMENT)
3211 	{
3212 	  addr = gen_rtx_POST_INC (address_mode, reg0);
3213 	  has_postinc[mem_mode]
3214 	    = memory_address_addr_space_p (mem_mode, addr, as);
3215 	}
3216       for (i = 0; i < 16; i++)
3217 	{
3218 	  sym_p = i & 1;
3219 	  var_p = (i >> 1) & 1;
3220 	  off_p = (i >> 2) & 1;
3221 	  rat_p = (i >> 3) & 1;
3222 
3223 	  addr = reg0;
3224 	  if (rat_p)
3225 	    addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3226 				   gen_int_mode (rat, address_mode));
3227 
3228 	  if (var_p)
3229 	    addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3230 
3231 	  if (sym_p)
3232 	    {
3233 	      base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3234 	      /* ??? We can run into trouble with some backends by presenting
3235 		 it with symbols which haven't been properly passed through
3236 		 targetm.encode_section_info.  By setting the local bit, we
3237 		 enhance the probability of things working.  */
3238 	      SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3239 
3240 	      if (off_p)
3241 		base = gen_rtx_fmt_e (CONST, address_mode,
3242 				      gen_rtx_fmt_ee
3243 					(PLUS, address_mode, base,
3244 					 gen_int_mode (off, address_mode)));
3245 	    }
3246 	  else if (off_p)
3247 	    base = gen_int_mode (off, address_mode);
3248 	  else
3249 	    base = NULL_RTX;
3250 
3251 	  if (base)
3252 	    addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3253 
3254 	  start_sequence ();
3255 	  /* To avoid splitting addressing modes, pretend that no cse will
3256 	     follow.  */
3257 	  old_cse_not_expected = cse_not_expected;
3258 	  cse_not_expected = true;
3259 	  addr = memory_address_addr_space (mem_mode, addr, as);
3260 	  cse_not_expected = old_cse_not_expected;
3261 	  seq = get_insns ();
3262 	  end_sequence ();
3263 
3264 	  acost = seq_cost (seq, speed);
3265 	  acost += address_cost (addr, mem_mode, as, speed);
3266 
3267 	  if (!acost)
3268 	    acost = 1;
3269 	  data->costs[sym_p][var_p][off_p][rat_p] = acost;
3270 	}
3271 
3272       /* On some targets, it is quite expensive to load a symbol into a register,
3273 	 which makes addresses that contain symbols look much more expensive.
3274 	 However, the symbol will have to be loaded in any case before the
3275 	 loop (and quite likely we have it in a register already), so it does
3276 	 not make much sense to penalize them too heavily.  So make some final
3277          tweaks for the SYMBOL_PRESENT modes:
3278 
3279          If VAR_PRESENT is false, and the mode obtained by changing symbol to
3280 	 var is cheaper, use this mode with small penalty.
3281 	 If VAR_PRESENT is true, try whether the mode with
3282 	 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3283 	 if this is the case, use it.  */
3284       add_c = add_cost (address_mode, speed);
3285       for (i = 0; i < 8; i++)
3286 	{
3287 	  var_p = i & 1;
3288 	  off_p = (i >> 1) & 1;
3289 	  rat_p = (i >> 2) & 1;
3290 
3291 	  acost = data->costs[0][1][off_p][rat_p] + 1;
3292 	  if (var_p)
3293 	    acost += add_c;
3294 
3295 	  if (acost < data->costs[1][var_p][off_p][rat_p])
3296 	    data->costs[1][var_p][off_p][rat_p] = acost;
3297 	}
3298 
3299       if (dump_file && (dump_flags & TDF_DETAILS))
3300 	{
3301 	  fprintf (dump_file, "Address costs:\n");
3302 
3303 	  for (i = 0; i < 16; i++)
3304 	    {
3305 	      sym_p = i & 1;
3306 	      var_p = (i >> 1) & 1;
3307 	      off_p = (i >> 2) & 1;
3308 	      rat_p = (i >> 3) & 1;
3309 
3310 	      fprintf (dump_file, "  ");
3311 	      if (sym_p)
3312 		fprintf (dump_file, "sym + ");
3313 	      if (var_p)
3314 		fprintf (dump_file, "var + ");
3315 	      if (off_p)
3316 		fprintf (dump_file, "cst + ");
3317 	      if (rat_p)
3318 		fprintf (dump_file, "rat * ");
3319 
3320 	      acost = data->costs[sym_p][var_p][off_p][rat_p];
3321 	      fprintf (dump_file, "index costs %d\n", acost);
3322 	    }
3323 	  if (has_predec[mem_mode] || has_postdec[mem_mode]
3324 	      || has_preinc[mem_mode] || has_postinc[mem_mode])
3325 	    fprintf (dump_file, "  May include autoinc/dec\n");
3326 	  fprintf (dump_file, "\n");
3327 	}
3328 
3329       VEC_replace (address_cost_data, address_cost_data_list,
3330 		   data_index, data);
3331     }
3332 
3333   bits = GET_MODE_BITSIZE (address_mode);
3334   mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3335   offset &= mask;
3336   if ((offset >> (bits - 1) & 1))
3337     offset |= ~mask;
3338   s_offset = offset;
3339 
3340   autoinc = false;
3341   msize = GET_MODE_SIZE (mem_mode);
3342   autoinc_offset = offset;
3343   if (stmt_after_inc)
3344     autoinc_offset += ratio * cstep;
3345   if (symbol_present || var_present || ratio != 1)
3346     autoinc = false;
3347   else if ((has_postinc[mem_mode] && autoinc_offset == 0
3348 	       && msize == cstep)
3349 	   || (has_postdec[mem_mode] && autoinc_offset == 0
3350 	       && msize == -cstep)
3351 	   || (has_preinc[mem_mode] && autoinc_offset == msize
3352 	       && msize == cstep)
3353 	   || (has_predec[mem_mode] && autoinc_offset == -msize
3354 	       && msize == -cstep))
3355     autoinc = true;
3356 
3357   cost = 0;
3358   offset_p = (s_offset != 0
3359 	      && data->min_offset <= s_offset
3360 	      && s_offset <= data->max_offset);
3361   ratio_p = (ratio != 1
3362 	     && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3363 
3364   if (ratio != 1 && !ratio_p)
3365     cost += multiply_by_cost (ratio, address_mode, speed);
3366 
3367   if (s_offset && !offset_p && !symbol_present)
3368     cost += add_cost (address_mode, speed);
3369 
3370   if (may_autoinc)
3371     *may_autoinc = autoinc;
3372   acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3373   complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3374   return new_cost (cost + acost, complexity);
3375 }
3376 
3377 /* Estimates cost of forcing expression EXPR into a variable.  */
3378 
3379 static comp_cost
3380 force_expr_to_var_cost (tree expr, bool speed)
3381 {
3382   static bool costs_initialized = false;
3383   static unsigned integer_cost [2];
3384   static unsigned symbol_cost [2];
3385   static unsigned address_cost [2];
3386   tree op0, op1;
3387   comp_cost cost0, cost1, cost;
3388   enum machine_mode mode;
3389 
3390   if (!costs_initialized)
3391     {
3392       tree type = build_pointer_type (integer_type_node);
3393       tree var, addr;
3394       rtx x;
3395       int i;
3396 
3397       var = create_tmp_var_raw (integer_type_node, "test_var");
3398       TREE_STATIC (var) = 1;
3399       x = produce_memory_decl_rtl (var, NULL);
3400       SET_DECL_RTL (var, x);
3401 
3402       addr = build1 (ADDR_EXPR, type, var);
3403 
3404 
3405       for (i = 0; i < 2; i++)
3406 	{
3407 	  integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3408 							     2000), i);
3409 
3410 	  symbol_cost[i] = computation_cost (addr, i) + 1;
3411 
3412 	  address_cost[i]
3413 	    = computation_cost (build2 (POINTER_PLUS_EXPR, type,
3414 					addr,
3415 					build_int_cst (sizetype, 2000)), i) + 1;
3416 	  if (dump_file && (dump_flags & TDF_DETAILS))
3417 	    {
3418 	      fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3419 	      fprintf (dump_file, "  integer %d\n", (int) integer_cost[i]);
3420 	      fprintf (dump_file, "  symbol %d\n", (int) symbol_cost[i]);
3421 	      fprintf (dump_file, "  address %d\n", (int) address_cost[i]);
3422 	      fprintf (dump_file, "  other %d\n", (int) target_spill_cost[i]);
3423 	      fprintf (dump_file, "\n");
3424 	    }
3425 	}
3426 
3427       costs_initialized = true;
3428     }
3429 
3430   STRIP_NOPS (expr);
3431 
3432   if (SSA_VAR_P (expr))
3433     return zero_cost;
3434 
3435   if (is_gimple_min_invariant (expr))
3436     {
3437       if (TREE_CODE (expr) == INTEGER_CST)
3438 	return new_cost (integer_cost [speed], 0);
3439 
3440       if (TREE_CODE (expr) == ADDR_EXPR)
3441 	{
3442 	  tree obj = TREE_OPERAND (expr, 0);
3443 
3444 	  if (TREE_CODE (obj) == VAR_DECL
3445 	      || TREE_CODE (obj) == PARM_DECL
3446 	      || TREE_CODE (obj) == RESULT_DECL)
3447 	    return new_cost (symbol_cost [speed], 0);
3448 	}
3449 
3450       return new_cost (address_cost [speed], 0);
3451     }
3452 
3453   switch (TREE_CODE (expr))
3454     {
3455     case POINTER_PLUS_EXPR:
3456     case PLUS_EXPR:
3457     case MINUS_EXPR:
3458     case MULT_EXPR:
3459       op0 = TREE_OPERAND (expr, 0);
3460       op1 = TREE_OPERAND (expr, 1);
3461       STRIP_NOPS (op0);
3462       STRIP_NOPS (op1);
3463 
3464       if (is_gimple_val (op0))
3465 	cost0 = zero_cost;
3466       else
3467 	cost0 = force_expr_to_var_cost (op0, speed);
3468 
3469       if (is_gimple_val (op1))
3470 	cost1 = zero_cost;
3471       else
3472 	cost1 = force_expr_to_var_cost (op1, speed);
3473 
3474       break;
3475 
3476     case NEGATE_EXPR:
3477       op0 = TREE_OPERAND (expr, 0);
3478       STRIP_NOPS (op0);
3479       op1 = NULL_TREE;
3480 
3481       if (is_gimple_val (op0))
3482 	cost0 = zero_cost;
3483       else
3484 	cost0 = force_expr_to_var_cost (op0, speed);
3485 
3486       cost1 = zero_cost;
3487       break;
3488 
3489     default:
3490       /* Just an arbitrary value, FIXME.  */
3491       return new_cost (target_spill_cost[speed], 0);
3492     }
3493 
3494   mode = TYPE_MODE (TREE_TYPE (expr));
3495   switch (TREE_CODE (expr))
3496     {
3497     case POINTER_PLUS_EXPR:
3498     case PLUS_EXPR:
3499     case MINUS_EXPR:
3500     case NEGATE_EXPR:
3501       cost = new_cost (add_cost (mode, speed), 0);
3502       break;
3503 
3504     case MULT_EXPR:
3505       if (cst_and_fits_in_hwi (op0))
3506 	cost = new_cost (multiply_by_cost (int_cst_value (op0), mode, speed), 0);
3507       else if (cst_and_fits_in_hwi (op1))
3508 	cost = new_cost (multiply_by_cost (int_cst_value (op1), mode, speed), 0);
3509       else
3510 	return new_cost (target_spill_cost [speed], 0);
3511       break;
3512 
3513     default:
3514       gcc_unreachable ();
3515     }
3516 
3517   cost = add_costs (cost, cost0);
3518   cost = add_costs (cost, cost1);
3519 
3520   /* Bound the cost by target_spill_cost.  The parts of complicated
3521      computations often are either loop invariant or at least can
3522      be shared between several iv uses, so letting this grow without
3523      limits would not give reasonable results.  */
3524   if (cost.cost > (int) target_spill_cost [speed])
3525     cost.cost = target_spill_cost [speed];
3526 
3527   return cost;
3528 }
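
/* Illustrative example (not from the original sources): for an expression
   like A + 4 * B with A and B already gimple values, the estimate is the
   cost of one addition plus the cost of multiplying by the constant 4;
   whatever the operands, the total is capped at target_spill_cost.  */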
3529 
3530 /* Estimates cost of forcing EXPR into a variable.  DEPENDS_ON is a set of the
3531    invariants the computation depends on.  */
3532 
3533 static comp_cost
3534 force_var_cost (struct ivopts_data *data,
3535 		tree expr, bitmap *depends_on)
3536 {
3537   if (depends_on)
3538     {
3539       fd_ivopts_data = data;
3540       walk_tree (&expr, find_depends, depends_on, NULL);
3541     }
3542 
3543   return force_expr_to_var_cost (expr, data->speed);
3544 }
3545 
3546 /* Estimates cost of expressing address ADDR as var + symbol + offset.  The
3547    value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3548    to false if the corresponding part is missing.  DEPENDS_ON is a set of the
3549    invariants the computation depends on.  */
3550 
3551 static comp_cost
3552 split_address_cost (struct ivopts_data *data,
3553 		    tree addr, bool *symbol_present, bool *var_present,
3554 		    unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3555 {
3556   tree core;
3557   HOST_WIDE_INT bitsize;
3558   HOST_WIDE_INT bitpos;
3559   tree toffset;
3560   enum machine_mode mode;
3561   int unsignedp, volatilep;
3562 
3563   core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3564 			      &unsignedp, &volatilep, false);
3565 
3566   if (toffset != 0
3567       || bitpos % BITS_PER_UNIT != 0
3568       || TREE_CODE (core) != VAR_DECL)
3569     {
3570       *symbol_present = false;
3571       *var_present = true;
3572       fd_ivopts_data = data;
3573       walk_tree (&addr, find_depends, depends_on, NULL);
3574       return new_cost (target_spill_cost[data->speed], 0);
3575     }
3576 
3577   *offset += bitpos / BITS_PER_UNIT;
3578   if (TREE_STATIC (core)
3579       || DECL_EXTERNAL (core))
3580     {
3581       *symbol_present = true;
3582       *var_present = false;
3583       return zero_cost;
3584     }
3585 
3586   *symbol_present = false;
3587   *var_present = true;
3588   return zero_cost;
3589 }
3590 
3591 /* Estimates cost of expressing difference of addresses E1 - E2 as
3592    var + symbol + offset.  The value of offset is added to OFFSET,
3593    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3594    part is missing.  DEPENDS_ON is a set of the invariants the computation
3595    depends on.  */
3596 
3597 static comp_cost
3598 ptr_difference_cost (struct ivopts_data *data,
3599 		     tree e1, tree e2, bool *symbol_present, bool *var_present,
3600 		     unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3601 {
3602   HOST_WIDE_INT diff = 0;
3603   aff_tree aff_e1, aff_e2;
3604   tree type;
3605 
3606   gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3607 
3608   if (ptr_difference_const (e1, e2, &diff))
3609     {
3610       *offset += diff;
3611       *symbol_present = false;
3612       *var_present = false;
3613       return zero_cost;
3614     }
3615 
3616   if (integer_zerop (e2))
3617     return split_address_cost (data, TREE_OPERAND (e1, 0),
3618 			       symbol_present, var_present, offset, depends_on);
3619 
3620   *symbol_present = false;
3621   *var_present = true;
3622 
3623   type = signed_type_for (TREE_TYPE (e1));
3624   tree_to_aff_combination (e1, type, &aff_e1);
3625   tree_to_aff_combination (e2, type, &aff_e2);
3626   aff_combination_scale (&aff_e2, double_int_minus_one);
3627   aff_combination_add (&aff_e1, &aff_e2);
3628 
3629   return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3630 }
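
/* Illustrative example (not from the original sources): for E1 = &A[4] and
   E2 = &A[1] with 4-byte elements, ptr_difference_const folds the difference
   to the constant 12, so OFFSET is bumped by 12 and the returned cost is
   zero -- no runtime computation is needed at all.  */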
3631 
3632 /* Estimates cost of expressing difference E1 - E2 as
3633    var + symbol + offset.  The value of offset is added to OFFSET,
3634    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3635    part is missing.  DEPENDS_ON is a set of the invariants the computation
3636    depends on.  */
3637 
3638 static comp_cost
3639 difference_cost (struct ivopts_data *data,
3640 		 tree e1, tree e2, bool *symbol_present, bool *var_present,
3641 		 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3642 {
3643   enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3644   unsigned HOST_WIDE_INT off1, off2;
3645   aff_tree aff_e1, aff_e2;
3646   tree type;
3647 
3648   e1 = strip_offset (e1, &off1);
3649   e2 = strip_offset (e2, &off2);
3650   *offset += off1 - off2;
3651 
3652   STRIP_NOPS (e1);
3653   STRIP_NOPS (e2);
3654 
3655   if (TREE_CODE (e1) == ADDR_EXPR)
3656     return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
3657 				offset, depends_on);
3658   *symbol_present = false;
3659 
3660   if (operand_equal_p (e1, e2, 0))
3661     {
3662       *var_present = false;
3663       return zero_cost;
3664     }
3665 
3666   *var_present = true;
3667 
3668   if (integer_zerop (e2))
3669     return force_var_cost (data, e1, depends_on);
3670 
3671   if (integer_zerop (e1))
3672     {
3673       comp_cost cost = force_var_cost (data, e2, depends_on);
3674       cost.cost += multiply_by_cost (-1, mode, data->speed);
3675       return cost;
3676     }
3677 
3678   type = signed_type_for (TREE_TYPE (e1));
3679   tree_to_aff_combination (e1, type, &aff_e1);
3680   tree_to_aff_combination (e2, type, &aff_e2);
3681   aff_combination_scale (&aff_e2, double_int_minus_one);
3682   aff_combination_add (&aff_e1, &aff_e2);
3683 
3684   return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3685 }
3686 
3687 /* Determines the cost of the computation by which USE is expressed
3688    from induction variable CAND.  If ADDRESS_P is true, we just need
3689    to create an address from it, otherwise we want to get it into a
3690    register.  A set of invariants we depend on is stored in
3691    DEPENDS_ON.  AT is the statement at which the value is computed.
3692    If CAN_AUTOINC is nonnull, use it to record whether autoinc
3693    addressing is likely.  */
3694 
3695 static comp_cost
3696 get_computation_cost_at (struct ivopts_data *data,
3697 			 struct iv_use *use, struct iv_cand *cand,
3698 			 bool address_p, bitmap *depends_on, gimple at,
3699 			 bool *can_autoinc)
3700 {
3701   tree ubase = use->iv->base, ustep = use->iv->step;
3702   tree cbase, cstep;
3703   tree utype = TREE_TYPE (ubase), ctype;
3704   unsigned HOST_WIDE_INT cstepi, offset = 0;
3705   HOST_WIDE_INT ratio, aratio;
3706   bool var_present, symbol_present, stmt_is_after_inc;
3707   comp_cost cost;
3708   double_int rat;
3709   bool speed = optimize_bb_for_speed_p (gimple_bb (at));
3710 
3711   *depends_on = NULL;
3712 
3713   /* Only consider real candidates.  */
3714   if (!cand->iv)
3715     return infinite_cost;
3716 
3717   cbase = cand->iv->base;
3718   cstep = cand->iv->step;
3719   ctype = TREE_TYPE (cbase);
3720 
3721   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3722     {
3723       /* We do not have enough precision to express the values of the use.  */
3724       return infinite_cost;
3725     }
3726 
3727   if (address_p)
3728     {
3729       /* Do not try to express the address of an object with a computation
3730 	 based on the address of a different object.  This may cause problems
3731 	 in RTL-level alias analysis (which does not expect this to happen,
3732 	 as it is illegal in C), and would be unlikely to be useful
3733 	 anyway.  */
3734       if (use->iv->base_object
3735 	  && cand->iv->base_object
3736 	  && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3737 	return infinite_cost;
3738     }
3739 
3740   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3741     {
3742       /* TODO -- add direct handling of this case.  */
3743       goto fallback;
3744     }
3745 
3746   /* CSTEPI is removed from the offset in case the statement is after the
3747      increment.  If the step is not constant, we use zero instead.
3748      This is a bit imprecise (there is the extra addition), but
3749      redundancy elimination is likely to transform the code so that
3750      it uses the value of the variable before the increment anyway,
3751      so this is not all that unrealistic.  */
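  /* For instance, if the candidate is incremented by "i += 4" before the
     use, its value at the use is already 4 beyond the start-of-iteration
     value, so ratio * 4 is subtracted from the offset by the
     stmt_is_after_inc adjustment below.  */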
3752   if (cst_and_fits_in_hwi (cstep))
3753     cstepi = int_cst_value (cstep);
3754   else
3755     cstepi = 0;
3756 
3757   if (!constant_multiple_of (ustep, cstep, &rat))
3758     return infinite_cost;
3759 
3760   if (double_int_fits_in_shwi_p (rat))
3761     ratio = double_int_to_shwi (rat);
3762   else
3763     return infinite_cost;
3764 
3765   STRIP_NOPS (cbase);
3766   ctype = TREE_TYPE (cbase);
3767 
3768   /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
3769      or ratio == 1, it is better to handle this like
3770 
3771      ubase - ratio * cbase + ratio * var
3772 
3773      (this also holds in the case ratio == -1, TODO).  */
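  /* A worked example: for use = ubase + 2 * (var - 8), i.e. cbase = 8 and
     ratio = 2, we instead cost the equivalent form (ubase - 16) + 2 * var,
     folding -ratio * cbase = -16 into the constant offset.  */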
3774 
3775   if (cst_and_fits_in_hwi (cbase))
3776     {
3777       offset = - ratio * int_cst_value (cbase);
3778       cost = difference_cost (data,
3779 			      ubase, build_int_cst (utype, 0),
3780 			      &symbol_present, &var_present, &offset,
3781 			      depends_on);
3782     }
3783   else if (ratio == 1)
3784     {
3785       cost = difference_cost (data,
3786 			      ubase, cbase,
3787 			      &symbol_present, &var_present, &offset,
3788 			      depends_on);
3789     }
3790   else if (address_p
3791 	   && !POINTER_TYPE_P (ctype)
3792 	   && multiplier_allowed_in_address_p
3793 		(ratio, TYPE_MODE (TREE_TYPE (utype)),
3794 			TYPE_ADDR_SPACE (TREE_TYPE (utype))))
3795     {
3796       cbase
3797 	= fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
3798       cost = difference_cost (data,
3799 			      ubase, cbase,
3800 			      &symbol_present, &var_present, &offset,
3801 			      depends_on);
3802     }
3803   else
3804     {
3805       cost = force_var_cost (data, cbase, depends_on);
3806       cost.cost += add_cost (TYPE_MODE (ctype), data->speed);
3807       cost = add_costs (cost,
3808 			difference_cost (data,
3809 					 ubase, build_int_cst (utype, 0),
3810 					 &symbol_present, &var_present,
3811 					 &offset, depends_on));
3812     }
3813 
3814   /* If we are after the increment, the value of the candidate is higher by
3815      one iteration.  */
3816   stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
3817   if (stmt_is_after_inc)
3818     offset -= ratio * cstepi;
3819 
3820   /* Now the computation has the shape symbol + var1 + const + ratio * var2.
3821      (symbol/var1/const parts may be omitted).  If we are looking for an
3822      address, find the cost of addressing this.  */
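  /* E.g. the computation may be &arr (symbol) + delta (var1) + 4 (const)
     + 2 * i (ratio * var2), where delta is loop-invariant and i is the
     candidate variable.  */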
3823   if (address_p)
3824     return add_costs (cost,
3825 		      get_address_cost (symbol_present, var_present,
3826 					offset, ratio, cstepi,
3827 					TYPE_MODE (TREE_TYPE (utype)),
3828 					TYPE_ADDR_SPACE (TREE_TYPE (utype)),
3829 					speed, stmt_is_after_inc,
3830 					can_autoinc));
3831 
3832   /* Otherwise estimate the costs for computing the expression.  */
3833   if (!symbol_present && !var_present && !offset)
3834     {
3835       if (ratio != 1)
3836 	cost.cost += multiply_by_cost (ratio, TYPE_MODE (ctype), speed);
3837       return cost;
3838     }
3839 
3840   /* Symbol + offset should be compile-time computable, so consider that
3841      they are added once to the variable, if present.  */
3842   if (var_present && (symbol_present || offset))
3843     cost.cost += add_cost (TYPE_MODE (ctype), speed)
3844 		 / AVG_LOOP_NITER (data->current_loop);
3845 
3846   /* Having offset does not affect runtime cost in case it is added to
3847      symbol, but it increases complexity.  */
3848   if (offset)
3849     cost.complexity++;
3850 
3851   cost.cost += add_cost (TYPE_MODE (ctype), speed);
3852 
3853   aratio = ratio > 0 ? ratio : -ratio;
3854   if (aratio != 1)
3855     cost.cost += multiply_by_cost (aratio, TYPE_MODE (ctype), speed);
3856   return cost;
3857 
3858 fallback:
3859   if (can_autoinc)
3860     *can_autoinc = false;
3861 
3862   {
3863     /* Just get the expression, expand it and measure the cost.  */
3864     tree comp = get_computation_at (data->current_loop, use, cand, at);
3865 
3866     if (!comp)
3867       return infinite_cost;
3868 
3869     if (address_p)
3870       comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
3871 
3872     return new_cost (computation_cost (comp, speed), 0);
3873   }
3874 }
3875 
3876 /* Determines the cost of the computation by which USE is expressed
3877    from induction variable CAND.  If ADDRESS_P is true, we just need
3878    to create an address from it, otherwise we want to get it into a
3879    register.  A set of invariants we depend on is stored in
3880    DEPENDS_ON.  If CAN_AUTOINC is nonnull, use it to record whether
3881    autoinc addressing is likely.  */
3882 
3883 static comp_cost
3884 get_computation_cost (struct ivopts_data *data,
3885 		      struct iv_use *use, struct iv_cand *cand,
3886 		      bool address_p, bitmap *depends_on, bool *can_autoinc)
3887 {
3888   return get_computation_cost_at (data,
3889 				  use, cand, address_p, depends_on, use->stmt,
3890 				  can_autoinc);
3891 }
3892 
3893 /* Determines cost of basing replacement of USE on CAND in a generic
3894    expression.  */
3895 
3896 static bool
3897 determine_use_iv_cost_generic (struct ivopts_data *data,
3898 			       struct iv_use *use, struct iv_cand *cand)
3899 {
3900   bitmap depends_on;
3901   comp_cost cost;
3902 
3903   /* The simple case first -- if we need to express the value of the
3904      preserved original biv, the cost is 0.  This also prevents us from
3905      counting the cost of the increment twice -- once at this use and
3906      once in the cost of the candidate.  */
3907   if (cand->pos == IP_ORIGINAL
3908       && cand->incremented_at == use->stmt)
3909     {
3910       set_use_iv_cost (data, use, cand, zero_cost, NULL, NULL_TREE);
3911       return true;
3912     }
3913 
3914   cost = get_computation_cost (data, use, cand, false, &depends_on, NULL);
3915   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3916 
3917   return !infinite_cost_p (cost);
3918 }
3919 
3920 /* Determines cost of basing replacement of USE on CAND in an address.  */
3921 
3922 static bool
3923 determine_use_iv_cost_address (struct ivopts_data *data,
3924 			       struct iv_use *use, struct iv_cand *cand)
3925 {
3926   bitmap depends_on;
3927   bool can_autoinc;
3928   comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
3929 					 &can_autoinc);
3930 
3931   if (cand->ainc_use == use)
3932     {
3933       if (can_autoinc)
3934 	cost.cost -= cand->cost_step;
3935       /* If we generated the candidate solely for exploiting autoincrement
3936 	 opportunities, and it turns out it can't be used, set the cost to
3937 	 infinity to make sure we ignore it.  */
3938       else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
3939 	cost = infinite_cost;
3940     }
3941   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3942 
3943   return !infinite_cost_p (cost);
3944 }
3945 
3946 /* Computes value of candidate CAND at position AT in iteration NITER, and
3947    stores it to VAL.  */
3948 
3949 static void
3950 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
3951 	       aff_tree *val)
3952 {
3953   aff_tree step, delta, nit;
3954   struct iv *iv = cand->iv;
3955   tree type = TREE_TYPE (iv->base);
3956   tree steptype = type;
3957   if (POINTER_TYPE_P (type))
3958     steptype = sizetype;
3959 
3960   tree_to_aff_combination (iv->step, steptype, &step);
3961   tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
3962   aff_combination_convert (&nit, steptype);
3963   aff_combination_mult (&nit, &step, &delta);
3964   if (stmt_after_increment (loop, cand, at))
3965     aff_combination_add (&delta, &step);
3966 
3967   tree_to_aff_combination (iv->base, type, val);
3968   aff_combination_add (val, &delta);
3969 }
3970 
3971 /* Returns the period of induction variable IV.  */
3972 
3973 static tree
3974 iv_period (struct iv *iv)
3975 {
3976   tree step = iv->step, period, type;
3977   tree pow2div;
3978 
3979   gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
3980 
3981   /* The period of the iv is the type range divided by gcd (step, type
3982      range).  Since the type range is a power of two, it suffices to
3983      determine the maximum power of two that divides the step.  */
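  /* E.g. for a 32-bit iv with step 4, two low-order zero bits divide the
     step, so the mask built below covers the low 30 bits (2^30 - 1); the
     iv takes 2^30 distinct values before wrapping around.  */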
3984   pow2div = num_ending_zeros (step);
3985   type = unsigned_type_for (TREE_TYPE (step));
3986 
3987   period = build_low_bits_mask (type,
3988 				(TYPE_PRECISION (type)
3989 				 - tree_low_cst (pow2div, 1)));
3990 
3991   return period;
3992 }
3993 
3994 /* Returns the comparison operator used when eliminating the iv USE.  */
3995 
3996 static enum tree_code
3997 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
3998 {
3999   struct loop *loop = data->current_loop;
4000   basic_block ex_bb;
4001   edge exit;
4002 
4003   ex_bb = gimple_bb (use->stmt);
4004   exit = EDGE_SUCC (ex_bb, 0);
4005   if (flow_bb_inside_loop_p (loop, exit->dest))
4006     exit = EDGE_SUCC (ex_bb, 1);
4007 
4008   return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4009 }
4010 
4011 /* Check whether it is possible to express the condition in USE by comparison
4012    of candidate CAND.  If so, store the value it is compared with to *BOUND.  */
4013 
4014 static bool
4015 may_eliminate_iv (struct ivopts_data *data,
4016 		  struct iv_use *use, struct iv_cand *cand, tree *bound)
4017 {
4018   basic_block ex_bb;
4019   edge exit;
4020   tree nit, period;
4021   struct loop *loop = data->current_loop;
4022   aff_tree bnd;
4023 
4024   if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4025     return false;
4026 
4027   /* For now this works only for exits that dominate the loop latch.
4028      TODO: extend to other conditions inside the loop body.  */
4029   ex_bb = gimple_bb (use->stmt);
4030   if (use->stmt != last_stmt (ex_bb)
4031       || gimple_code (use->stmt) != GIMPLE_COND
4032       || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4033     return false;
4034 
4035   exit = EDGE_SUCC (ex_bb, 0);
4036   if (flow_bb_inside_loop_p (loop, exit->dest))
4037     exit = EDGE_SUCC (ex_bb, 1);
4038   if (flow_bb_inside_loop_p (loop, exit->dest))
4039     return false;
4040 
4041   nit = niter_for_exit (data, exit);
4042   if (!nit)
4043     return false;
4044 
4045   /* Determine whether we can use the variable to test the exit condition.
4046      This is the case iff the period of the induction variable is greater
4047      than the number of iterations for which the exit condition is true.  */
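  /* E.g. a candidate with step 1 has a period mask of all ones
     (2^precision - 1), so it can replace the exit test of any loop known
     to iterate fewer times than that; a step of 2 halves this bound.  */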
4048   period = iv_period (cand->iv);
4049 
4050   /* If the number of iterations is constant, compare against it directly.  */
4051   if (TREE_CODE (nit) == INTEGER_CST)
4052     {
4053       if (!tree_int_cst_lt (nit, period))
4054 	return false;
4055     }
4056 
4057   /* If not, and if this is the only possible exit of the loop, see whether
4058      we can get a conservative estimate on the number of iterations of the
4059      entire loop and compare against that instead.  */
4060   else if (loop_only_exit_p (loop, exit))
4061     {
4062       double_int period_value, max_niter;
4063       if (!estimated_loop_iterations (loop, true, &max_niter))
4064 	return false;
4065       period_value = tree_to_double_int (period);
4066       if (double_int_ucmp (max_niter, period_value) >= 0)
4067 	return false;
4068     }
4069 
4070   /* Otherwise, punt.  */
4071   else
4072     return false;
4073 
4074   cand_value_at (loop, cand, use->stmt, nit, &bnd);
4075 
4076   *bound = aff_combination_to_tree (&bnd);
4077   /* It is unlikely that computing the number of iterations using division
4078      would be more profitable than keeping the original induction variable.  */
4079   if (expression_expensive_p (*bound))
4080     return false;
4081   return true;
4082 }
4083 
4084 /* Determines cost of basing replacement of USE on CAND in a condition.  */
4085 
4086 static bool
4087 determine_use_iv_cost_condition (struct ivopts_data *data,
4088 				 struct iv_use *use, struct iv_cand *cand)
4089 {
4090   tree bound = NULL_TREE;
4091   struct iv *cmp_iv;
4092   bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
4093   comp_cost elim_cost, express_cost, cost;
4094   bool ok;
4095   tree *control_var, *bound_cst;
4096 
4097   /* Only consider real candidates.  */
4098   if (!cand->iv)
4099     {
4100       set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE);
4101       return false;
4102     }
4103 
4104   /* Try iv elimination.  */
4105   if (may_eliminate_iv (data, use, cand, &bound))
4106     {
4107       elim_cost = force_var_cost (data, bound, &depends_on_elim);
4108       /* The bound is a loop invariant, so it will be computed only
4109 	 once.  */
4110       elim_cost.cost /= AVG_LOOP_NITER (data->current_loop);
4111     }
4112   else
4113     elim_cost = infinite_cost;
4114 
4115   /* Try expressing the original giv.  If it is compared with an invariant,
4116      note that we cannot get rid of it.  */
4117   ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
4118 			      NULL, &cmp_iv);
4119   gcc_assert (ok);
4120 
4121   /* When the condition is a comparison of the candidate IV against
4122      zero, prefer this IV.
4123 
4124      TODO: The constant that we're subtracting from the cost should
4125      be target-dependent.  This information should be added to the
4126      target costs for each backend.  */
4127   if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
4128       && integer_zerop (*bound_cst)
4129       && (operand_equal_p (*control_var, cand->var_after, 0)
4130 	  || operand_equal_p (*control_var, cand->var_before, 0)))
4131     elim_cost.cost -= 1;
4132 
4133   express_cost = get_computation_cost (data, use, cand, false,
4134 				       &depends_on_express, NULL);
4135   fd_ivopts_data = data;
4136   walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
4137 
4138   /* Choose the better approach, preferring the eliminated IV.  */
4139   if (compare_costs (elim_cost, express_cost) <= 0)
4140     {
4141       cost = elim_cost;
4142       depends_on = depends_on_elim;
4143       depends_on_elim = NULL;
4144     }
4145   else
4146     {
4147       cost = express_cost;
4148       depends_on = depends_on_express;
4149       depends_on_express = NULL;
4150       bound = NULL_TREE;
4151     }
4152 
4153   set_use_iv_cost (data, use, cand, cost, depends_on, bound);
4154 
4155   if (depends_on_elim)
4156     BITMAP_FREE (depends_on_elim);
4157   if (depends_on_express)
4158     BITMAP_FREE (depends_on_express);
4159 
4160   return !infinite_cost_p (cost);
4161 }
4162 
4163 /* Determines cost of basing replacement of USE on CAND.  Returns false
4164    if USE cannot be based on CAND.  */
4165 
4166 static bool
4167 determine_use_iv_cost (struct ivopts_data *data,
4168 		       struct iv_use *use, struct iv_cand *cand)
4169 {
4170   switch (use->type)
4171     {
4172     case USE_NONLINEAR_EXPR:
4173       return determine_use_iv_cost_generic (data, use, cand);
4174 
4175     case USE_ADDRESS:
4176       return determine_use_iv_cost_address (data, use, cand);
4177 
4178     case USE_COMPARE:
4179       return determine_use_iv_cost_condition (data, use, cand);
4180 
4181     default:
4182       gcc_unreachable ();
4183     }
4184 }
4185 
4186 /* Return true if get_computation_cost indicates that autoincrement is
4187    a possibility for the pair of USE and CAND, false otherwise.  */
4188 
4189 static bool
4190 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
4191 			   struct iv_cand *cand)
4192 {
4193   bitmap depends_on;
4194   bool can_autoinc;
4195   comp_cost cost;
4196 
4197   if (use->type != USE_ADDRESS)
4198     return false;
4199 
4200   cost = get_computation_cost (data, use, cand, true, &depends_on,
4201 			       &can_autoinc);
4202 
4203   BITMAP_FREE (depends_on);
4204 
4205   return !infinite_cost_p (cost) && can_autoinc;
4206 }
4207 
4208 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
4209    use that allows autoincrement, and set their AINC_USE if possible.  */
4210 
4211 static void
4212 set_autoinc_for_original_candidates (struct ivopts_data *data)
4213 {
4214   unsigned i, j;
4215 
4216   for (i = 0; i < n_iv_cands (data); i++)
4217     {
4218       struct iv_cand *cand = iv_cand (data, i);
4219       struct iv_use *closest = NULL;
4220       if (cand->pos != IP_ORIGINAL)
4221 	continue;
4222       for (j = 0; j < n_iv_uses (data); j++)
4223 	{
4224 	  struct iv_use *use = iv_use (data, j);
4225 	  unsigned uid = gimple_uid (use->stmt);
4226 	  if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at)
4227 	      || uid > gimple_uid (cand->incremented_at))
4228 	    continue;
4229 	  if (closest == NULL || uid > gimple_uid (closest->stmt))
4230 	    closest = use;
4231 	}
4232       if (closest == NULL || !autoinc_possible_for_pair (data, closest, cand))
4233 	continue;
4234       cand->ainc_use = closest;
4235     }
4236 }
4237 
4238 /* Finds the candidates for the induction variables.  */
4239 
4240 static void
4241 find_iv_candidates (struct ivopts_data *data)
4242 {
4243   /* Add commonly used ivs.  */
4244   add_standard_iv_candidates (data);
4245 
4246   /* Add old induction variables.  */
4247   add_old_ivs_candidates (data);
4248 
4249   /* Add induction variables derived from uses.  */
4250   add_derived_ivs_candidates (data);
4251 
4252   set_autoinc_for_original_candidates (data);
4253 
4254   /* Record the important candidates.  */
4255   record_important_candidates (data);
4256 }
4257 
4258 /* Determines costs of basing the use of the iv on an iv candidate.  */
4259 
4260 static void
4261 determine_use_iv_costs (struct ivopts_data *data)
4262 {
4263   unsigned i, j;
4264   struct iv_use *use;
4265   struct iv_cand *cand;
4266   bitmap to_clear = BITMAP_ALLOC (NULL);
4267 
4268   alloc_use_cost_map (data);
4269 
4270   for (i = 0; i < n_iv_uses (data); i++)
4271     {
4272       use = iv_use (data, i);
4273 
4274       if (data->consider_all_candidates)
4275 	{
4276 	  for (j = 0; j < n_iv_cands (data); j++)
4277 	    {
4278 	      cand = iv_cand (data, j);
4279 	      determine_use_iv_cost (data, use, cand);
4280 	    }
4281 	}
4282       else
4283 	{
4284 	  bitmap_iterator bi;
4285 
4286 	  EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
4287 	    {
4288 	      cand = iv_cand (data, j);
4289 	      if (!determine_use_iv_cost (data, use, cand))
4290 		bitmap_set_bit (to_clear, j);
4291 	    }
4292 
4293 	  /* Remove the candidates for which the cost is infinite from
4294 	     the list of related candidates.  */
4295 	  bitmap_and_compl_into (use->related_cands, to_clear);
4296 	  bitmap_clear (to_clear);
4297 	}
4298     }
4299 
4300   BITMAP_FREE (to_clear);
4301 
4302   if (dump_file && (dump_flags & TDF_DETAILS))
4303     {
4304       fprintf (dump_file, "Use-candidate costs:\n");
4305 
4306       for (i = 0; i < n_iv_uses (data); i++)
4307 	{
4308 	  use = iv_use (data, i);
4309 
4310 	  fprintf (dump_file, "Use %d:\n", i);
4311 	  fprintf (dump_file, "  cand\tcost\tcompl.\tdepends on\n");
4312 	  for (j = 0; j < use->n_map_members; j++)
4313 	    {
4314 	      if (!use->cost_map[j].cand
4315 		  || infinite_cost_p (use->cost_map[j].cost))
4316 		continue;
4317 
4318 	      fprintf (dump_file, "  %d\t%d\t%d\t",
4319 		       use->cost_map[j].cand->id,
4320 		       use->cost_map[j].cost.cost,
4321 		       use->cost_map[j].cost.complexity);
4322 	      if (use->cost_map[j].depends_on)
4323 		bitmap_print (dump_file,
4324 			      use->cost_map[j].depends_on, "","");
4325 	      fprintf (dump_file, "\n");
4326 	    }
4327 
4328 	  fprintf (dump_file, "\n");
4329 	}
4330       fprintf (dump_file, "\n");
4331     }
4332 }
4333 
4334 /* Determines cost of the candidate CAND.  */
4335 
4336 static void
4337 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
4338 {
4339   comp_cost cost_base;
4340   unsigned cost, cost_step;
4341   tree base;
4342 
4343   if (!cand->iv)
4344     {
4345       cand->cost = 0;
4346       return;
4347     }
4348 
4349   /* There are two costs associated with the candidate -- its increment
4350      and its initialization.  The second is almost negligible for any loop
4351      that rolls enough, so we give it only very little weight.  */
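  /* A sketch with made-up numbers: if the step costs one addition (1) and
     forcing the base costs 4, a loop assumed to roll AVG_LOOP_NITER = 5
     times pays 1 + 4 / 5 = 1 after the integer division below, so the
     increment cost dominates.  */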
4352 
4353   base = cand->iv->base;
4354   cost_base = force_var_cost (data, base, NULL);
4355   cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)), data->speed);
4356 
4357   cost = cost_step + cost_base.cost / AVG_LOOP_NITER (current_loop);
4358 
4359   /* Prefer the original ivs unless we may gain something by replacing them.
4360      The reason is to make debugging simpler; this is not relevant for
4361      artificial ivs created by other optimization passes.  */
4362   if (cand->pos != IP_ORIGINAL
4363       || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
4364     cost++;
4365 
4366   /* Prefer not to insert statements into the latch unless there are some
4367      already (so that we do not create unnecessary jumps).  */
4368   if (cand->pos == IP_END
4369       && empty_block_p (ip_end_pos (data->current_loop)))
4370     cost++;
4371 
4372   cand->cost = cost;
4373   cand->cost_step = cost_step;
4374 }
4375 
4376 /* Determines costs of computation of the candidates.  */
4377 
4378 static void
4379 determine_iv_costs (struct ivopts_data *data)
4380 {
4381   unsigned i;
4382 
4383   if (dump_file && (dump_flags & TDF_DETAILS))
4384     {
4385       fprintf (dump_file, "Candidate costs:\n");
4386       fprintf (dump_file, "  cand\tcost\n");
4387     }
4388 
4389   for (i = 0; i < n_iv_cands (data); i++)
4390     {
4391       struct iv_cand *cand = iv_cand (data, i);
4392 
4393       determine_iv_cost (data, cand);
4394 
4395       if (dump_file && (dump_flags & TDF_DETAILS))
4396 	fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
4397     }
4398 
4399   if (dump_file && (dump_flags & TDF_DETAILS))
4400     fprintf (dump_file, "\n");
4401 }
4402 
4403 /* Calculates cost for having SIZE induction variables.  */
4404 
4405 static unsigned
4406 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
4407 {
4408   /* We add size to the cost, so that we prefer eliminating ivs
4409      if possible.  */
4410   return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed);
4411 }
4412 
4413 /* For each size of the induction variable set determine the penalty.  */
4414 
4415 static void
4416 determine_set_costs (struct ivopts_data *data)
4417 {
4418   unsigned j, n;
4419   gimple phi;
4420   gimple_stmt_iterator psi;
4421   tree op;
4422   struct loop *loop = data->current_loop;
4423   bitmap_iterator bi;
4424 
4425   /* We use the following model (definitely improvable, especially the
4426      cost function -- TODO):
4427 
4428      We estimate the number of registers available (using MD data), name it A.
4429 
4430      We estimate the number of registers used by the loop, name it U.  This
4431      number is obtained as the number of loop phi nodes (not counting virtual
4432      registers and bivs) + the number of variables from outside of the loop.
4433 
4434      We set a reserve R (free regs that are used for temporary computations,
4435      etc.).  For now the reserve is a constant 3.
4436 
4437      Let I be the number of induction variables.
4438 
4439      -- if U + I + R <= A, the cost is I * SMALL_COST (just so as not to
4440 	encourage making a lot of ivs without a reason).
4441      -- if A - R < U + I <= A, the cost is I * PRES_COST
4442      -- if U + I > A, the cost is I * PRES_COST and
4443         number of uses * SPILL_COST * (U + I - A) / (U + I) is added.  */
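  /* For illustration, with made-up numbers A = 16 and R = 3: U = 8 live
     values and I = 3 ivs give U + I + R = 14 <= A, the cheap case; I = 7
     gives A - R < U + I = 15 <= A, so each iv pays PRES_COST; and I = 12
     gives U + I = 20 > A, where roughly (20 - 16) / 20 of the uses are
     charged a spill cost.  */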
4444 
4445   if (dump_file && (dump_flags & TDF_DETAILS))
4446     {
4447       fprintf (dump_file, "Global costs:\n");
4448       fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
4449       fprintf (dump_file, "  target_reg_cost %d\n", target_reg_cost[data->speed]);
4450       fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost[data->speed]);
4451     }
4452 
4453   n = 0;
4454   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
4455     {
4456       phi = gsi_stmt (psi);
4457       op = PHI_RESULT (phi);
4458 
4459       if (!is_gimple_reg (op))
4460 	continue;
4461 
4462       if (get_iv (data, op))
4463 	continue;
4464 
4465       n++;
4466     }
4467 
4468   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
4469     {
4470       struct version_info *info = ver_info (data, j);
4471 
4472       if (info->inv_id && info->has_nonlin_use)
4473 	n++;
4474     }
4475 
4476   data->regs_used = n;
4477   if (dump_file && (dump_flags & TDF_DETAILS))
4478     fprintf (dump_file, "  regs_used %d\n", n);
4479 
4480   if (dump_file && (dump_flags & TDF_DETAILS))
4481     {
4482       fprintf (dump_file, "  cost for size:\n");
4483       fprintf (dump_file, "  ivs\tcost\n");
4484       for (j = 0; j <= 2 * target_avail_regs; j++)
4485 	fprintf (dump_file, "  %d\t%d\n", j,
4486 		 ivopts_global_cost_for_size (data, j));
4487       fprintf (dump_file, "\n");
4488     }
4489 }
4490 
4491 /* Returns true if A is a cheaper cost pair than B.  */
4492 
4493 static bool
4494 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
4495 {
4496   int cmp;
4497 
4498   if (!a)
4499     return false;
4500 
4501   if (!b)
4502     return true;
4503 
4504   cmp = compare_costs (a->cost, b->cost);
4505   if (cmp < 0)
4506     return true;
4507 
4508   if (cmp > 0)
4509     return false;
4510 
4511   /* In case the costs are the same, prefer the cheaper candidate.  */
4512   if (a->cand->cost < b->cand->cost)
4513     return true;
4514 
4515   return false;
4516 }
4517 
4518 /* Computes the cost field of IVS structure.  */
4519 
4520 static void
4521 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4522 {
4523   comp_cost cost = ivs->cand_use_cost;
4524   cost.cost += ivs->cand_cost;
4525   cost.cost += ivopts_global_cost_for_size (data, ivs->n_regs);
4526 
4527   ivs->cost = cost;
4528 }
4529 
4530 /* Remove invariants in set INVS from set IVS.  */
4531 
4532 static void
4533 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4534 {
4535   bitmap_iterator bi;
4536   unsigned iid;
4537 
4538   if (!invs)
4539     return;
4540 
4541   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4542     {
4543       ivs->n_invariant_uses[iid]--;
4544       if (ivs->n_invariant_uses[iid] == 0)
4545 	ivs->n_regs--;
4546     }
4547 }
4548 
4549 /* Set USE not to be expressed by any candidate in IVS.  */
4550 
4551 static void
4552 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
4553 		 struct iv_use *use)
4554 {
4555   unsigned uid = use->id, cid;
4556   struct cost_pair *cp;
4557 
4558   cp = ivs->cand_for_use[uid];
4559   if (!cp)
4560     return;
4561   cid = cp->cand->id;
4562 
4563   ivs->bad_uses++;
4564   ivs->cand_for_use[uid] = NULL;
4565   ivs->n_cand_uses[cid]--;
4566 
4567   if (ivs->n_cand_uses[cid] == 0)
4568     {
4569       bitmap_clear_bit (ivs->cands, cid);
4570       /* Do not count the pseudocandidates.  */
4571       if (cp->cand->iv)
4572 	ivs->n_regs--;
4573       ivs->n_cands--;
4574       ivs->cand_cost -= cp->cand->cost;
4575 
4576       iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
4577     }
4578 
4579   ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
4580 
4581   iv_ca_set_remove_invariants (ivs, cp->depends_on);
4582   iv_ca_recount_cost (data, ivs);
4583 }
4584 
4585 /* Add invariants in set INVS to set IVS.  */
4586 
4587 static void
4588 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4589 {
4590   bitmap_iterator bi;
4591   unsigned iid;
4592 
4593   if (!invs)
4594     return;
4595 
4596   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4597     {
4598       ivs->n_invariant_uses[iid]++;
4599       if (ivs->n_invariant_uses[iid] == 1)
4600 	ivs->n_regs++;
4601     }
4602 }
4603 
4604 /* Set cost pair for USE in set IVS to CP.  */
4605 
4606 static void
4607 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
4608 	      struct iv_use *use, struct cost_pair *cp)
4609 {
4610   unsigned uid = use->id, cid;
4611 
4612   if (ivs->cand_for_use[uid] == cp)
4613     return;
4614 
4615   if (ivs->cand_for_use[uid])
4616     iv_ca_set_no_cp (data, ivs, use);
4617 
4618   if (cp)
4619     {
4620       cid = cp->cand->id;
4621 
4622       ivs->bad_uses--;
4623       ivs->cand_for_use[uid] = cp;
4624       ivs->n_cand_uses[cid]++;
4625       if (ivs->n_cand_uses[cid] == 1)
4626 	{
4627 	  bitmap_set_bit (ivs->cands, cid);
4628 	  /* Do not count the pseudocandidates.  */
4629 	  if (cp->cand->iv)
4630 	    ivs->n_regs++;
4631 	  ivs->n_cands++;
4632 	  ivs->cand_cost += cp->cand->cost;
4633 
4634 	  iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
4635 	}
4636 
4637       ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
4638       iv_ca_set_add_invariants (ivs, cp->depends_on);
4639       iv_ca_recount_cost (data, ivs);
4640     }
4641 }
4642 
4643 /* Extend set IVS by expressing USE by some of the candidates in it
4644    if possible.  */
4645 
4646 static void
4647 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4648 	       struct iv_use *use)
4649 {
4650   struct cost_pair *best_cp = NULL, *cp;
4651   bitmap_iterator bi;
4652   unsigned i;
4653 
4654   gcc_assert (ivs->upto >= use->id);
4655 
4656   if (ivs->upto == use->id)
4657     {
4658       ivs->upto++;
4659       ivs->bad_uses++;
4660     }
4661 
4662   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4663     {
4664       cp = get_use_iv_cost (data, use, iv_cand (data, i));
4665 
4666       if (cheaper_cost_pair (cp, best_cp))
4667 	best_cp = cp;
4668     }
4669 
4670   iv_ca_set_cp (data, ivs, use, best_cp);
4671 }
4672 
4673 /* Get cost for assignment IVS.  */
4674 
4675 static comp_cost
4676 iv_ca_cost (struct iv_ca *ivs)
4677 {
4678   /* This was a conditional expression but it triggered a bug in
4679      Sun C 5.5.  */
4680   if (ivs->bad_uses)
4681     return infinite_cost;
4682   else
4683     return ivs->cost;
4684 }
4685 
4686 /* Returns true if all dependences of CP are among invariants in IVS.  */
4687 
4688 static bool
4689 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
4690 {
4691   unsigned i;
4692   bitmap_iterator bi;
4693 
4694   if (!cp->depends_on)
4695     return true;
4696 
4697   EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
4698     {
4699       if (ivs->n_invariant_uses[i] == 0)
4700 	return false;
4701     }
4702 
4703   return true;
4704 }
4705 
4706 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
4707    it before NEXT_CHANGE.  */
4708 
4709 static struct iv_ca_delta *
4710 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4711 		 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
4712 {
4713   struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
4714 
4715   change->use = use;
4716   change->old_cp = old_cp;
4717   change->new_cp = new_cp;
4718   change->next_change = next_change;
4719 
4720   return change;
4721 }
4722 
4723 /* Joins two lists of changes L1 and L2.  Destructive -- old lists
4724    are rewritten.  */
4725 
4726 static struct iv_ca_delta *
4727 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4728 {
4729   struct iv_ca_delta *last;
4730 
4731   if (!l2)
4732     return l1;
4733 
4734   if (!l1)
4735     return l2;
4736 
4737   for (last = l1; last->next_change; last = last->next_change)
4738     continue;
4739   last->next_change = l2;
4740 
4741   return l1;
4742 }
4743 
4744 /* Returns the candidate by which USE is expressed in IVS.  */
4745 
4746 static struct cost_pair *
4747 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4748 {
4749   return ivs->cand_for_use[use->id];
4750 }
4751 
4752 /* Reverse the list of changes DELTA, forming the inverse to it.  */
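/* E.g. reversing the delta [u1: A -> B, u2: C -> D] yields
   [u2: D -> C, u1: B -> A], so committing the reversed list undoes the
   original changes.  */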
4753 
4754 static struct iv_ca_delta *
4755 iv_ca_delta_reverse (struct iv_ca_delta *delta)
4756 {
4757   struct iv_ca_delta *act, *next, *prev = NULL;
4758   struct cost_pair *tmp;
4759 
4760   for (act = delta; act; act = next)
4761     {
4762       next = act->next_change;
4763       act->next_change = prev;
4764       prev = act;
4765 
4766       tmp = act->old_cp;
4767       act->old_cp = act->new_cp;
4768       act->new_cp = tmp;
4769     }
4770 
4771   return prev;
4772 }
4773 
4774 /* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
4775    reverted instead.  */
4776 
4777 static void
4778 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
4779 		    struct iv_ca_delta *delta, bool forward)
4780 {
4781   struct cost_pair *from, *to;
4782   struct iv_ca_delta *act;
4783 
4784   if (!forward)
4785     delta = iv_ca_delta_reverse (delta);
4786 
4787   for (act = delta; act; act = act->next_change)
4788     {
4789       from = act->old_cp;
4790       to = act->new_cp;
4791       gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
4792       iv_ca_set_cp (data, ivs, act->use, to);
4793     }
4794 
4795   if (!forward)
4796     iv_ca_delta_reverse (delta);
4797 }
4798 
4799 /* Returns true if CAND is used in IVS.  */
4800 
4801 static bool
4802 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
4803 {
4804   return ivs->n_cand_uses[cand->id] > 0;
4805 }
4806 
4807 /* Returns number of induction variable candidates in the set IVS.  */
4808 
4809 static unsigned
4810 iv_ca_n_cands (struct iv_ca *ivs)
4811 {
4812   return ivs->n_cands;
4813 }
4814 
4815 /* Free the list of changes DELTA.  */
4816 
4817 static void
4818 iv_ca_delta_free (struct iv_ca_delta **delta)
4819 {
4820   struct iv_ca_delta *act, *next;
4821 
4822   for (act = *delta; act; act = next)
4823     {
4824       next = act->next_change;
4825       free (act);
4826     }
4827 
4828   *delta = NULL;
4829 }
4830 
4831 /* Allocates a new iv candidate assignment.  */
4832 
4833 static struct iv_ca *
4834 iv_ca_new (struct ivopts_data *data)
4835 {
4836   struct iv_ca *nw = XNEW (struct iv_ca);
4837 
4838   nw->upto = 0;
4839   nw->bad_uses = 0;
4840   nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
4841   nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
4842   nw->cands = BITMAP_ALLOC (NULL);
4843   nw->n_cands = 0;
4844   nw->n_regs = 0;
4845   nw->cand_use_cost = zero_cost;
4846   nw->cand_cost = 0;
4847   nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
4848   nw->cost = zero_cost;
4849 
4850   return nw;
4851 }
4852 
4853 /* Free memory occupied by the set IVS.  */
4854 
4855 static void
4856 iv_ca_free (struct iv_ca **ivs)
4857 {
4858   free ((*ivs)->cand_for_use);
4859   free ((*ivs)->n_cand_uses);
4860   BITMAP_FREE ((*ivs)->cands);
4861   free ((*ivs)->n_invariant_uses);
4862   free (*ivs);
4863   *ivs = NULL;
4864 }
4865 
4866 /* Dumps IVS to FILE.  */
4867 
4868 static void
4869 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
4870 {
4871   const char *pref = "  invariants ";
4872   unsigned i;
4873   comp_cost cost = iv_ca_cost (ivs);
4874 
4875   fprintf (file, "  cost %d (complexity %d)\n", cost.cost, cost.complexity);
4876   bitmap_print (file, ivs->cands, "  candidates ","\n");
4877 
4878   for (i = 1; i <= data->max_inv_id; i++)
4879     if (ivs->n_invariant_uses[i])
4880       {
4881 	fprintf (file, "%s%d", pref, i);
4882 	pref = ", ";
4883       }
4884   fprintf (file, "\n");
4885 }
4886 
4887 /* Try expressing each use by candidate CAND, keeping the change where it
4888    is cheaper.  Return the cost of the new set, and store the differences
4889    in DELTA.  The number of ivs in the new set is stored to N_IVS.  */
4890 
4891 static comp_cost
4892 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
4893 	      struct iv_cand *cand, struct iv_ca_delta **delta,
4894 	      unsigned *n_ivs)
4895 {
4896   unsigned i;
4897   comp_cost cost;
4898   struct iv_use *use;
4899   struct cost_pair *old_cp, *new_cp;
4900 
4901   *delta = NULL;
4902   for (i = 0; i < ivs->upto; i++)
4903     {
4904       use = iv_use (data, i);
4905       old_cp = iv_ca_cand_for_use (ivs, use);
4906 
4907       if (old_cp
4908 	  && old_cp->cand == cand)
4909 	continue;
4910 
4911       new_cp = get_use_iv_cost (data, use, cand);
4912       if (!new_cp)
4913 	continue;
4914 
4915       if (!iv_ca_has_deps (ivs, new_cp))
4916 	continue;
4917 
4918       if (!cheaper_cost_pair (new_cp, old_cp))
4919 	continue;
4920 
4921       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4922     }
4923 
4924   iv_ca_delta_commit (data, ivs, *delta, true);
4925   cost = iv_ca_cost (ivs);
4926   if (n_ivs)
4927     *n_ivs = iv_ca_n_cands (ivs);
4928   iv_ca_delta_commit (data, ivs, *delta, false);
4929 
4930   return cost;
4931 }
4932 
4933 /* Try narrowing set IVS by removing CAND.  Return the cost of
4934    the new set and store the differences in DELTA.  */
4935 
4936 static comp_cost
4937 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
4938 	      struct iv_cand *cand, struct iv_ca_delta **delta)
4939 {
4940   unsigned i, ci;
4941   struct iv_use *use;
4942   struct cost_pair *old_cp, *new_cp, *cp;
4943   bitmap_iterator bi;
4944   struct iv_cand *cnd;
4945   comp_cost cost;
4946 
4947   *delta = NULL;
4948   for (i = 0; i < n_iv_uses (data); i++)
4949     {
4950       use = iv_use (data, i);
4951 
4952       old_cp = iv_ca_cand_for_use (ivs, use);
4953       if (old_cp->cand != cand)
4954 	continue;
4955 
4956       new_cp = NULL;
4957 
4958       if (data->consider_all_candidates)
4959 	{
4960 	  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
4961 	    {
4962 	      if (ci == cand->id)
4963 		continue;
4964 
4965 	      cnd = iv_cand (data, ci);
4966 
4967 	      cp = get_use_iv_cost (data, use, cnd);
4968 	      if (!cp)
4969 		continue;
4970 	      if (!iv_ca_has_deps (ivs, cp))
4971 		continue;
4972 
4973 	      if (!cheaper_cost_pair (cp, new_cp))
4974 		continue;
4975 
4976 	      new_cp = cp;
4977 	    }
4978 	}
4979       else
4980 	{
4981 	  EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
4982 	    {
4983 	      if (ci == cand->id)
4984 		continue;
4985 
4986 	      cnd = iv_cand (data, ci);
4987 
4988 	      cp = get_use_iv_cost (data, use, cnd);
4989 	      if (!cp)
4990 		continue;
4991 	      if (!iv_ca_has_deps (ivs, cp))
4992 		continue;
4993 
4994 	      if (!cheaper_cost_pair (cp, new_cp))
4995 		continue;
4996 
4997 	      new_cp = cp;
4998 	    }
4999 	}
5000 
5001       if (!new_cp)
5002 	{
5003 	  iv_ca_delta_free (delta);
5004 	  return infinite_cost;
5005 	}
5006 
5007       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5008     }
5009 
5010   iv_ca_delta_commit (data, ivs, *delta, true);
5011   cost = iv_ca_cost (ivs);
5012   iv_ca_delta_commit (data, ivs, *delta, false);
5013 
5014   return cost;
5015 }
5016 
5017 /* Try optimizing the set of candidates IVS by removing candidates different
5018    from EXCEPT_CAND from it.  Return cost of the new set, and store
5019    differences in DELTA.  */
5020 
5021 static comp_cost
5022 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
5023 	     struct iv_cand *except_cand, struct iv_ca_delta **delta)
5024 {
5025   bitmap_iterator bi;
5026   struct iv_ca_delta *act_delta, *best_delta;
5027   unsigned i;
5028   comp_cost best_cost, acost;
5029   struct iv_cand *cand;
5030 
5031   best_delta = NULL;
5032   best_cost = iv_ca_cost (ivs);
5033 
5034   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5035     {
5036       cand = iv_cand (data, i);
5037 
5038       if (cand == except_cand)
5039 	continue;
5040 
5041       acost = iv_ca_narrow (data, ivs, cand, &act_delta);
5042 
5043       if (compare_costs (acost, best_cost) < 0)
5044 	{
5045 	  best_cost = acost;
5046 	  iv_ca_delta_free (&best_delta);
5047 	  best_delta = act_delta;
5048 	}
5049       else
5050 	iv_ca_delta_free (&act_delta);
5051     }
5052 
5053   if (!best_delta)
5054     {
5055       *delta = NULL;
5056       return best_cost;
5057     }
5058 
5059   /* Recurse to possibly remove other unnecessary ivs.  */
5060   iv_ca_delta_commit (data, ivs, best_delta, true);
5061   best_cost = iv_ca_prune (data, ivs, except_cand, delta);
5062   iv_ca_delta_commit (data, ivs, best_delta, false);
5063   *delta = iv_ca_delta_join (best_delta, *delta);
5064   return best_cost;
5065 }
5066 
5067 /* Tries to extend the set IVS in the best possible way in order
5068    to express USE.  */
5069 
5070 static bool
5071 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
5072 		  struct iv_use *use)
5073 {
5074   comp_cost best_cost, act_cost;
5075   unsigned i;
5076   bitmap_iterator bi;
5077   struct iv_cand *cand;
5078   struct iv_ca_delta *best_delta = NULL, *act_delta;
5079   struct cost_pair *cp;
5080 
5081   iv_ca_add_use (data, ivs, use);
5082   best_cost = iv_ca_cost (ivs);
5083 
5084   cp = iv_ca_cand_for_use (ivs, use);
5085   if (cp)
5086     {
5087       best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
5088       iv_ca_set_no_cp (data, ivs, use);
5089     }
5090 
5091   /* First try important candidates not based on any memory object.  Only if
5092      this fails, try the specific ones.  Rationale -- in loops with many
5093      variables the best choice often is to use just one generic biv.  If we
5094      added many ivs specific to the uses here, the optimization algorithm later
5095      would be likely to get stuck in a local minimum, thus causing us to create
5096      too many ivs.  The approach from few ivs to more seems more likely to be
5097      successful -- starting from few ivs, replacing an expensive use by a
5098      specific iv should always be a win.  */
5099   EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5100     {
5101       cand = iv_cand (data, i);
5102 
5103       if (cand->iv->base_object != NULL_TREE)
5104 	continue;
5105 
5106       if (iv_ca_cand_used_p (ivs, cand))
5107 	continue;
5108 
5109       cp = get_use_iv_cost (data, use, cand);
5110       if (!cp)
5111 	continue;
5112 
5113       iv_ca_set_cp (data, ivs, use, cp);
5114       act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
5115       iv_ca_set_no_cp (data, ivs, use);
5116       act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
5117 
5118       if (compare_costs (act_cost, best_cost) < 0)
5119 	{
5120 	  best_cost = act_cost;
5121 
5122 	  iv_ca_delta_free (&best_delta);
5123 	  best_delta = act_delta;
5124 	}
5125       else
5126 	iv_ca_delta_free (&act_delta);
5127     }
5128 
5129   if (infinite_cost_p (best_cost))
5130     {
5131       for (i = 0; i < use->n_map_members; i++)
5132 	{
5133 	  cp = use->cost_map + i;
5134 	  cand = cp->cand;
5135 	  if (!cand)
5136 	    continue;
5137 
5138 	  /* Already tried this.  */
5139 	  if (cand->important && cand->iv->base_object == NULL_TREE)
5140 	    continue;
5141 
5142 	  if (iv_ca_cand_used_p (ivs, cand))
5143 	    continue;
5144 
5145 	  act_delta = NULL;
5146 	  iv_ca_set_cp (data, ivs, use, cp);
5147 	  act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
5148 	  iv_ca_set_no_cp (data, ivs, use);
5149 	  act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
5150 				       cp, act_delta);
5151 
5152 	  if (compare_costs (act_cost, best_cost) < 0)
5153 	    {
5154 	      best_cost = act_cost;
5155 
5156 	      if (best_delta)
5157 		iv_ca_delta_free (&best_delta);
5158 	      best_delta = act_delta;
5159 	    }
5160 	  else
5161 	    iv_ca_delta_free (&act_delta);
5162 	}
5163     }
5164 
5165   iv_ca_delta_commit (data, ivs, best_delta, true);
5166   iv_ca_delta_free (&best_delta);
5167 
5168   return !infinite_cost_p (best_cost);
5169 }
5170 
5171 /* Finds an initial assignment of candidates to uses.  */
5172 
5173 static struct iv_ca *
5174 get_initial_solution (struct ivopts_data *data)
5175 {
5176   struct iv_ca *ivs = iv_ca_new (data);
5177   unsigned i;
5178 
5179   for (i = 0; i < n_iv_uses (data); i++)
5180     if (!try_add_cand_for (data, ivs, iv_use (data, i)))
5181       {
5182 	iv_ca_free (&ivs);
5183 	return NULL;
5184       }
5185 
5186   return ivs;
5187 }
5188 
5189 /* Tries to improve set of induction variables IVS.  */
5190 
5191 static bool
5192 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
5193 {
5194   unsigned i, n_ivs;
5195   comp_cost acost, best_cost = iv_ca_cost (ivs);
5196   struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
5197   struct iv_cand *cand;
5198 
5199   /* Try extending the set of induction variables by one.  */
5200   for (i = 0; i < n_iv_cands (data); i++)
5201     {
5202       cand = iv_cand (data, i);
5203 
5204       if (iv_ca_cand_used_p (ivs, cand))
5205 	continue;
5206 
5207       acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
5208       if (!act_delta)
5209 	continue;
5210 
5211       /* If we successfully added the candidate and the set is small enough,
5212 	 try optimizing it by removing other candidates.  */
5213       if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
5214       	{
5215 	  iv_ca_delta_commit (data, ivs, act_delta, true);
5216 	  acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
5217 	  iv_ca_delta_commit (data, ivs, act_delta, false);
5218 	  act_delta = iv_ca_delta_join (act_delta, tmp_delta);
5219 	}
5220 
5221       if (compare_costs (acost, best_cost) < 0)
5222 	{
5223 	  best_cost = acost;
5224 	  iv_ca_delta_free (&best_delta);
5225 	  best_delta = act_delta;
5226 	}
5227       else
5228 	iv_ca_delta_free (&act_delta);
5229     }
5230 
5231   if (!best_delta)
5232     {
5233       /* Try removing the candidates from the set instead.  */
5234       best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
5235 
5236       /* Nothing more we can do.  */
5237       if (!best_delta)
5238 	return false;
5239     }
5240 
5241   iv_ca_delta_commit (data, ivs, best_delta, true);
5242   gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
5243   iv_ca_delta_free (&best_delta);
5244   return true;
5245 }
5246 
5247 /* Attempts to find the optimal set of induction variables.  We use a simple
5248    greedy heuristic -- we try to replace at most one candidate in the selected
5249    solution and remove the unused ivs while this improves the cost.  */
5250 
5251 static struct iv_ca *
5252 find_optimal_iv_set (struct ivopts_data *data)
5253 {
5254   unsigned i;
5255   struct iv_ca *set;
5256   struct iv_use *use;
5257 
5258   /* Get the initial solution.  */
5259   set = get_initial_solution (data);
5260   if (!set)
5261     {
5262       if (dump_file && (dump_flags & TDF_DETAILS))
5263 	fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
5264       return NULL;
5265     }
5266 
5267   if (dump_file && (dump_flags & TDF_DETAILS))
5268     {
5269       fprintf (dump_file, "Initial set of candidates:\n");
5270       iv_ca_dump (data, dump_file, set);
5271     }
5272 
5273   while (try_improve_iv_set (data, set))
5274     {
5275       if (dump_file && (dump_flags & TDF_DETAILS))
5276 	{
5277 	  fprintf (dump_file, "Improved to:\n");
5278 	  iv_ca_dump (data, dump_file, set);
5279 	}
5280     }
5281 
5282   if (dump_file && (dump_flags & TDF_DETAILS))
5283     {
5284       comp_cost cost = iv_ca_cost (set);
5285       fprintf (dump_file, "Final cost %d (complexity %d)\n\n", cost.cost, cost.complexity);
5286     }
5287 
5288   for (i = 0; i < n_iv_uses (data); i++)
5289     {
5290       use = iv_use (data, i);
5291       use->selected = iv_ca_cand_for_use (set, use)->cand;
5292     }
5293 
5294   return set;
5295 }
5296 
5297 /* Creates a new induction variable corresponding to CAND.  */
5298 
5299 static void
5300 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
5301 {
5302   gimple_stmt_iterator incr_pos;
5303   tree base;
5304   bool after = false;
5305 
5306   if (!cand->iv)
5307     return;
5308 
5309   switch (cand->pos)
5310     {
5311     case IP_NORMAL:
5312       incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
5313       break;
5314 
5315     case IP_END:
5316       incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
5317       after = true;
5318       break;
5319 
5320     case IP_AFTER_USE:
5321       after = true;
5322       /* fall through */
5323     case IP_BEFORE_USE:
5324       incr_pos = gsi_for_stmt (cand->incremented_at);
5325       break;
5326 
5327     case IP_ORIGINAL:
5328       /* Mark that the iv is preserved.  */
5329       name_info (data, cand->var_before)->preserve_biv = true;
5330       name_info (data, cand->var_after)->preserve_biv = true;
5331 
5332       /* Rewrite the increment so that it uses var_before directly.  */
5333       find_interesting_uses_op (data, cand->var_after)->selected = cand;
5334 
5335       return;
5336     }
5337 
5338   gimple_add_tmp_var (cand->var_before);
5339   add_referenced_var (cand->var_before);
5340 
5341   base = unshare_expr (cand->iv->base);
5342 
5343   create_iv (base, unshare_expr (cand->iv->step),
5344 	     cand->var_before, data->current_loop,
5345 	     &incr_pos, after, &cand->var_before, &cand->var_after);
5346 }
5347 
5348 /* Creates new induction variables described in SET.  */
5349 
5350 static void
5351 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
5352 {
5353   unsigned i;
5354   struct iv_cand *cand;
5355   bitmap_iterator bi;
5356 
5357   EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
5358     {
5359       cand = iv_cand (data, i);
5360       create_new_iv (data, cand);
5361     }
5362 }
5363 
5364 
5365 /* Rewrites USE (definition of iv used in a nonlinear expression)
5366    using candidate CAND.  */
5367 
5368 static void
5369 rewrite_use_nonlinear_expr (struct ivopts_data *data,
5370 			    struct iv_use *use, struct iv_cand *cand)
5371 {
5372   tree comp;
5373   tree op, tgt;
5374   gimple ass;
5375   gimple_stmt_iterator bsi;
5376 
5377   /* An important special case -- if we are asked to express the value of
5378      the original iv by itself, just exit; there is no need to
5379      introduce a new computation (that might also need casting the
5380      variable to unsigned and back).  */
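  /* E.g. when the preserved biv increment is "i_5 = i_2 + 1" (the SSA
     names are illustrative) and the candidate is that biv itself, the
     early return below leaves the statement untouched.  */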
5381   if (cand->pos == IP_ORIGINAL
5382       && cand->incremented_at == use->stmt)
5383     {
5384       tree step, ctype, utype;
5385       enum tree_code incr_code = PLUS_EXPR, old_code;
5386 
5387       gcc_assert (is_gimple_assign (use->stmt));
5388       gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
5389 
5390       step = cand->iv->step;
5391       ctype = TREE_TYPE (step);
5392       utype = TREE_TYPE (cand->var_after);
5393       if (TREE_CODE (step) == NEGATE_EXPR)
5394 	{
5395 	  incr_code = MINUS_EXPR;
5396 	  step = TREE_OPERAND (step, 0);
5397 	}
5398 
5399       /* Check whether we may leave the computation unchanged.
5400 	 This is the case only if it does not rely on other
5401 	 computations in the loop -- otherwise, the computation
5402 	 we rely upon may be removed in remove_unused_ivs,
5403 	 thus leading to an ICE.  */
5404       old_code = gimple_assign_rhs_code (use->stmt);
5405       if (old_code == PLUS_EXPR
5406 	  || old_code == MINUS_EXPR
5407 	  || old_code == POINTER_PLUS_EXPR)
5408 	{
5409 	  if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
5410 	    op = gimple_assign_rhs2 (use->stmt);
5411 	  else if (old_code != MINUS_EXPR
5412 		   && gimple_assign_rhs2 (use->stmt) == cand->var_before)
5413 	    op = gimple_assign_rhs1 (use->stmt);
5414 	  else
5415 	    op = NULL_TREE;
5416 	}
5417       else
5418 	op = NULL_TREE;
5419 
5420       if (op
5421 	  && (TREE_CODE (op) == INTEGER_CST
5422 	      || operand_equal_p (op, step, 0)))
5423 	return;
5424 
5425       /* Otherwise, add the necessary computations to express
5426 	 the iv.  */
5427       op = fold_convert (ctype, cand->var_before);
5428       comp = fold_convert (utype,
5429 			   build2 (incr_code, ctype, op,
5430 				   unshare_expr (step)));
5431     }
5432   else
5433     {
5434       comp = get_computation (data->current_loop, use, cand);
5435       gcc_assert (comp != NULL_TREE);
5436     }
5437 
5438   switch (gimple_code (use->stmt))
5439     {
5440     case GIMPLE_PHI:
5441       tgt = PHI_RESULT (use->stmt);
5442 
5443       /* If we should keep the biv, do not replace it.  */
5444       if (name_info (data, tgt)->preserve_biv)
5445 	return;
5446 
5447       bsi = gsi_after_labels (gimple_bb (use->stmt));
5448       break;
5449 
5450     case GIMPLE_ASSIGN:
5451       tgt = gimple_assign_lhs (use->stmt);
5452       bsi = gsi_for_stmt (use->stmt);
5453       break;
5454 
5455     default:
5456       gcc_unreachable ();
5457     }
5458 
5459   op = force_gimple_operand_gsi (&bsi, comp, false, SSA_NAME_VAR (tgt),
5460 				 true, GSI_SAME_STMT);
5461 
5462   if (gimple_code (use->stmt) == GIMPLE_PHI)
5463     {
5464       ass = gimple_build_assign (tgt, op);
5465       gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
5466 
5467       bsi = gsi_for_stmt (use->stmt);
5468       remove_phi_node (&bsi, false);
5469     }
5470   else
5471     {
5472       gimple_assign_set_rhs_from_tree (&bsi, op);
5473       use->stmt = gsi_stmt (bsi);
5474     }
5475 }
5476 
5477 /* Replaces ssa name in index IDX by its basic variable.  Callback for
5478    for_each_index.  */
5479 
5480 static bool
5481 idx_remove_ssa_names (tree base, tree *idx,
5482 		      void *data ATTRIBUTE_UNUSED)
5483 {
5484   tree *op;
5485 
5486   if (TREE_CODE (*idx) == SSA_NAME)
5487     *idx = SSA_NAME_VAR (*idx);
5488 
5489   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
5490     {
5491       op = &TREE_OPERAND (base, 2);
5492       if (*op
5493 	  && TREE_CODE (*op) == SSA_NAME)
5494 	*op = SSA_NAME_VAR (*op);
5495       op = &TREE_OPERAND (base, 3);
5496       if (*op
5497 	  && TREE_CODE (*op) == SSA_NAME)
5498 	*op = SSA_NAME_VAR (*op);
5499     }
5500 
5501   return true;
5502 }
5503 
5504 /* Unshares REF and replaces the SSA names in it by their base variables.  */
5505 
5506 static tree
5507 unshare_and_remove_ssa_names (tree ref)
5508 {
5509   ref = unshare_expr (ref);
5510   for_each_index (&ref, idx_remove_ssa_names, NULL);
5511 
5512   return ref;
5513 }
5514 
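/* For illustration (a sketch with made-up SSA versions): given the
   reference a[i_13], where SSA_NAME_VAR (i_13) is the user variable i,

       unshare_and_remove_ssa_names (a[i_13])  ==>  a[i]

   i.e. a fresh, unshared copy whose indices refer only to base
   variables.  Here the result serves solely as the TMR_ORIGINAL record
   kept by copy_ref_info below.  */
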
5515 /* Copies the reference information from OLD_REF to NEW_REF.  */
5516 
5517 static void
5518 copy_ref_info (tree new_ref, tree old_ref)
5519 {
5520   if (TREE_CODE (old_ref) == TARGET_MEM_REF)
5521     copy_mem_ref_info (new_ref, old_ref);
5522   else
5523     {
5524       TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref);
5525       TREE_SIDE_EFFECTS (new_ref) = TREE_SIDE_EFFECTS (old_ref);
5526       TREE_THIS_VOLATILE (new_ref) = TREE_THIS_VOLATILE (old_ref);
5527     }
5528 }
5529 
5530 /* Rewrites USE (an address that is an iv) using candidate CAND.  */
5531 
5532 static void
5533 rewrite_use_address (struct ivopts_data *data,
5534 		     struct iv_use *use, struct iv_cand *cand)
5535 {
5536   aff_tree aff;
5537   gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
5538   tree base_hint = NULL_TREE;
5539   tree ref;
5540   bool ok;
5541 
5542   ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
5543   gcc_assert (ok);
5544   unshare_aff_combination (&aff);
5545 
5546   /* To avoid undefined overflow problems, all IV candidates use unsigned
5547      integer types.  The drawback is that this makes it impossible for
5548      create_mem_ref to distinguish an IV that is based on a memory object
5549      from one that represents simply an offset.
5550 
5551      To work around this problem, we pass a hint to create_mem_ref that
5552      indicates which variable (if any) in aff is an IV based on a memory
5553      object.  Note that we only consider the candidate.  If this is not
5554      based on an object, the base of the reference is in some subexpression
5555      of the use -- but these will use pointer types, so they are recognized
5556      by the create_mem_ref heuristics anyway.  */
5557   if (cand->iv->base_object)
5558     base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
5559 
5560   ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff, base_hint,
5561 			data->speed);
5562   copy_ref_info (ref, *use->op_p);
5563   *use->op_p = ref;
5564 }
5565 
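/* Sketch of the rewrite (hypothetical GIMPLE and dump syntax): an
   address use such as the load

       ... = *p_5;    with p_5 = base_6 + (sizetype) i_4 * 4

   is expressed as an affine combination of the candidate and handed to
   create_mem_ref, which may produce a target memory reference roughly
   of the form

       ... = MEM[base: base_6, index: ivtmp_9, step: 4];

   BASE_HINT tells create_mem_ref which variable, if any, is an iv based
   on a memory object, since the unsigned candidate types hide this.  */
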
5566 /* Rewrites USE (a condition in which one of the arguments is an iv) using
5567    candidate CAND.  */
5568 
5569 static void
5570 rewrite_use_compare (struct ivopts_data *data,
5571 		     struct iv_use *use, struct iv_cand *cand)
5572 {
5573   tree comp, *var_p, op, bound;
5574   gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
5575   enum tree_code compare;
5576   struct cost_pair *cp = get_use_iv_cost (data, use, cand);
5577   bool ok;
5578 
5579   bound = cp->value;
5580   if (bound)
5581     {
5582       tree var = var_at_stmt (data->current_loop, cand, use->stmt);
5583       tree var_type = TREE_TYPE (var);
5584       gimple_seq stmts;
5585 
5586       compare = iv_elimination_compare (data, use);
5587       bound = unshare_expr (fold_convert (var_type, bound));
5588       op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
5589       if (stmts)
5590 	gsi_insert_seq_on_edge_immediate (
5591 		loop_preheader_edge (data->current_loop),
5592 		stmts);
5593 
5594       gimple_cond_set_lhs (use->stmt, var);
5595       gimple_cond_set_code (use->stmt, compare);
5596       gimple_cond_set_rhs (use->stmt, op);
5597       return;
5598     }
5599 
5600   /* The induction variable elimination failed; just express the original
5601      giv.  */
5602   comp = get_computation (data->current_loop, use, cand);
5603   gcc_assert (comp != NULL_TREE);
5604 
5605   ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
5606   gcc_assert (ok);
5607 
5608   *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
5609 				     true, GSI_SAME_STMT);
5610 }
5611 
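/* Illustrative sketch (hypothetical GIMPLE): when the cost pair
   recorded a bound, an exit test such as

       if (i_4 < n_6)

   is replaced by a comparison of the candidate with the bound, which is
   computed once on the preheader edge:

       bound_9 = ...;            <-- preheader
       if (ivtmp_7 != bound_9)   <-- rewritten condition

   When no bound is available, the compared operand is simply
   re-expressed in terms of CAND via get_computation.  */
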
5612 /* Rewrites USE using candidate CAND.  */
5613 
5614 static void
5615 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
5616 {
5617   switch (use->type)
5618     {
5619       case USE_NONLINEAR_EXPR:
5620 	rewrite_use_nonlinear_expr (data, use, cand);
5621 	break;
5622 
5623       case USE_ADDRESS:
5624 	rewrite_use_address (data, use, cand);
5625 	break;
5626 
5627       case USE_COMPARE:
5628 	rewrite_use_compare (data, use, cand);
5629 	break;
5630 
5631       default:
5632 	gcc_unreachable ();
5633     }
5634 
5635   update_stmt (use->stmt);
5636 }
5637 
5638 /* Rewrite the uses using the selected induction variables.  */
5639 
5640 static void
5641 rewrite_uses (struct ivopts_data *data)
5642 {
5643   unsigned i;
5644   struct iv_cand *cand;
5645   struct iv_use *use;
5646 
5647   for (i = 0; i < n_iv_uses (data); i++)
5648     {
5649       use = iv_use (data, i);
5650       cand = use->selected;
5651       gcc_assert (cand);
5652 
5653       rewrite_use (data, use, cand);
5654     }
5655 }
5656 
5657 /* Removes the ivs that are not used after rewriting.  */
5658 
5659 static void
5660 remove_unused_ivs (struct ivopts_data *data)
5661 {
5662   unsigned j;
5663   bitmap_iterator bi;
5664   bitmap toremove = BITMAP_ALLOC (NULL);
5665 
5666   /* Figure out an order in which to release SSA DEFs so that we don't
5667      release something that we'd have to propagate into a debug stmt
5668      afterwards.  */
5669   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5670     {
5671       struct version_info *info;
5672 
5673       info = ver_info (data, j);
5674       if (info->iv
5675 	  && !integer_zerop (info->iv->step)
5676 	  && !info->inv_id
5677 	  && !info->iv->have_use_for
5678 	  && !info->preserve_biv)
5679 	bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
5680     }
5681 
5682   release_defs_bitset (toremove);
5683 
5684   BITMAP_FREE (toremove);
5685 }
5686 
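/* For example (illustrative): an original increment

       i_7 = i_4 + 1;

   whose result has no remaining use (have_use_for is false), is not
   needed as an invariant, and was not marked preserve_biv ends up in
   TOREMOVE; release_defs_bitset then releases the defs in an order that
   avoids freeing a name that still has to be propagated into debug
   stmts.  */
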
5687 /* Frees data allocated by the optimization of a single loop.  */
5688 
5689 static void
5690 free_loop_data (struct ivopts_data *data)
5691 {
5692   unsigned i, j;
5693   bitmap_iterator bi;
5694   tree obj;
5695 
5696   if (data->niters)
5697     {
5698       pointer_map_destroy (data->niters);
5699       data->niters = NULL;
5700     }
5701 
5702   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5703     {
5704       struct version_info *info;
5705 
5706       info = ver_info (data, i);
5707       if (info->iv)
5708 	free (info->iv);
5709       info->iv = NULL;
5710       info->has_nonlin_use = false;
5711       info->preserve_biv = false;
5712       info->inv_id = 0;
5713     }
5714   bitmap_clear (data->relevant);
5715   bitmap_clear (data->important_candidates);
5716 
5717   for (i = 0; i < n_iv_uses (data); i++)
5718     {
5719       struct iv_use *use = iv_use (data, i);
5720 
5721       free (use->iv);
5722       BITMAP_FREE (use->related_cands);
5723       for (j = 0; j < use->n_map_members; j++)
5724 	if (use->cost_map[j].depends_on)
5725 	  BITMAP_FREE (use->cost_map[j].depends_on);
5726       free (use->cost_map);
5727       free (use);
5728     }
5729   VEC_truncate (iv_use_p, data->iv_uses, 0);
5730 
5731   for (i = 0; i < n_iv_cands (data); i++)
5732     {
5733       struct iv_cand *cand = iv_cand (data, i);
5734 
5735       if (cand->iv)
5736 	free (cand->iv);
5737       if (cand->depends_on)
5738 	BITMAP_FREE (cand->depends_on);
5739       free (cand);
5740     }
5741   VEC_truncate (iv_cand_p, data->iv_candidates, 0);
5742 
5743   if (data->version_info_size < num_ssa_names)
5744     {
5745       data->version_info_size = 2 * num_ssa_names;
5746       free (data->version_info);
5747       data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
5748     }
5749 
5750   data->max_inv_id = 0;
5751 
5752   for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
5753     SET_DECL_RTL (obj, NULL_RTX);
5754 
5755   VEC_truncate (tree, decl_rtl_to_reset, 0);
5756 }
5757 
5758 /* Finalizes the data structures used by the iv optimization pass.  */
5760 
5761 static void
5762 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
5763 {
5764   free_loop_data (data);
5765   free (data->version_info);
5766   BITMAP_FREE (data->relevant);
5767   BITMAP_FREE (data->important_candidates);
5768 
5769   VEC_free (tree, heap, decl_rtl_to_reset);
5770   VEC_free (iv_use_p, heap, data->iv_uses);
5771   VEC_free (iv_cand_p, heap, data->iv_candidates);
5772 }
5773 
5774 /* Optimizes LOOP.  Returns true if anything changed.  */
5775 
5776 static bool
5777 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
5778 {
5779   bool changed = false;
5780   struct iv_ca *iv_ca;
5781   edge exit;
5782   basic_block *body;
5783 
5784   gcc_assert (!data->niters);
5785   data->current_loop = loop;
5786   data->speed = optimize_loop_for_speed_p (loop);
5787 
5788   if (dump_file && (dump_flags & TDF_DETAILS))
5789     {
5790       fprintf (dump_file, "Processing loop %d\n", loop->num);
5791 
5792       exit = single_dom_exit (loop);
5793       if (exit)
5794 	{
5795 	  fprintf (dump_file, "  single exit %d -> %d, exit condition ",
5796 		   exit->src->index, exit->dest->index);
5797 	  print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
5798 	  fprintf (dump_file, "\n");
5799 	}
5800 
5801       fprintf (dump_file, "\n");
5802     }
5803 
5804   body = get_loop_body (loop);
5805   renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
5806   free (body);
5807 
5808   /* Determine, for each SSA name, whether it behaves as an induction
5809      variable in this loop.  */
5810   if (!find_induction_variables (data))
5811     goto finish;
5812 
5813   /* Finds interesting uses (item 1).  */
5814   find_interesting_uses (data);
5815   if (n_iv_uses (data) > MAX_CONSIDERED_USES)
5816     goto finish;
5817 
5818   /* Finds candidates for the induction variables (item 2).  */
5819   find_iv_candidates (data);
5820 
5821   /* Calculates the costs (item 3, part 1).  */
5822   determine_iv_costs (data);
5823   determine_use_iv_costs (data);
5824   determine_set_costs (data);
5825 
5826   /* Find the optimal set of induction variables (item 3, part 2).  */
5827   iv_ca = find_optimal_iv_set (data);
5828   if (!iv_ca)
5829     goto finish;
5830   changed = true;
5831 
5832   /* Create the new induction variables (item 4, part 1).  */
5833   create_new_ivs (data, iv_ca);
5834   iv_ca_free (&iv_ca);
5835 
5836   /* Rewrite the uses (item 4, part 2).  */
5837   rewrite_uses (data);
5838 
5839   /* Remove the ivs that are unused after rewriting.  */
5840   remove_unused_ivs (data);
5841 
5842   /* We have changed the structure of the induction variables; definitions
5843      in the scev database may refer to some of them that were just
5844      eliminated, so reset it.  */
5845   scev_reset ();
5846 
5847 finish:
5848   free_loop_data (data);
5849 
5850   return changed;
5851 }
5852 
5853 /* Main entry point.  Optimizes induction variables in loops.  */
5854 
5855 void
5856 tree_ssa_iv_optimize (void)
5857 {
5858   struct loop *loop;
5859   struct ivopts_data data;
5860   loop_iterator li;
5861 
5862   tree_ssa_iv_optimize_init (&data);
5863 
5864   /* Optimize the loops starting with the innermost ones.  */
5865   FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
5866     {
5867       if (dump_file && (dump_flags & TDF_DETAILS))
5868 	flow_loop_dump (loop, dump_file, NULL, 1);
5869 
5870       tree_ssa_iv_optimize_loop (&data, loop);
5871     }
5872 
5873   tree_ssa_iv_optimize_finalize (&data);
5874 }
5875
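/* Overall effect, as a hedged sketch (illustrative source, not from the
   testsuite): a loop such as

       for (i = 0; i < n; i++)
	 a[i] = 0;

   may be strength-reduced so that the access uses a single pointer iv
   and the exit test compares pointers:

       for (p = &a[0], end = &a[n]; p != end; p++)
	 *p = 0;

   as decided by the cost model above; loops are processed
   independently, innermost first.  */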